monkeyshines 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
@@ -0,0 +1,52 @@
1
+ #
2
+ # A numeric interval
3
+ #
4
+ # --
5
+ # could be done with a Range but proved annoying in practice
6
+ # what with Range's immutability, etc.
7
+ # ++
8
+ #
9
class UnionInterval
  # Lower and upper bound of the interval; +nil+ means "unbounded" on that side.
  attr_accessor :min, :max

  # Initialize with set min or max values.
  # To create an interval with no lower bound call:
  #   UnionInterval.new(nil, 69)
  # Pass nil (or omit) +max+ for no upper bound:
  #   UnionInterval.new(5, nil)
  def initialize(min = nil, max = nil)
    self.min = min
    self.max = max
  end

  # Expand the interval in place so that it includes all of +vals+.
  #
  # +vals+ may be a single value, an array of values, or anything responding
  # to #to_a (a Range, another UnionInterval, ...). nil entries are ignored.
  #
  # Returns self, so calls can be chained:
  #   interval << 3 << [10, 20]
  def <<(vals)
    self.min = candidates(min, vals).min
    self.max = candidates(max, vals).max
    self
  end

  # Returns a NEW UnionInterval spanning both this interval and +min_max+
  # (same accepted inputs as #<<). The receiver is left untouched.
  def +(min_max)
    sum_min = candidates(min, min_max).min
    sum_max = candidates(max, min_max).max
    UnionInterval.new(sum_min, sum_max)
  end

  # Returns the span as an array:
  #   [min, max]
  def to_a
    [min, max]
  end

  # true if the extent is defined but empty (lower bound exceeds upper bound).
  # An interval with either bound missing is never empty. Always returns a
  # real boolean.
  def empty?
    return false unless min && max
    min > max
  end

  # true if +val+ lies within the interval (bounds inclusive); a missing bound
  # admits everything on that side. nil is never included. Always returns a
  # real boolean.
  def include?(val)
    return false unless val
    (!min || (val >= min)) && (!max || (val <= max))
  end

  # Width of the interval (max - min), or 0 when either bound is missing.
  def size
    return 0 unless max && min
    max - min
  end

  # String conversion:
  #   #<span:7..956734>
  def to_s
    "#<span:#{min}..#{max}>"
  end

  def inspect
    to_s
  end

  private

  # The current +bound+ together with every value in +vals+, with nils
  # removed. Kernel#Array lets a bare scalar stand in for a one-element list.
  def candidates(bound, vals)
    [bound, Array(vals)].flatten.compact
  end
end
@@ -0,0 +1,70 @@
1
+ require 'addressable/uri'
2
require 'digest/md5' # needed by Addressable::URI#md5hash below

module Addressable
  #
  # Adds the .scrub_url class method and the #revhost, #md5hash and #url_uuid
  # instance methods.
  #
  class URI
    #
    # These are illegal but *are* found in URLs.  We're going to let them through.
    # Note that ' ' space is one of the tolerated miscreants.
    #
    URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS = '\{\}\| \^\`'
    #
    # These are all the characters that belong in a URL
    # (written as the inside of a regexp character class).
    #
    PERMISSIVE_SCRUB_CHARS =
      URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS +
      Addressable::URI::CharacterClasses::UNRESERVED +
      Addressable::URI::CharacterClasses::RESERVED   + '%'
    #
    # Matches any run of characters NOT in PERMISSIVE_SCRUB_CHARS. Built once
    # here rather than re-interpolated on every .scrub_url call.
    #
    SCRUBBABLE_SEQUENCE_RE = /[^#{PERMISSIVE_SCRUB_CHARS}]+/

    #
    # Replace all url-insane characters by their %encoding.  We don't really
    # care here whether the URLs do anything: we just want to remove stuff that
    # absosmurfly don't belong.
    #
    # This code is adapted from Addressable::URI, which unfortunately has a bug
    # in exactly this method (fixed here). (http://addressable.rubyforge.org)
    # Note that we are /not/ re-encoding characters like '%' -- it's assumed
    # that the url is encoded, but perhaps poorly.
    #
    # In practice the illegal characters most often seen are those in
    # URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS plus
    #   <>"\t\\
    #
    # Returns +url+ untouched when it is nil or holds only whitespace;
    # otherwise returns a new string with each offending byte replaced by
    # its upper-case %XX escape.
    #
    def self.scrub_url(url)
      # stdlib-only stand-in for the extension method #blank?
      return url if url.nil? || url.to_s.strip.empty?
      url.gsub(SCRUBBABLE_SEQUENCE_RE) do |sequence|
        # escape byte-by-byte so multi-byte characters become several %XX groups
        sequence.unpack('C*').map { |byte| '%%%02X' % byte }.join
      end
    end

    #
    # +revhost+
    # the dot-reversed host:
    #   foo.company.com => com.company.foo
    #
    # A host with no dots (or a missing host) is returned as-is.
    #
    def revhost
      return host unless host && host.include?('.')
      host.split('.').reverse.join('.')
    end

    #
    # The md5 hex digest of this URI's normalized string form.
    #
    def md5hash
      Digest::MD5.hexdigest(normalize.to_s)
    end

    #
    # +url_uuid+ -- RFC-4122 ver.5 (SHA-1, name-based) uuid computed from the
    # normalized URI within the URL namespace.  Being name-based it is
    # deterministic: the same normalized URL always yields the same uuid.
    #
    # See http://www.faqs.org/rfcs/rfc4122.html
    #
    # You need to require "monkeyshines/utils/uuid" as well: this method relies
    # on UUID and UUID_URL_NAMESPACE, neither of which is defined in this file.
    #
    def url_uuid
      UUID.sha1_create(UUID_URL_NAMESPACE, normalize.to_s)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'uuidtools'
2
class UUID

  #
  # A string suitable for using as a path name: 'urn_uuid/' followed by #to_s
  # with every ':' and '-' turned into a '/'.
  #
  # Ex. (assuming #to_s gives the bare hyphenated form)
  #    3c0dce44-80a8-11dd-a897-001ff35a0a8b =>
  #    urn_uuid/3c0dce44/80a8/11dd/a897/001ff35a0a8b
  #
  # It's well possible there are more perspicacious choices for points to split
  # the string, but until we hit that limit this'll do.
  #
  def to_path
    'urn_uuid/' + to_s.tr(':-', '/')
  end

  #
  # Turn a run of 32 hex digits into the canonical hyphenated 8-4-4-4-12 form:
  #    3c0dce4480a811dda897001ff35a0a8b => 3c0dce44-80a8-11dd-a897-001ff35a0a8b
  #
  # Upper-case digits are accepted and normalized to lower case.
  #
  # Raises ArgumentError when +str+ does not contain 32 consecutive hex
  # digits (previously this surfaced as a NoMethodError on nil).
  #
  def self.hex_to_str(str)
    fields = /([\da-f]{8})([\da-f]{4})([\da-f]{4})([\da-f]{4})([\da-f]{12})/i.match(str.to_s)
    raise ArgumentError, "expected 32 hex digits, got #{str.inspect}" unless fields
    fields.captures.join('-').downcase
  end

  #
  # Build a UUID from an un-hyphenated hex string by hyphenating it with
  # .hex_to_str and handing the result to .parse (not defined in this file).
  #
  def self.parse_hex(str)
    parse(UUID.hex_to_str(str))
  end

  # Overrides UUIDTools -- force 32 hex digits (leading zeros)
  def hexdigest
    format('%032x', to_i)
  end

end
@@ -0,0 +1,147 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE: this gemspec is generated by jeweler. Any change made here must be
# mirrored in the Jeweler::Tasks block of the Rakefile, or it will be lost the
# next time `rake gemspec` runs.
Gem::Specification.new do |s|
  s.name = %q{monkeyshines}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Philip (flip) Kromer"]
  s.date = %q{2009-10-12}
  s.description = %q{A simple scraper for directed scrapes of APIs, feed or structured HTML. Plays nicely with wuclan and wukong.}
  s.email = %q{flip@infochimps.org}
  s.extra_rdoc_files = [
    "LICENSE",
    "LICENSE.textile",
    "README.textile"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "LICENSE.textile",
    "README.textile",
    "Rakefile",
    "VERSION",
    "examples/.gitignore",
    "examples/bulk_urls/scrape_bulk_urls.rb",
    "examples/rename_tree/rename_hdp_tree.rb",
    "examples/rename_tree/rename_ripd_tree.rb",
    "examples/rss_feeds/scrape_rss_feeds.rb",
    "examples/shorturls/README.textile",
    "examples/shorturls/bulkdump_shorturls.rb",
    "examples/shorturls/bulkload_shorturls.rb",
    "examples/shorturls/extract_urls.rb",
    "examples/shorturls/multiplex_shorturl_cache.rb",
    "examples/shorturls/old/multidump_and_fix_shorturls.rb",
    "examples/shorturls/old/shorturl_stats.rb",
    "examples/shorturls/scrape_shorturls.rb",
    "examples/shorturls/shorturl_request.rb",
    "examples/shorturls/shorturl_sequence.rb",
    "examples/shorturls/shorturl_start_tyrant.sh",
    "examples/shorturls/start_shorturl_cache.sh",
    "lib/monkeyshines.rb",
    "lib/monkeyshines/extensions.rb",
    "lib/monkeyshines/fetcher.rb",
    "lib/monkeyshines/fetcher/authed_http_fetcher.rb",
    "lib/monkeyshines/fetcher/base.rb",
    "lib/monkeyshines/fetcher/fake_fetcher.rb",
    "lib/monkeyshines/fetcher/http_fetcher.rb",
    "lib/monkeyshines/fetcher/http_head_fetcher.rb",
    "lib/monkeyshines/monitor.rb",
    "lib/monkeyshines/monitor/chunked_store.rb",
    "lib/monkeyshines/monitor/periodic_logger.rb",
    "lib/monkeyshines/monitor/periodic_monitor.rb",
    "lib/monkeyshines/options.rb",
    "lib/monkeyshines/recursive_runner.rb",
    "lib/monkeyshines/repository/base.rb",
    "lib/monkeyshines/repository/s3.rb",
    "lib/monkeyshines/request_stream.rb",
    "lib/monkeyshines/request_stream/base.rb",
    "lib/monkeyshines/request_stream/edamame_queue.rb",
    "lib/monkeyshines/request_stream/klass_request_stream.rb",
    "lib/monkeyshines/request_stream/simple_request_stream.rb",
    "lib/monkeyshines/runner.rb",
    "lib/monkeyshines/runner_core/options.rb",
    "lib/monkeyshines/runner_core/parsing_runner.rb",
    "lib/monkeyshines/scrape_job/old_paginated.rb",
    "lib/monkeyshines/scrape_job/recursive.rb",
    "lib/monkeyshines/scrape_request.rb",
    "lib/monkeyshines/scrape_request/paginated.rb",
    "lib/monkeyshines/scrape_request/raw_json_contents.rb",
    "lib/monkeyshines/scrape_request/signed_url.rb",
    "lib/monkeyshines/store.rb",
    "lib/monkeyshines/store/base.rb",
    "lib/monkeyshines/store/chunked_flat_file_store.rb",
    "lib/monkeyshines/store/conditional_store.rb",
    "lib/monkeyshines/store/factory.rb",
    "lib/monkeyshines/store/flat_file_store.rb",
    "lib/monkeyshines/store/key_store.rb",
    "lib/monkeyshines/store/null_store.rb",
    "lib/monkeyshines/store/read_thru_store.rb",
    "lib/monkeyshines/store/tokyo_tdb_key_store.rb",
    "lib/monkeyshines/store/tyrant_rdb_key_store.rb",
    "lib/monkeyshines/store/tyrant_tdb_key_store.rb",
    "lib/monkeyshines/utils/factory_module.rb",
    "lib/monkeyshines/utils/filename_pattern.rb",
    "lib/monkeyshines/utils/logger.rb",
    "lib/monkeyshines/utils/trollop-1.14/FAQ.txt",
    "lib/monkeyshines/utils/trollop-1.14/History.txt",
    "lib/monkeyshines/utils/trollop-1.14/Manifest.txt",
    "lib/monkeyshines/utils/trollop-1.14/README.txt",
    "lib/monkeyshines/utils/trollop-1.14/Rakefile",
    "lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb",
    "lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb",
    "lib/monkeyshines/utils/trollop.rb",
    "lib/monkeyshines/utils/union_interval.rb",
    "lib/monkeyshines/utils/uri.rb",
    "lib/monkeyshines/utils/uuid.rb",
    "monkeyshines.gemspec",
    "scrape_from_file.rb",
    "spec/monkeyshines_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/mrflip/monkeyshines}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{A simple scraper for directed scrapes of APIs, feed or structured HTML.}
  s.test_files = [
    "spec/monkeyshines_spec.rb",
    "spec/spec_helper.rb",
    "examples/bulk_urls/scrape_bulk_urls.rb",
    "examples/rename_tree/rename_hdp_tree.rb",
    "examples/rename_tree/rename_ripd_tree.rb",
    "examples/rss_feeds/scrape_rss_feeds.rb",
    "examples/shorturls/bulkdump_shorturls.rb",
    "examples/shorturls/bulkload_shorturls.rb",
    "examples/shorturls/extract_urls.rb",
    "examples/shorturls/multiplex_shorturl_cache.rb",
    "examples/shorturls/old/multidump_and_fix_shorturls.rb",
    "examples/shorturls/old/shorturl_stats.rb",
    "examples/shorturls/scrape_shorturls.rb",
    "examples/shorturls/shorturl_request.rb",
    "examples/shorturls/shorturl_sequence.rb"
  ]

  s.specification_version = 3 if s.respond_to? :specification_version=

  # Runtime dependencies, all unversioned (">= 0").
  #
  # lib/monkeyshines/utils/uuid.rb does `require 'uuidtools'`, so the gem to
  # depend on is uuidtools rather than uuid.
  # NOTE(review): that file reopens a top-level UUID class; newer uuidtools
  # releases may namespace the class differently -- confirm whether an upper
  # version bound is needed.
  runtime_gems = %w[addressable uuidtools wukong]

  # Feature-detect instead of comparing against the deprecated
  # Gem::RubyGemsVersion constant: very old RubyGems only offers
  # add_dependency, everything since offers add_runtime_dependency.
  declare = s.respond_to?(:add_runtime_dependency) ? :add_runtime_dependency : :add_dependency
  runtime_gems.each { |gem_name| s.send(declare, gem_name, [">= 0"]) }
end
@@ -0,0 +1,44 @@
1
#!/usr/bin/env ruby
#
# Fetch every URL listed in a request file and dump the results, one
# tab-separated record per line, to /tmp/req_dump.tsv.
#
# Usage: scrape_from_file.rb FILE_OF_URLS
#
require 'rubygems'
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')
require 'wukong'
require 'monkeyshines'
# the gem ships this file as lib/monkeyshines/fetcher/http_fetcher.rb
require 'monkeyshines/fetcher/http_fetcher'

request_filename = ARGV[0]
# abort prints to stderr and exits non-zero, so callers can detect the misuse
abort "Please give the name of a file holding URLs to scrape" unless request_filename
dump_filename = "/tmp/req_dump.tsv"

# One scrape request / result record: the url plus what came back for it.
class SimpleScrapeRequest < Struct.new(
    :url,
    :scraped_at, :response_code, :response_message,
    :contents )
end

class String
  # A string is already flat: it is its own single field.
  def to_flat
    self
  end
end

# Minimal file-backed store for this script: each object pushed with << is
# written as one tab-separated line.
class Monkeyshines::FlatFileStore
  attr_accessor :file, :filename

  # Opens +filename+ for writing, truncating any existing content.
  def initialize(filename)
    self.filename = filename
    self.file = File.open(filename, "w")
  end

  # Append +contents+ as one line. +contents+ must respond to #to_flat; the
  # result may be an array of fields or a single string (String#to_flat above
  # returns the string itself, hence the Array() wrap before joining).
  # Returns self so writes can be chained.
  def <<(contents)
    flat = contents.to_flat
    p flat
    file << Array(flat).join("\t") + "\n"
    self
  end

  # Flush and close the underlying file; safe to call more than once.
  def close
    file.close unless file.closed?
  end
end

fetcher = Monkeyshines::HttpFetcher.new('twitter.com')
# NOTE(review): no flat_file_request_stream file appears in the gemspec's file
# list (only base, edamame_queue, klass_request_stream and
# simple_request_stream) -- confirm Monkeyshines::FlatFileRequestStream exists.
reqs    = Monkeyshines::FlatFileRequestStream.new(request_filename, SimpleScrapeRequest)
store   = Monkeyshines::FlatFileStore.new(dump_filename)
begin
  reqs.each do |scrape_request|
    p scrape_request
    store << fetcher.get(scrape_request)
  end
ensure
  # make sure buffered output reaches disk even if a fetch blows up
  store.close
end
@@ -0,0 +1,7 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Placeholder spec: its single example always fails, as a reminder that real
# specs have not been written yet.
describe "Monkeyshines" do
  it "fails" do
    raise "hey buddy, you should probably rename this file and start specing for real"
  end
end
@@ -0,0 +1,9 @@
1
require 'spec'

# Put the spec directory itself, and then the gem's lib directory, at the
# front of the load path so the require below finds the working copy.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
require 'monkeyshines'

# No custom runner configuration yet.
Spec::Runner.configure { |config| }
metadata ADDED
@@ -0,0 +1,183 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: monkeyshines
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Philip (flip) Kromer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-12 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: addressable
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: uuid
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: wukong
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: A simple scraper for directed scrapes of APIs, feed or structured HTML. Plays nicely with wuclan and wukong.
46
+ email: flip@infochimps.org
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - LICENSE.textile
54
+ - README.textile
55
+ files:
56
+ - .document
57
+ - .gitignore
58
+ - LICENSE
59
+ - LICENSE.textile
60
+ - README.textile
61
+ - Rakefile
62
+ - VERSION
63
+ - examples/.gitignore
64
+ - examples/bulk_urls/scrape_bulk_urls.rb
65
+ - examples/rename_tree/rename_hdp_tree.rb
66
+ - examples/rename_tree/rename_ripd_tree.rb
67
+ - examples/rss_feeds/scrape_rss_feeds.rb
68
+ - examples/shorturls/README.textile
69
+ - examples/shorturls/bulkdump_shorturls.rb
70
+ - examples/shorturls/bulkload_shorturls.rb
71
+ - examples/shorturls/extract_urls.rb
72
+ - examples/shorturls/multiplex_shorturl_cache.rb
73
+ - examples/shorturls/old/multidump_and_fix_shorturls.rb
74
+ - examples/shorturls/old/shorturl_stats.rb
75
+ - examples/shorturls/scrape_shorturls.rb
76
+ - examples/shorturls/shorturl_request.rb
77
+ - examples/shorturls/shorturl_sequence.rb
78
+ - examples/shorturls/shorturl_start_tyrant.sh
79
+ - examples/shorturls/start_shorturl_cache.sh
80
+ - lib/monkeyshines.rb
81
+ - lib/monkeyshines/extensions.rb
82
+ - lib/monkeyshines/fetcher.rb
83
+ - lib/monkeyshines/fetcher/authed_http_fetcher.rb
84
+ - lib/monkeyshines/fetcher/base.rb
85
+ - lib/monkeyshines/fetcher/fake_fetcher.rb
86
+ - lib/monkeyshines/fetcher/http_fetcher.rb
87
+ - lib/monkeyshines/fetcher/http_head_fetcher.rb
88
+ - lib/monkeyshines/monitor.rb
89
+ - lib/monkeyshines/monitor/chunked_store.rb
90
+ - lib/monkeyshines/monitor/periodic_logger.rb
91
+ - lib/monkeyshines/monitor/periodic_monitor.rb
92
+ - lib/monkeyshines/options.rb
93
+ - lib/monkeyshines/recursive_runner.rb
94
+ - lib/monkeyshines/repository/base.rb
95
+ - lib/monkeyshines/repository/s3.rb
96
+ - lib/monkeyshines/request_stream.rb
97
+ - lib/monkeyshines/request_stream/base.rb
98
+ - lib/monkeyshines/request_stream/edamame_queue.rb
99
+ - lib/monkeyshines/request_stream/klass_request_stream.rb
100
+ - lib/monkeyshines/request_stream/simple_request_stream.rb
101
+ - lib/monkeyshines/runner.rb
102
+ - lib/monkeyshines/runner_core/options.rb
103
+ - lib/monkeyshines/runner_core/parsing_runner.rb
104
+ - lib/monkeyshines/scrape_job/old_paginated.rb
105
+ - lib/monkeyshines/scrape_job/recursive.rb
106
+ - lib/monkeyshines/scrape_request.rb
107
+ - lib/monkeyshines/scrape_request/paginated.rb
108
+ - lib/monkeyshines/scrape_request/raw_json_contents.rb
109
+ - lib/monkeyshines/scrape_request/signed_url.rb
110
+ - lib/monkeyshines/store.rb
111
+ - lib/monkeyshines/store/base.rb
112
+ - lib/monkeyshines/store/chunked_flat_file_store.rb
113
+ - lib/monkeyshines/store/conditional_store.rb
114
+ - lib/monkeyshines/store/factory.rb
115
+ - lib/monkeyshines/store/flat_file_store.rb
116
+ - lib/monkeyshines/store/key_store.rb
117
+ - lib/monkeyshines/store/null_store.rb
118
+ - lib/monkeyshines/store/read_thru_store.rb
119
+ - lib/monkeyshines/store/tokyo_tdb_key_store.rb
120
+ - lib/monkeyshines/store/tyrant_rdb_key_store.rb
121
+ - lib/monkeyshines/store/tyrant_tdb_key_store.rb
122
+ - lib/monkeyshines/utils/factory_module.rb
123
+ - lib/monkeyshines/utils/filename_pattern.rb
124
+ - lib/monkeyshines/utils/logger.rb
125
+ - lib/monkeyshines/utils/trollop-1.14/FAQ.txt
126
+ - lib/monkeyshines/utils/trollop-1.14/History.txt
127
+ - lib/monkeyshines/utils/trollop-1.14/Manifest.txt
128
+ - lib/monkeyshines/utils/trollop-1.14/README.txt
129
+ - lib/monkeyshines/utils/trollop-1.14/Rakefile
130
+ - lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb
131
+ - lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb
132
+ - lib/monkeyshines/utils/trollop.rb
133
+ - lib/monkeyshines/utils/union_interval.rb
134
+ - lib/monkeyshines/utils/uri.rb
135
+ - lib/monkeyshines/utils/uuid.rb
136
+ - monkeyshines.gemspec
137
+ - scrape_from_file.rb
138
+ - spec/monkeyshines_spec.rb
139
+ - spec/spec_helper.rb
140
+ has_rdoc: true
141
+ homepage: http://github.com/mrflip/monkeyshines
142
+ licenses: []
143
+
144
+ post_install_message:
145
+ rdoc_options:
146
+ - --charset=UTF-8
147
+ require_paths:
148
+ - lib
149
+ required_ruby_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: "0"
154
+ version:
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: "0"
160
+ version:
161
+ requirements: []
162
+
163
+ rubyforge_project:
164
+ rubygems_version: 1.3.5
165
+ signing_key:
166
+ specification_version: 3
167
+ summary: A simple scraper for directed scrapes of APIs, feed or structured HTML.
168
+ test_files:
169
+ - spec/monkeyshines_spec.rb
170
+ - spec/spec_helper.rb
171
+ - examples/bulk_urls/scrape_bulk_urls.rb
172
+ - examples/rename_tree/rename_hdp_tree.rb
173
+ - examples/rename_tree/rename_ripd_tree.rb
174
+ - examples/rss_feeds/scrape_rss_feeds.rb
175
+ - examples/shorturls/bulkdump_shorturls.rb
176
+ - examples/shorturls/bulkload_shorturls.rb
177
+ - examples/shorturls/extract_urls.rb
178
+ - examples/shorturls/multiplex_shorturl_cache.rb
179
+ - examples/shorturls/old/multidump_and_fix_shorturls.rb
180
+ - examples/shorturls/old/shorturl_stats.rb
181
+ - examples/shorturls/scrape_shorturls.rb
182
+ - examples/shorturls/shorturl_request.rb
183
+ - examples/shorturls/shorturl_sequence.rb