monkeyshines 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
@@ -0,0 +1,52 @@
1
+ #
2
+ # A numeric interval
3
+ #
4
+ # --
5
+ # could be done with a Range but proved annoying in practice
6
+ # what with Range's immutability, etc.
7
+ # ++
8
+ #
9
+ class UnionInterval
10
+ attr_accessor :min, :max
11
+ # Initialize with optional +min+ and +max+ bounds; a nil bound means unbounded on that side.
12
+ # To create an interval with no lower bound call:
13
+ # UnionInterval.new(nil, 69)
14
+ # Pass nil (or omit) +max+ for no upper bound:
15
+ # UnionInterval.new(5, nil)
16
+ def initialize min=nil, max=nil
17
+ self.min = min
18
+ self.max = max
19
+ end
20
+ # Expand the interval to include all the vals
21
+ def << vals
22
+ self.min = [min, vals.to_a].flatten.compact.min
23
+ self.max = [max, vals.to_a].flatten.compact.max
24
+ end
25
+ def + min_max
26
+ sum_min = [min, min_max.to_a].flatten.compact.min
27
+ sum_max = [max, min_max.to_a].flatten.compact.max
28
+ UnionInterval.new sum_min, sum_max
29
+ end
30
+ # returns span as an array:
31
+ # [min, max]
32
+ def to_a
33
+ [min, max]
34
+ end
35
+ # true if the extent is defined but empty (lower bound exceeds upper bound)
36
+ def empty?
37
+ min && max && (min > max)
38
+ end
39
+ def include? val
40
+ val && (!min || (val >= min)) && (!max || (val <= max))
41
+ end
42
+ def size
43
+ return 0 unless max && min
44
+ max - min
45
+ end
46
+ # string conversion:
47
+ # #<span:7..956734>
48
+ def to_s
49
+ "#<span:#{min}..#{max}>"
50
+ end
51
+ def inspect() to_s end
52
+ end
@@ -0,0 +1,70 @@
1
+ require 'addressable/uri'
2
+ module Addressable
3
+ #
4
+ # Add the .scrub_url, #revhost, #md5hash and #url_uuid calls
5
+ #
6
+ class URI
7
+ #
8
+ # These are illegal but *are* found in URLs. We're going to let them through.
9
+ # Note that ' ' space is one of the tolerated miscreants.
10
+ #
11
+ URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS = '\{\}\| \^\`'
12
+ #
13
+ # These are all the characters scrub_url leaves untouched: the tolerated illegal ones above, plus the unreserved and reserved sets and '%'
14
+ #
15
+ PERMISSIVE_SCRUB_CHARS =
16
+ URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS +
17
+ Addressable::URI::CharacterClasses::UNRESERVED +
18
+ Addressable::URI::CharacterClasses::RESERVED + '%'
19
+
20
+ #
21
+ # Replace all url-insane characters by their %encoding. We don't really
22
+ # care here whether the URLs do anything: we just want to remove stuff that
23
+ # absosmurfly don't belong.
24
+ #
25
+ # This code is stolen from Addressable::URI, which unfortunately has a bug
26
+ # in exactly this method (fixed here). (http://addressable.rubyforge.org)
27
+ # Note that we are /not/ re-encoding characters like '%' -- it's assumed
28
+ # that the url is encoded, but perhaps poorly.
29
+ #
30
+ # In practice the illegal characters most often seen are those in
31
+ # URL_ILLEGAL_BUT_WHATEVER_DOOD_CHARS plus
32
+ # <>"\t\\
33
+ #
34
+ def self.scrub_url url
35
+ return url if url.blank?
36
+ url.gsub(/[^#{PERMISSIVE_SCRUB_CHARS}]+/) do |sequence|
37
+ sequence.unpack('C*').map{ |c| ("%%%02x"%c).upcase }.join("")
38
+ end
39
+ end
40
+
41
+ #
42
+ # +revhost+
43
+ # the dot-reversed host:
44
+ # foo.company.com => com.company.foo
45
+ #
46
+ def revhost
47
+ return host unless host =~ /\./
48
+ host.split('.').reverse.join('.')
49
+ end
50
+
51
+ #
52
+ # The md5hash of this URI
53
+ #
54
+ # make sure to require 'digest/md5' somewhere...
55
+ def md5hash
56
+ Digest::MD5.hexdigest(self.normalize.to_s)
57
+ end
58
+
59
+ #
60
+ # +url_uuid+ -- RFC-4122 ver.5 (SHA-1, name-based) uuid of the normalized URL; intended to be universally unique
61
+ #
62
+ # See http://www.faqs.org/rfcs/rfc4122.html
63
+ #
64
+ # You need to require "monkeyshines/utils/uuid" as well...
65
+ #
66
+ def url_uuid
67
+ UUID.sha1_create(UUID_URL_NAMESPACE, self.normalize.to_s)
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,32 @@
1
+ require 'uuidtools'
2
+ class UUID
3
+
4
+ #
5
+ # A string suitable for using as a path name --
6
+ #
7
+ # Ex.
8
+ # urn:uuid:3c0dce44-80a8-11dd-a897-001ff35a0a8b =>
9
+ # urn_uuid/3c0dce44/80a8/11dd/a897/001ff35a0a8b
10
+ #
11
+ # It's well possible there are more perspicacious choices for points to split
12
+ # the string, but until we hit that limit this'll do.
13
+ #
14
+ def to_path
15
+ 'urn_uuid/' + to_s.gsub(/[\:\-]/,'/')
16
+ end
17
+
18
+ def self.hex_to_str str
19
+ /([\da-f]{8})([\da-f]{4})([\da-f]{4})([\da-f]{4})([\da-f]{12})/.match(str).captures.join '-'
20
+ end
21
+
22
+
23
+ def self.parse_hex str
24
+ parse(UUID.hex_to_str(str))
25
+ end
26
+
27
+ # Overrides UUIDTools -- force 32 hex digits (leading zeros)
28
+ def hexdigest
29
+ "%032x" % self.to_i
30
+ end
31
+
32
+ end
@@ -0,0 +1,147 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{monkeyshines}
8
+ s.version = "0.0.2"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Philip (flip) Kromer"]
12
+ s.date = %q{2009-10-12}
13
+ s.description = %q{A simple scraper for directed scrapes of APIs, feed or structured HTML. Plays nicely with wuclan and wukong.}
14
+ s.email = %q{flip@infochimps.org}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "LICENSE.textile",
18
+ "README.textile"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".gitignore",
23
+ "LICENSE",
24
+ "LICENSE.textile",
25
+ "README.textile",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "examples/.gitignore",
29
+ "examples/bulk_urls/scrape_bulk_urls.rb",
30
+ "examples/rename_tree/rename_hdp_tree.rb",
31
+ "examples/rename_tree/rename_ripd_tree.rb",
32
+ "examples/rss_feeds/scrape_rss_feeds.rb",
33
+ "examples/shorturls/README.textile",
34
+ "examples/shorturls/bulkdump_shorturls.rb",
35
+ "examples/shorturls/bulkload_shorturls.rb",
36
+ "examples/shorturls/extract_urls.rb",
37
+ "examples/shorturls/multiplex_shorturl_cache.rb",
38
+ "examples/shorturls/old/multidump_and_fix_shorturls.rb",
39
+ "examples/shorturls/old/shorturl_stats.rb",
40
+ "examples/shorturls/scrape_shorturls.rb",
41
+ "examples/shorturls/shorturl_request.rb",
42
+ "examples/shorturls/shorturl_sequence.rb",
43
+ "examples/shorturls/shorturl_start_tyrant.sh",
44
+ "examples/shorturls/start_shorturl_cache.sh",
45
+ "lib/monkeyshines.rb",
46
+ "lib/monkeyshines/extensions.rb",
47
+ "lib/monkeyshines/fetcher.rb",
48
+ "lib/monkeyshines/fetcher/authed_http_fetcher.rb",
49
+ "lib/monkeyshines/fetcher/base.rb",
50
+ "lib/monkeyshines/fetcher/fake_fetcher.rb",
51
+ "lib/monkeyshines/fetcher/http_fetcher.rb",
52
+ "lib/monkeyshines/fetcher/http_head_fetcher.rb",
53
+ "lib/monkeyshines/monitor.rb",
54
+ "lib/monkeyshines/monitor/chunked_store.rb",
55
+ "lib/monkeyshines/monitor/periodic_logger.rb",
56
+ "lib/monkeyshines/monitor/periodic_monitor.rb",
57
+ "lib/monkeyshines/options.rb",
58
+ "lib/monkeyshines/recursive_runner.rb",
59
+ "lib/monkeyshines/repository/base.rb",
60
+ "lib/monkeyshines/repository/s3.rb",
61
+ "lib/monkeyshines/request_stream.rb",
62
+ "lib/monkeyshines/request_stream/base.rb",
63
+ "lib/monkeyshines/request_stream/edamame_queue.rb",
64
+ "lib/monkeyshines/request_stream/klass_request_stream.rb",
65
+ "lib/monkeyshines/request_stream/simple_request_stream.rb",
66
+ "lib/monkeyshines/runner.rb",
67
+ "lib/monkeyshines/runner_core/options.rb",
68
+ "lib/monkeyshines/runner_core/parsing_runner.rb",
69
+ "lib/monkeyshines/scrape_job/old_paginated.rb",
70
+ "lib/monkeyshines/scrape_job/recursive.rb",
71
+ "lib/monkeyshines/scrape_request.rb",
72
+ "lib/monkeyshines/scrape_request/paginated.rb",
73
+ "lib/monkeyshines/scrape_request/raw_json_contents.rb",
74
+ "lib/monkeyshines/scrape_request/signed_url.rb",
75
+ "lib/monkeyshines/store.rb",
76
+ "lib/monkeyshines/store/base.rb",
77
+ "lib/monkeyshines/store/chunked_flat_file_store.rb",
78
+ "lib/monkeyshines/store/conditional_store.rb",
79
+ "lib/monkeyshines/store/factory.rb",
80
+ "lib/monkeyshines/store/flat_file_store.rb",
81
+ "lib/monkeyshines/store/key_store.rb",
82
+ "lib/monkeyshines/store/null_store.rb",
83
+ "lib/monkeyshines/store/read_thru_store.rb",
84
+ "lib/monkeyshines/store/tokyo_tdb_key_store.rb",
85
+ "lib/monkeyshines/store/tyrant_rdb_key_store.rb",
86
+ "lib/monkeyshines/store/tyrant_tdb_key_store.rb",
87
+ "lib/monkeyshines/utils/factory_module.rb",
88
+ "lib/monkeyshines/utils/filename_pattern.rb",
89
+ "lib/monkeyshines/utils/logger.rb",
90
+ "lib/monkeyshines/utils/trollop-1.14/FAQ.txt",
91
+ "lib/monkeyshines/utils/trollop-1.14/History.txt",
92
+ "lib/monkeyshines/utils/trollop-1.14/Manifest.txt",
93
+ "lib/monkeyshines/utils/trollop-1.14/README.txt",
94
+ "lib/monkeyshines/utils/trollop-1.14/Rakefile",
95
+ "lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb",
96
+ "lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb",
97
+ "lib/monkeyshines/utils/trollop.rb",
98
+ "lib/monkeyshines/utils/union_interval.rb",
99
+ "lib/monkeyshines/utils/uri.rb",
100
+ "lib/monkeyshines/utils/uuid.rb",
101
+ "monkeyshines.gemspec",
102
+ "scrape_from_file.rb",
103
+ "spec/monkeyshines_spec.rb",
104
+ "spec/spec_helper.rb"
105
+ ]
106
+ s.homepage = %q{http://github.com/mrflip/monkeyshines}
107
+ s.rdoc_options = ["--charset=UTF-8"]
108
+ s.require_paths = ["lib"]
109
+ s.rubygems_version = %q{1.3.5}
110
+ s.summary = %q{A simple scraper for directed scrapes of APIs, feed or structured HTML.}
111
+ s.test_files = [
112
+ "spec/monkeyshines_spec.rb",
113
+ "spec/spec_helper.rb",
114
+ "examples/bulk_urls/scrape_bulk_urls.rb",
115
+ "examples/rename_tree/rename_hdp_tree.rb",
116
+ "examples/rename_tree/rename_ripd_tree.rb",
117
+ "examples/rss_feeds/scrape_rss_feeds.rb",
118
+ "examples/shorturls/bulkdump_shorturls.rb",
119
+ "examples/shorturls/bulkload_shorturls.rb",
120
+ "examples/shorturls/extract_urls.rb",
121
+ "examples/shorturls/multiplex_shorturl_cache.rb",
122
+ "examples/shorturls/old/multidump_and_fix_shorturls.rb",
123
+ "examples/shorturls/old/shorturl_stats.rb",
124
+ "examples/shorturls/scrape_shorturls.rb",
125
+ "examples/shorturls/shorturl_request.rb",
126
+ "examples/shorturls/shorturl_sequence.rb"
127
+ ]
128
+
129
+ if s.respond_to? :specification_version then
130
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
131
+ s.specification_version = 3
132
+
133
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
134
+ s.add_runtime_dependency(%q<addressable>, [">= 0"])
135
+ s.add_runtime_dependency(%q<uuid>, [">= 0"])
136
+ s.add_runtime_dependency(%q<wukong>, [">= 0"])
137
+ else
138
+ s.add_dependency(%q<addressable>, [">= 0"])
139
+ s.add_dependency(%q<uuid>, [">= 0"])
140
+ s.add_dependency(%q<wukong>, [">= 0"])
141
+ end
142
+ else
143
+ s.add_dependency(%q<addressable>, [">= 0"])
144
+ s.add_dependency(%q<uuid>, [">= 0"])
145
+ s.add_dependency(%q<wukong>, [">= 0"])
146
+ end
147
+ end
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ $: << File.dirname(__FILE__)+'/lib'
4
+ require 'wukong'
5
+ require 'monkeyshines'
6
+ require 'monkeyshines/http_fetcher'
7
+
8
+ request_filename = ARGV[0]
9
+ if ! request_filename
10
+ warn "Please give the name of a file holding URLs to scrape"; exit
11
+ end
12
+ dump_filename = "/tmp/req_dump.tsv"
13
+
14
+ class SimpleScrapeRequest < Struct.new(
15
+ :url,
16
+ :scraped_at, :response_code, :response_message,
17
+ :contents )
18
+ end
19
+
20
+ class String
21
+ def to_flat
22
+ self
23
+ end
24
+ end
25
+
26
+ class Monkeyshines::FlatFileStore
27
+ attr_accessor :file, :filename
28
+ def initialize filename
29
+ self.filename = filename
30
+ self.file = File.open(filename, "w")
31
+ end
32
+ def << contents
33
+ p contents.to_flat
34
+ self.file << contents.to_flat.join("\t") + "\n"
35
+ end
36
+ end
37
+
38
+ fetcher = Monkeyshines::HttpFetcher.new('twitter.com')
39
+ reqs = Monkeyshines::FlatFileRequestStream.new(request_filename, SimpleScrapeRequest)
40
+ store = Monkeyshines::FlatFileStore.new(dump_filename)
41
+ reqs.each do |scrape_request|
42
+ p scrape_request
43
+ store << fetcher.get(scrape_request)
44
+ end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Monkeyshines" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+ require 'monkeyshines'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,183 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: monkeyshines
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Philip (flip) Kromer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-12 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: addressable
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: uuid
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: wukong
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: A simple scraper for directed scrapes of APIs, feed or structured HTML. Plays nicely with wuclan and wukong.
46
+ email: flip@infochimps.org
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - LICENSE.textile
54
+ - README.textile
55
+ files:
56
+ - .document
57
+ - .gitignore
58
+ - LICENSE
59
+ - LICENSE.textile
60
+ - README.textile
61
+ - Rakefile
62
+ - VERSION
63
+ - examples/.gitignore
64
+ - examples/bulk_urls/scrape_bulk_urls.rb
65
+ - examples/rename_tree/rename_hdp_tree.rb
66
+ - examples/rename_tree/rename_ripd_tree.rb
67
+ - examples/rss_feeds/scrape_rss_feeds.rb
68
+ - examples/shorturls/README.textile
69
+ - examples/shorturls/bulkdump_shorturls.rb
70
+ - examples/shorturls/bulkload_shorturls.rb
71
+ - examples/shorturls/extract_urls.rb
72
+ - examples/shorturls/multiplex_shorturl_cache.rb
73
+ - examples/shorturls/old/multidump_and_fix_shorturls.rb
74
+ - examples/shorturls/old/shorturl_stats.rb
75
+ - examples/shorturls/scrape_shorturls.rb
76
+ - examples/shorturls/shorturl_request.rb
77
+ - examples/shorturls/shorturl_sequence.rb
78
+ - examples/shorturls/shorturl_start_tyrant.sh
79
+ - examples/shorturls/start_shorturl_cache.sh
80
+ - lib/monkeyshines.rb
81
+ - lib/monkeyshines/extensions.rb
82
+ - lib/monkeyshines/fetcher.rb
83
+ - lib/monkeyshines/fetcher/authed_http_fetcher.rb
84
+ - lib/monkeyshines/fetcher/base.rb
85
+ - lib/monkeyshines/fetcher/fake_fetcher.rb
86
+ - lib/monkeyshines/fetcher/http_fetcher.rb
87
+ - lib/monkeyshines/fetcher/http_head_fetcher.rb
88
+ - lib/monkeyshines/monitor.rb
89
+ - lib/monkeyshines/monitor/chunked_store.rb
90
+ - lib/monkeyshines/monitor/periodic_logger.rb
91
+ - lib/monkeyshines/monitor/periodic_monitor.rb
92
+ - lib/monkeyshines/options.rb
93
+ - lib/monkeyshines/recursive_runner.rb
94
+ - lib/monkeyshines/repository/base.rb
95
+ - lib/monkeyshines/repository/s3.rb
96
+ - lib/monkeyshines/request_stream.rb
97
+ - lib/monkeyshines/request_stream/base.rb
98
+ - lib/monkeyshines/request_stream/edamame_queue.rb
99
+ - lib/monkeyshines/request_stream/klass_request_stream.rb
100
+ - lib/monkeyshines/request_stream/simple_request_stream.rb
101
+ - lib/monkeyshines/runner.rb
102
+ - lib/monkeyshines/runner_core/options.rb
103
+ - lib/monkeyshines/runner_core/parsing_runner.rb
104
+ - lib/monkeyshines/scrape_job/old_paginated.rb
105
+ - lib/monkeyshines/scrape_job/recursive.rb
106
+ - lib/monkeyshines/scrape_request.rb
107
+ - lib/monkeyshines/scrape_request/paginated.rb
108
+ - lib/monkeyshines/scrape_request/raw_json_contents.rb
109
+ - lib/monkeyshines/scrape_request/signed_url.rb
110
+ - lib/monkeyshines/store.rb
111
+ - lib/monkeyshines/store/base.rb
112
+ - lib/monkeyshines/store/chunked_flat_file_store.rb
113
+ - lib/monkeyshines/store/conditional_store.rb
114
+ - lib/monkeyshines/store/factory.rb
115
+ - lib/monkeyshines/store/flat_file_store.rb
116
+ - lib/monkeyshines/store/key_store.rb
117
+ - lib/monkeyshines/store/null_store.rb
118
+ - lib/monkeyshines/store/read_thru_store.rb
119
+ - lib/monkeyshines/store/tokyo_tdb_key_store.rb
120
+ - lib/monkeyshines/store/tyrant_rdb_key_store.rb
121
+ - lib/monkeyshines/store/tyrant_tdb_key_store.rb
122
+ - lib/monkeyshines/utils/factory_module.rb
123
+ - lib/monkeyshines/utils/filename_pattern.rb
124
+ - lib/monkeyshines/utils/logger.rb
125
+ - lib/monkeyshines/utils/trollop-1.14/FAQ.txt
126
+ - lib/monkeyshines/utils/trollop-1.14/History.txt
127
+ - lib/monkeyshines/utils/trollop-1.14/Manifest.txt
128
+ - lib/monkeyshines/utils/trollop-1.14/README.txt
129
+ - lib/monkeyshines/utils/trollop-1.14/Rakefile
130
+ - lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb
131
+ - lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb
132
+ - lib/monkeyshines/utils/trollop.rb
133
+ - lib/monkeyshines/utils/union_interval.rb
134
+ - lib/monkeyshines/utils/uri.rb
135
+ - lib/monkeyshines/utils/uuid.rb
136
+ - monkeyshines.gemspec
137
+ - scrape_from_file.rb
138
+ - spec/monkeyshines_spec.rb
139
+ - spec/spec_helper.rb
140
+ has_rdoc: true
141
+ homepage: http://github.com/mrflip/monkeyshines
142
+ licenses: []
143
+
144
+ post_install_message:
145
+ rdoc_options:
146
+ - --charset=UTF-8
147
+ require_paths:
148
+ - lib
149
+ required_ruby_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: "0"
154
+ version:
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: "0"
160
+ version:
161
+ requirements: []
162
+
163
+ rubyforge_project:
164
+ rubygems_version: 1.3.5
165
+ signing_key:
166
+ specification_version: 3
167
+ summary: A simple scraper for directed scrapes of APIs, feed or structured HTML.
168
+ test_files:
169
+ - spec/monkeyshines_spec.rb
170
+ - spec/spec_helper.rb
171
+ - examples/bulk_urls/scrape_bulk_urls.rb
172
+ - examples/rename_tree/rename_hdp_tree.rb
173
+ - examples/rename_tree/rename_ripd_tree.rb
174
+ - examples/rss_feeds/scrape_rss_feeds.rb
175
+ - examples/shorturls/bulkdump_shorturls.rb
176
+ - examples/shorturls/bulkload_shorturls.rb
177
+ - examples/shorturls/extract_urls.rb
178
+ - examples/shorturls/multiplex_shorturl_cache.rb
179
+ - examples/shorturls/old/multidump_and_fix_shorturls.rb
180
+ - examples/shorturls/old/shorturl_stats.rb
181
+ - examples/shorturls/scrape_shorturls.rb
182
+ - examples/shorturls/shorturl_request.rb
183
+ - examples/shorturls/shorturl_sequence.rb