monkeyshines 0.0.2

Files changed (85)
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
data/lib/monkeyshines/runner_core/options.rb
@@ -0,0 +1,5 @@
+ module Monkeyshines
+   class Runner
+
+   end
+ end
data/lib/monkeyshines/runner_core/parsing_runner.rb
@@ -0,0 +1,29 @@
+ require 'yaml'
+ require 'monkeyshines/runner_core/options'
+
+ module Monkeyshines
+
+   #
+   # In general, you should
+   #
+   # But where an external library is already providing cooked results, or it's
+   # otherwise most straightforward to directly emit model objects, you can use
+   # a parsing runner.
+   #
+   class ParsingRunner < Runner
+
+     #
+     # Fetch and store the result
+     #
+     #
+     def fetch_and_store req
+       result = fetcher.get(req)     # do the url fetch
+       # results.each do |result|
+       result.parse do |obj|
+         dest.save(obj)
+       end
+       # end
+     end
+
+   end
+ end
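
For orientation, here is a minimal sketch of how a ParsingRunner might be driven. HttpFetcher and FlatFileStore are file names from this gem's own file list, but the wiring below (a runner exposing fetcher= and dest= accessors, and a ready-made request object) is an illustrative assumption, not code from this diff:

  require 'monkeyshines'

  # Hypothetical wiring: the fetcher pulls each URL; the destination store
  # receives every parsed model object.
  runner         = Monkeyshines::ParsingRunner.new
  runner.fetcher = Monkeyshines::Fetcher::HttpFetcher.new
  runner.dest    = Monkeyshines::Store::FlatFileStore.new('parsed_objects.tsv')

  # fetch_and_store fetches the request, parses the response into model
  # objects, and saves each one to the destination store.
  runner.fetch_and_store(request)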
data/lib/monkeyshines/scrape_job/old_paginated.rb
@@ -0,0 +1,343 @@
+ # require 'time'
+ # module Monkeyshines
+ #   #
+ #   # Paginated lets you make repeated requests to collect a timeline or
+ #   # collection of items.
+ #   #
+ #   # You will typically want to set the
+ #   #
+ #   # A Paginated-compatible ScrapeRequest should inherit from or be compatible
+ #   # with +Monkeyshines::ScrapeRequest+ and additionally define
+ #   # * [#items]     list of individual items in the response; +nil+ if there was an
+ #   #                error, +[]+ if the response was well-formed but returned no items.
+ #   # * [#num_items] number of items from this response
+ #   # * [#span]      the range of (typically) IDs within this scrape. Used to know when
+ #   #                we've reached results from previous session
+ #   #
+ #   #
+ #   module Paginated
+ #     #
+ #     # Generates request for each page to be scraped
+ #     #
+ #     # Block must return the fulfilled scrape_request response. This response is
+ #     # passed to +#acknowledge+ for bookkeeping, then the next request is
+ #     # made.
+ #     #
+ #     # Scraping stops after max_pages requests or when is_last?(response, page)
+ #     #
+ #     def each_request pageinfo={}, &block
+ #       begin_pagination!
+ #       (1..hard_request_limit).each do |page|
+ #         response = yield make_request(page, pageinfo)
+ #         warn 'nil response' unless response
+ #         acknowledge(response, page)
+ #         break if is_last?(response, page)
+ #       end
+ #       finish_pagination!
+ #     end
+ #
+ #     # Set up bookkeeping for pagination tracking
+ #     def begin_pagination!
+ #     end
+ #
+ #     # Finalize bookkeeping at conclusion of scrape_job.
+ #     def finish_pagination!
+ #     end
+ #
+ #     #
+ #     # Feed back info from the scrape
+ #     #
+ #     def acknowledge response, page
+ #     end
+ #
+ #     # return true if the next request would be pointless (true if, perhaps, the
+ #     # response had no items, or the API page limit is reached)
+ #     def is_last? response, page
+ #       ( (page >= max_pages) ||
+ #         (response && response.healthy? && (response.num_items < max_items)) )
+ #     end
+ #
+ #     #
+ #     # Soft limit on the number of pages to scrape.
+ #     #
+ #     # Typically, leave this set to the hard_request_limit if you don't know
+ #     # beforehand how many pages to scrape, and override is_last? to decide when
+ #     # to stop short of the API limit
+ #     #
+ #     def max_pages
+ #       hard_request_limit
+ #     end
+ #
+ #     # inject class variables
+ #     def self.included base
+ #       base.class_eval do
+ #         # Hard request limit: do not in any case exceed this number of requests
+ #         class_inheritable_accessor :hard_request_limit
+ #         # max items per page, from API
+ #         class_inheritable_accessor :max_items
+ #         #
+ #         # Span of items gathered in this scrape_job.
+ #         attr_accessor :sess_items, :sess_span, :sess_timespan
+ #       end
+ #     end
+ #   end
+ #
+ #   module PaginatedTimeline
+ #     # Soft limit on the number of pages to scrape.
+ #     #
+ #     # Typically, leave this set to the hard_request_limit if you don't know
+ #     # beforehand how many pages to scrape, and override is_last? to decide when
+ #     # to stop short of the API limit
+ #     #
+ #     def max_pages
+ #       mp = fudge_factor * (n_items - prev_scraped_items) / max_items
+ #       return 0 if mp == 0
+ #       (mp+1).clamp(1, hard_request_limit).to_i
+ #     end
+ #     # inject class variables
+ #     def self.included base
+ #       base.class_eval do
+ #         include Monkeyshines::Paginated
+ #       end
+ #     end
+ #
+ #     # #
+ #     # # Threshold count-per-page and actual count to get number of expected pages.
+ #     # # Cap the request with max
+ #     # def pages_from_count per_page, count, max=nil
+ #     #   num = [ (count.to_f / per_page.to_f).ceil, 0 ].max
+ #     #   [num, max].compact.min
+ #     # end
+ #   end
+ #
+ #   #
+ #   # Scenario: you request paginated search requests with a limit parameter (a
+ #   # max_id or min_id, for example).
+ #   #
+ #   # * request successive pages,
+ #   # * use info on the requested page to set the next limit parameter
+ #   # * stop when max_pages is reached or a successful request gives fewer than
+ #   #   max_items
+ #   #
+ #   #
+ #   # The first
+ #   #
+ #   #   req?min_id=1234&max_id=
+ #   #   => [ [8675, ...], ..., [8012, ...] ]   # 100 items
+ #   #   req?min_id=1234&max_id=8011
+ #   #   => [ [7581, ...], ..., [2044, ...] ]   # 100 items
+ #   #   req?min_id=1234&max_id=2043
+ #   #   => [ [2012, ...], ..., [1234, ...] ]   # 69 items
+ #   #
+ #   # * The search terminates when
+ #   #   ** max_requests requests have been made, or
+ #   #   ** the limit params interval is zero, or
+ #   #   ** a successful response with fewer than max_items is received.
+ #   #
+ #   # * You will want to save <req?min_id=8676&max_id=""> for later scrape
+ #   #
+ #   module PaginatedWithLimit
+ #
+ #     #
+ #     # Return true if the next request would be pointless (true if, perhaps, the
+ #     # response had no items, or the API page limit is reached)
+ #     def is_last? response, page
+ #       unscraped_span.empty? || super(response, page)
+ #     end
+ #
+ #     # Set up bookkeeping for pagination tracking
+ #     def begin_pagination!
+ #       self.sess_items  ||= 0
+ #       self.sess_span     = UnionInterval.new
+ #       self.sess_timespan = UnionInterval.new
+ #       super
+ #     end
+ #
+ #     def finish_pagination!
+ #       # piw = [(prev_items.to_f ** 0.66), (max_items * hard_request_limit * 4.0)].min
+ #       # puts ([Time.now.strftime("%M:%S"), "%-23s"%query_term] + [prev_rate, sess_rate, avg_rate, sess_timespan.size.to_f, prev_items, sess_items, piw, (1000/avg_rate)].map{|s| "%15.4f"%(s||0) }).join("\t") rescue nil
+ #       self.prev_rate = avg_rate
+ #       if sess_items == (hard_request_limit * max_items)
+ #         # bump the rate if we hit the hard cap:
+ #         new_rate = [prev_rate * 1.25, 1000/120.0].max
+ #         Log.info "Bumping rate on #{query_term} from #{prev_rate} to #{new_rate}"
+ #         self.prev_rate = new_rate
+ #       end
+ #       self.prev_items = prev_items.to_i + sess_items.to_i
+ #       self.prev_span  = sess_span + prev_span
+ #       self.new_items  = sess_items.to_i + new_items.to_i
+ #       self.sess_items    = 0
+ #       self.sess_span     = UnionInterval.new
+ #       self.sess_timespan = UnionInterval.new
+ #       super
+ #     end
+ #
+ #     #
+ #     # Feed back info from the scrape
+ #     #
+ #     def acknowledge response, page
+ #       super response, page
+ #       return unless response && response.items
+ #       count_new_items response
+ #       update_spans    response
+ #     end
+ #
+ #     # account for additional items
+ #     def count_new_items response
+ #       num_items = response.num_items
+ #       # if there was overlap with a previous scrape, we have to count the items by hand
+ #       prev_span = self.prev_span
+ #       if prev_span.max && response.span && (response.span.min < prev_span.max)
+ #         num_items = response.items.inject(0){|n,item| (prev_span.include? item['id']) ? n : n+1 }
+ #       end
+ #       self.sess_items += num_items
+ #     end
+ #
+ #
+ #     def sess_rate
+ #       return nil if (!sess_timespan) || (sess_timespan.size == 0)
+ #       sess_items.to_f / sess_timespan.size.to_f
+ #     end
+ #     #
+ #     # How often an item rolls in, on average
+ #     #
+ #     def avg_rate
+ #       return nil if (sess_items.to_f == 0 && (prev_rate.blank? || prev_items.to_f == 0))
+ #       prev_weight  = prev_items.to_f ** 0.66
+ #       sess_weight  = sess_items.to_f
+ #       prev_weight  = [prev_weight, sess_weight*3].min if sess_weight > 0
+ #       weighted_sum = (
+ #         (prev_rate.to_f * prev_weight) +   # damped previous avg
+ #         (sess_rate.to_f * sess_weight) )   # current avg
+ #       rt = weighted_sum / (prev_weight + sess_weight)
+ #       rt
+ #     end
+ #
+ #
+ #     # inject class variables
+ #     def self.included base
+ #       base.class_eval do
+ #         attr_accessor :new_items
+ #         # include Monkeyshines::Paginated
+ #       end
+ #     end
+ #   end
+ #
+ # end
+ #
+ #
+ # module Monkeyshines
+ #   module Paginated
+ #   end
+ #
+ #   module PaginatedTimeline
+ #     # Soft limit on the number of pages to scrape.
+ #     #
+ #     # Typically, leave this set to the hard_request_limit if you don't know
+ #     # beforehand how many pages to scrape, and override is_last? to decide when
+ #     # to stop short of the API limit
+ #     #
+ #     def max_pages
+ #       mp = fudge_factor * (n_items - prev_scraped_items) / max_items
+ #       return 0 if mp == 0
+ #       (mp+1).clamp(1, hard_request_limit).to_i
+ #     end
+ #     # inject class variables
+ #     def self.included base
+ #       base.class_eval do
+ #         include Monkeyshines::Paginated
+ #       end
+ #     end
+ #
+ #     # #
+ #     # # Threshold count-per-page and actual count to get number of expected pages.
+ #     # # Cap the request with max
+ #     # def pages_from_count per_page, count, max=nil
+ #     #   num = [ (count.to_f / per_page.to_f).ceil, 0 ].max
+ #     #   [num, max].compact.min
+ #     # end
+ #   end
+ #
+ #   module PaginatedWithRateAndLimit
+ #
+ #     def after_pagination
+ #       # piw = [(prev_items.to_f ** 0.66), (max_items * hard_request_limit * 4.0)].min
+ #       # puts ([Time.now.strftime("%M:%S"), "%-23s"%query_term] + [prev_rate, sess_rate, avg_rate, sess_timespan.size.to_f, prev_items, sess_items, piw, (1000/avg_rate)].map{|s| "%15.4f"%(s||0) }).join("\t") rescue nil
+ #       self.prev_rate = avg_rate
+ #       if sess_items == (hard_request_limit * max_items)
+ #         # bump the rate if we hit the hard cap:
+ #         new_rate = [prev_rate * 1.25, 1000/120.0].max
+ #         Log.info "Bumping rate on #{query_term} from #{prev_rate} to #{new_rate}"
+ #         self.prev_rate = new_rate
+ #       end
+ #       self.prev_items = prev_items.to_i + sess_items.to_i
+ #       self.prev_span  = sess_span + prev_span
+ #       self.new_items  = sess_items.to_i + new_items.to_i
+ #       self.sess_items    = 0
+ #       self.sess_span     = UnionInterval.new
+ #       self.sess_timespan = UnionInterval.new
+ #       super
+ #     end
+ #
+ #     #
+ #     # Feed back info from the scrape
+ #     #
+ #     def after_fetch response, page
+ #       super response, page
+ #       return unless response && response.items
+ #       count_new_items response
+ #       update_spans    response
+ #     end
+ #
+ #     # account for additional items
+ #     def count_new_items response
+ #       num_items = response.num_items
+ #       # if there was overlap with a previous scrape, we have to count the items by hand
+ #       prev_span = self.prev_span
+ #       if prev_span.max && response.span && (response.span.min < prev_span.max)
+ #         num_items = response.items.inject(0){|n,item| (prev_span.include? item['id']) ? n : n+1 }
+ #       end
+ #       self.sess_items += num_items
+ #     end
+ #
+ #     def update_spans response
+ #       # Update intervals
+ #       self.sess_span     << response.span
+ #       self.sess_timespan << response.timespan
+ #     end
+ #
+ #     # gap between oldest scraped in this scrape_job and last one scraped in
+ #     # previous scrape_job.
+ #     def unscraped_span
+ #       UnionInterval.new(prev_span_max, sess_span.min)
+ #     end
+ #     # span of previous scrape
+ #     def prev_span
+ #       @prev_span ||= UnionInterval.new(prev_span_min, prev_span_max)
+ #     end
+ #     def prev_span= min_max
+ #       self.prev_span_min, self.prev_span_max = min_max.to_a
+ #       @prev_span = UnionInterval.new(prev_span_min, prev_span_max)
+ #     end
+ #
+ #     def sess_rate
+ #       return nil if (!sess_timespan) || (sess_timespan.size == 0)
+ #       sess_items.to_f / sess_timespan.size.to_f
+ #     end
+ #     #
+ #     # How often an item rolls in, on average
+ #     #
+ #     def avg_rate
+ #       return nil if (sess_items.to_f == 0 && (prev_rate.blank? || prev_items.to_f == 0))
+ #       prev_weight  = prev_items.to_f ** 0.66
+ #       sess_weight  = sess_items.to_f
+ #       prev_weight  = [prev_weight, sess_weight*3].min if sess_weight > 0
+ #       weighted_sum = (
+ #         (prev_rate.to_f * prev_weight) +   # damped previous avg
+ #         (sess_rate.to_f * sess_weight) )   # current avg
+ #       rt = weighted_sum / (prev_weight + sess_weight)
+ #       rt
+ #     end
+ #   end
+ #
+ # end
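
The avg_rate logic above blends the previous sessions' rate with the current session's, damping the history by weighting it at prev_items ** 0.66 and capping that weight at three times the current session's. A standalone restatement of the same arithmetic (plain Ruby, no gem dependencies; the numbers are invented for illustration):

  # Weighted-average item rate, mirroring the commented-out avg_rate above.
  def blended_rate(prev_rate, prev_items, sess_rate, sess_items)
    prev_weight = prev_items.to_f ** 0.66                 # damp the history
    sess_weight = sess_items.to_f
    prev_weight = [prev_weight, sess_weight * 3].min if sess_weight > 0
    (prev_rate * prev_weight + sess_rate * sess_weight) / (prev_weight + sess_weight)
  end

  # 1000 prior items at 2.0 items/sec, 100 fresh items at 4.0 items/sec:
  # prev_weight = 1000**0.66 (about 95.5, under the 300 cap), so the blend
  # lands near the fresh rate: (2.0*95.5 + 4.0*100) / 195.5
  blended_rate(2.0, 1000, 4.0, 100)   #=> ~3.02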
data/lib/monkeyshines/scrape_job/recursive.rb
@@ -0,0 +1,9 @@
+ require 'time'
+ require 'monkeyshines/utils/union_interval'
+ module Monkeyshines
+   module ScrapeJob
+     module RecursesJobs
+
+     end
+   end
+ end
data/lib/monkeyshines/scrape_request.rb
@@ -0,0 +1,136 @@
+ require 'digest/md5'
+ module Monkeyshines
+   def self.url_encode str
+     return '' if str.blank?
+     str = str.gsub(/ /, '+')
+     Addressable::URI.encode_component(str, Addressable::URI::CharacterClasses::UNRESERVED+'+')
+   end
+
+   def self.url_decode str
+     return '' if str.blank?
+     str = str.gsub(/\+/, ' ')
+     Addressable::URI.unencode(str)
+   end
+
+   XML_ENCODED_BADNESS = { "\r" => "&#13;", "\n" => "&#10;", "\t" => "&#9;" }
+   #
+   # Takes an already-encoded XML string and replaces ONLY the characters in
+   # XML_ENCODED_BADNESS (by default, \r carriage return, \n newline and \t tab)
+   # with their XML encodings (&#10; and so forth). Doesn't do any other
+   # encoding, and leaves existing entities alone.
+   #
+   def self.scrub_xml_encoded_badness str
+     str.chomp.gsub(/[\r\n\t]/){|c| XML_ENCODED_BADNESS[c]}
+   end
+ end
+
+ module Monkeyshines
+   #
+   # Base module for Scrape requests
+   #
+   module ScrapeRequestCore
+
+     autoload :SignedUrl,          'monkeyshines/scrape_request/signed_url'
+     autoload :Paginated,          'monkeyshines/scrape_request/paginated'
+     autoload :Paginating,         'monkeyshines/scrape_request/paginated'
+     autoload :PaginatedWithLimit, 'monkeyshines/scrape_request/paginated'
+
+     def initialize *args
+       super *args
+       if (moreinfo.is_a?(String)) then self.moreinfo = JSON.load(moreinfo) rescue nil end
+       make_url! if (! url)
+     end
+
+     def to_hash *args
+       hsh = super *args
+       if hsh['moreinfo'].is_a?(Hash)
+         hsh['moreinfo'] = moreinfo.to_json
+       end
+       hsh
+     end
+
+     def to_a *args
+       to_hash.values_of(*members).to_flat
+     end
+
+     #
+     def healthy?
+       (! url.blank?) && (            # has a URL and either:
+         scraped_at.blank?       ||   #   hasn't been scraped,
+         (! response_code.blank?) ||  #   or has, with response code
+         (! contents.blank?) )        #   or has, with response
+     end
+
+     # Set URL from other attributes
+     def make_url!
+       self.url = make_url
+     end
+
+     def response= response
+       return unless response
+       self.contents = Monkeyshines.scrub_xml_encoded_badness(response.body)
+     end
+
+     def url_encode str
+       Monkeyshines.url_encode str
+     end
+
+     def key
+       Digest::MD5.hexdigest(self.url)
+     end
+
+     def req_generation= val
+       (self.moreinfo||={})['req_generation'] = val
+     end
+     def req_generation
+       (self.moreinfo||={})['req_generation']
+     end
+
+     # inject methods at class level
+     module ClassMethods
+       # Builds a URL query string from a hash of key,value pairs
+       #
+       # parameters are in sort order by encoded string
+       #
+       # Ex.
+       #   make_url_query( :foo => 'bar', :q => 'happy meal', :angle => 90 )
+       #   #=> "angle=90&foo=bar&q=happy+meal"
+       #
+       def make_url_query hsh
+         hsh.map{|attr, val| "#{attr}=#{Monkeyshines.url_encode(val)}" }.sort.join("&")
+       end
+     end
+     def self.included base
+       base.class_eval do
+         include ClassMethods
+       end
+     end
+   end
+
+   class ScrapeRequest < TypedStruct.new(
+       [:identifier,       Integer],
+       [:page,             Integer],
+       [:moreinfo,         String],
+       [:url,              String],
+       [:scraped_at,       Bignum],
+       [:response_code,    Integer],
+       [:response_message, String],
+       [:contents,         String]
+       )
+     include ScrapeRequestCore
+   end
+
+   #
+   # A SimpleRequest just holds a URL and the fetch result.
+   #
+   class SimpleRequest < TypedStruct.new(
+       [:url,              String],
+       [:scraped_at,       Bignum],
+       [:response_code,    Integer],
+       [:response_message, String],
+       [:contents,         String]
+       )
+     include ScrapeRequestCore
+   end
+
+ end
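
Since scrub_xml_encoded_badness is pure stdlib, its behavior is easy to check outside the gem. A self-contained restatement (the scrub name here is ours, not the gem's): only \r, \n, and \t are rewritten, and pre-existing entities pass through untouched:

  XML_ENCODED_BADNESS = { "\r" => "&#13;", "\n" => "&#10;", "\t" => "&#9;" }

  def scrub(str)
    str.chomp.gsub(/[\r\n\t]/){|c| XML_ENCODED_BADNESS[c] }
  end

  scrub("a\tb\r\nc\n")        #=> "a&#9;b&#13;&#10;c"   (trailing newline chomped)
  scrub("already &amp; ok")   #=> "already &amp; ok"    (entities left alone)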