monkeyshines 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
@@ -0,0 +1,290 @@
+ require 'time'
+ require 'monkeyshines/utils/union_interval'
+ module Monkeyshines
+   module ScrapeRequestCore
+
+     #
+     # Paginated lets you make repeated requests to collect a timeline or
+     # collection of items.
+     #
+     # You will typically want to set the
+     #
+     # A Paginated-compatible ScrapeRequest should inherit from or be compatible
+     # with +Monkeyshines::ScrapeRequest+ and additionally define
+     # * [#items]     list of individual items in the response; +nil+ if there was an
+     #   error, +[]+ if the response was well-formed but returned no items.
+     # * [#num_items] number of items from this response
+     # * [#span]      the range of (typically) IDs within this scrape. Used to know when
+     #   we've reached results from a previous session
+     #
+     #
+     module Paginated
+       #
+       # Soft limit on the number of pages to scrape.
+       #
+       # If we know the max_total_items, use it to set the number of pages;
+       # otherwise, let it run up to the hard limit.
+       #
+       # Typically, use this to set an upper limit that you know beforehand, and
+       # use #is_last? to decide based on the results
+       #
+       def max_pages
+         return hard_request_limit if (!max_total_items)
+         (max_total_items.to_f / max_items).ceil.clamp(0, hard_request_limit)
+       end
+
+       # Number of items returned in this request
+       def num_items()
+         items ? items.length : 0
+       end
+
+       # inject class variables
+       def self.included base
+         base.class_eval do
+           # Hard request limit: do not in any case exceed this number of requests
+           class_inheritable_accessor :hard_request_limit
+
+           # max items per page the API might return
+           class_inheritable_accessor :max_items
+
+           # Total items in all requests, if known ahead of time -- eg. a
+           # twitter_user's statuses_count can be used to set the max_total_items
+           # for TwitterUserTimelineRequests
+           attr_accessor :max_total_items
+         end
+       end
+     end # Paginated
+
+     module Paginating
+       #
+       # Generates request for each page to be scraped.
+       #
+       # The job class must define a #request_for_page(page) method.
+       #
+       # * request is generated
+       # * ... and yielded to the call block (which must return the fulfilled
+       #   scrape_request response.)
+       # * after_fetch method chain invoked
+       #
+       # Scraping stops when is_last?(response, page) is true
+       #
+       def each_request info=nil, &block
+         before_pagination()
+         (1..hard_request_limit).each do |page|
+           request  = request_for_page(page, info)
+           response = yield request
+           after_fetch(response, page)
+           break if is_last?(response, page)
+         end
+         after_pagination()
+       end
+
+       # return true if the next request would be pointless (true if, perhaps, the
+       # response had no items, or the API page limit is reached)
+       def is_last? response, page
+         ( (page >= response.max_pages) ||
+           (response && response.healthy? && partial_response?(response)) )
+       end
+       def partial_response? response
+         (response.num_items < response.max_items)
+       end
+
+       # Bookkeeping/setup preceding pagination
+       def before_pagination
+       end
+
+       # Finalize bookkeeping at conclusion of scrape_job.
+       def after_pagination
+       end
+
+       # Feed back info from the fetch
+       def after_fetch response, page
+       end
+
+       # inject class variables
+       def self.included base
+         base.class_eval do
+           # Hard request limit: do not in any case exceed this number of requests
+           class_inheritable_accessor :hard_request_limit
+         end
+       end
+     end # Paginating
+
+     #
+     # Scenario: you make paginated search requests with a limit parameter (a
+     # max_id or min_id, for example).
+     #
+     # * request successive pages,
+     # * use info on the requested page to set the next limit parameter
+     # * stop when max_pages is reached or a successful request gives fewer than
+     #   max_items
+     #
+     #
+     # The first
+     #
+     #   req?min_id=1234&max_id=
+     #   => [ [8675, ...], ..., [8012, ...] ]  # 100 items
+     #   req?min_id=1234&max_id=8011
+     #   => [ [7581, ...], ..., [2044, ...] ]  # 100 items
+     #   req?min_id=1234&max_id=2043
+     #   => [ [2012, ...], ..., [1234, ...] ]  #  69 items
+     #
+     # * The search terminates when
+     # ** max_requests requests have been made, or
+     # ** the limit params interval is zero, or
+     # ** a successful response with fewer than max_items is received.
+     #
+     # * You will want to save <req?min_id=8676&max_id=""> for a later scrape
+     #
+     module PaginatedWithLimit
+       # Set up bookkeeping for pagination tracking
+       def before_pagination
+         self.started_at    = Time.now.utc
+         self.sess_span     = UnionInterval.new
+         self.sess_timespan = UnionInterval.new
+         super
+       end
+
+       #
+       # Feed back info from the scrape
+       #
+       def after_fetch response, page
+         super response, page
+         update_spans(response) if (response && response.items)
+       end
+
+       # Update intervals to include new response
+       def update_spans response
+         self.sess_span     << response.span
+         self.sess_timespan << response.timespan
+       end
+
+       # Return true if the next request would be pointless (true if, perhaps, the
+       # response had no items, or the API page limit is reached)
+       def is_last? response, page
+         sess_span.include?(prev_max) || super(response, page)
+       end
+
+       def after_pagination
+         self.prev_max      = [prev_max, sess_span.max].compact.max
+         self.sess_span     = UnionInterval.new
+         self.sess_timespan = UnionInterval.new
+         super
+       end
+
+       # inject class variables
+       def self.included base
+         base.class_eval do
+           # Span of items gathered in this scrape_job.
+           attr_accessor :sess_span, :sess_timespan, :started_at
+         end
+       end
+     end # PaginatedWithLimit
+
+     module PaginatedWithRate
+       def before_pagination
+         self.sess_items ||= 0
+         super
+       end
+
+       #
+       # Feed back info from the scrape
+       #
+       def after_fetch response, page
+         super response, page
+         update_counts(response) if (response && response.items)
+         # p [response.items.map{|item| item['id']}.max, response.items.map{|item| item['id']}.min, prev_max, sess_span, response.parsed_contents.slice('max_id','next_page')]
+         # p response.items.map{|item| ("%6.2f" % [Time.now - Time.parse(item['created_at'])])}
+       end
+
+       # Count the new items from this response among the session items
+       def update_counts response
+         self.sess_items += response.num_items
+       end
+
+       RATE_PARAMETERS = {
+         :max_session_timespan  => (60 * 60 * 24 * 5), # 5 days
+         :default_scrape_period => (60 * 60 * 2     ), # 2 hours
+         :max_resched_delay     => (60 * 60 * 24 * 1), # 1 day
+         :min_resched_delay     => (5),                # 5 seconds
+         :sess_weight_slowing   => 0.35, # how fast to converge when rate < average
+         :sess_weight_rising    => 1.0,  # how fast to converge when rate > average
+       }
+
+       #
+       # * session returns one result
+       # * session returns no result
+       # * session results clustered at center of nominal timespan
+       #
+       def recalculate_rate!
+         # If there's no good session timespan, we can fake one out
+         self.sess_timespan.max ||= Time.now.utc
+         self.sess_timespan.min ||= self.last_run
+         # Whatever its origin, limit the session timespan
+         if sess_timespan.size > RATE_PARAMETERS[:max_session_timespan]
+           sess_timespan.min = sess_timespan.max - RATE_PARAMETERS[:max_session_timespan]
+         end
+         # Find and limit the session items rate
+         if self.sess_items.to_f < 2
+           self.sess_items = 2
+           sess_items_rate = self.sess_items.to_f / RATE_PARAMETERS[:default_scrape_period]
+         else
+           # Find the items rate
+           sess_items_rate = self.sess_items.to_f / sess_timespan.size.to_f
+         end
+         # Find and limit the previous items rate
+         self.prev_items_rate = self.prev_items_rate.to_i rescue 0
+         if self.prev_items_rate == 0
+           self.prev_items_rate = target_items_per_job.to_f / RATE_PARAMETERS[:default_scrape_period]
+           self.delay = RATE_PARAMETERS[:default_scrape_period].to_f
+         end
+
+         # New items rate is a weighted average of new and old
+         #
+         # If new rate is faster than the prev_rate, we use a high weight
+         # (~1.0). When
+         sess_wt = (sess_items_rate > prev_items_rate) ? RATE_PARAMETERS[:sess_weight_rising] : RATE_PARAMETERS[:sess_weight_slowing]
+         new_items_rate  = (prev_items_rate + (sess_items_rate * sess_wt)) / (1.0 + sess_wt)
+         new_total_items = prev_items.to_i + sess_items.to_i
+         since_start     = (Time.now.utc - self.started_at).to_f
+         new_period      = (target_items_per_job / new_items_rate)
+         new_delay       = new_period - since_start
+
+         # puts %Q{rates %6.3f %6.3f => %6.3f delay %5.2f %5.2f => %5.2f (%5.2f) want %d sess %d items/%5.1fs -- %10d < %10d -- %s } %
+         #   [sess_items_rate, prev_items_rate, new_items_rate,
+         #    target_items_per_job / sess_items_rate, self.delay, new_period, new_delay,
+         #    target_items_per_job, sess_items, sess_timespan.size.to_f,
+         #    sess_span.max, prev_max,
+         #    self.key]
+
+         Log.info(
+           %Q{resched\tit %4d\t%7.3f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%10d\t%s } %
+           [sess_items, sess_timespan.size.to_f, target_items_per_job / sess_items_rate, self.delay, new_period, new_delay, prev_max, self.key])
+
+         self.delay           = new_delay.to_f.clamp(RATE_PARAMETERS[:min_resched_delay], RATE_PARAMETERS[:max_resched_delay])
+         self.prev_items_rate = new_items_rate
+         self.prev_items      = new_total_items
+       end
+
+       #
+       # Recalculate the item rates
+       # using the accumulated response
+       #
+       def after_pagination
+         recalculate_rate!
+         self.sess_items = 0
+         super
+       end
+
+       # inject class variables
+       def self.included base
+         base.class_eval do
+           # Count of items gathered in this scrape_job.
+           attr_accessor :sess_items
+           # How many items we hope to pull in for every job
+           cattr_accessor :target_items_per_job
+         end
+       end
+     end # PaginatedWithRate
+   end
+ end
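
The three mixins above compose around a job class that knows how to build the request for page N. A minimal usage sketch follows; the MyTimelineRequest/MyTimelineJob names, the constructor arguments, and the fetcher's #get call are assumptions for illustration, not the gem's documented API:

    # Hypothetical sketch -- names and signatures are assumptions.
    require 'monkeyshines'

    class MyTimelineRequest < Monkeyshines::ScrapeRequest
      include Monkeyshines::ScrapeRequestCore::Paginated
      self.hard_request_limit = 20    # never issue more than 20 requests
      self.max_items          = 100   # assume the API caps pages at 100 items
    end

    class MyTimelineJob
      include Monkeyshines::ScrapeRequestCore::Paginating
      self.hard_request_limit = 20

      # Paginating's one requirement: build the request for a given page
      def request_for_page page, info=nil
        MyTimelineRequest.new("http://example.com/timeline?page=#{page}")
      end
    end

    fetcher = Monkeyshines::Fetcher::HttpFetcher.new
    MyTimelineJob.new.each_request do |req|
      fetcher.get(req)   # the block must return the fulfilled request
    end

Pagination stops early once a fetched page comes back with fewer than max_items, since partial_response? then makes is_last? true.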
@@ -0,0 +1,16 @@
+ require 'json'
+ module Monkeyshines
+   module RawJsonContents
+     def parsed_contents
+       return @parsed_contents if @parsed_contents
+       return nil unless contents
+       begin
+         @parsed_contents = JSON.load(contents.to_s)
+       rescue Exception => e
+         warn "JSON not parsing : #{e.to_s[0..2000].gsub(/[\r\n]+/,"")}" ; return nil
+       end
+       @parsed_contents
+     end
+
+   end
+ end
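
RawJsonContents memoizes a lazy JSON.load of #contents and returns nil (after warning) on malformed input. A quick sketch, assuming only that the including class exposes a #contents string; FakeResponse is a made-up stand-in:

    require 'monkeyshines/scrape_request/raw_json_contents'

    # Hypothetical sketch: any object with a #contents string can mix this in.
    class FakeResponse
      include Monkeyshines::RawJsonContents
      attr_accessor :contents
    end

    good = FakeResponse.new
    good.contents = '{"id":1,"text":"hi"}'
    good.parsed_contents  # => {"id"=>1, "text"=>"hi"}, memoized thereafter

    bad = FakeResponse.new
    bad.contents = 'not json'
    bad.parsed_contents   # warns "JSON not parsing : ..." and returns nil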
@@ -0,0 +1,86 @@
+ module Monkeyshines
+   module ScrapeRequestCore
+     module SignedUrl
+
+       def sign_url parsed_uri, request_key
+         qq = parsed_uri.query_values || {}
+         qq.merge!(request_key)
+         qq.merge!(
+           'api_key' => api_key,
+           'nonce'   => nonce,
+           'format'  => 'json')
+         p qq
+         qq  = qq.sort.map{|k,v| k+'='+v }
+         str = [ parsed_uri.path, qq, api_secret].flatten.join("")
+         sig = Digest::MD5.hexdigest(str)
+         [qq, sig]
+       end
+
+       def authed_url(url, request_key)
+         parsed_uri = Addressable::URI.parse(url)
+         qq, sig = sign_url(parsed_uri, request_key)
+         [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join("&"), "&sig=#{sig}"].join("")
+       end
+
+       def nonce
+         Time.now.utc.to_f.to_s
+       end
+
+       def token_request_url
+         "http://api.friendster.com/v1/token?api_key=#{api_key}&nonce=#{nonce}&format=json"
+       end
+     end
+   end
+ end
+
+ #
+ # class TokenRequest < Base
+ #   def authed_url
+ #     qq = parsed_uri.query_values.merge(
+ #       'api_key'    => api_key,
+ #       'nonce'      => nonce,
+ #       # 'auth_token' => auth_token,
+ #       'format'     => 'json').sort.map{|k,v| k+'='+v }
+ #     p qq
+ #     str = [
+ #       parsed_uri.path,
+ #       qq,
+ #       api_secret].flatten.join("")
+ #     p str
+ #     sig = Digest::MD5.hexdigest(str)
+ #     qq << "sig=#{sig}"
+ #     au = [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join("&")].join("")
+ #     p au
+ #     au
+ #   end
+ # end
+ #
+ # class SessionRequest < Base
+ #   def authed_url(auth_token)
+ #     qq = parsed_uri.query_values.merge(
+ #       'api_key'    => api_key,
+ #       'nonce'      => nonce,
+ #       'auth_token' => auth_token,
+ #       'format'     => 'json').sort.map{|k,v| k+'='+v }
+ #     p qq
+ #     str = [
+ #       parsed_uri.path,
+ #       qq,
+ #       api_secret].flatten.join("")
+ #     p str
+ #     sig = Digest::MD5.hexdigest(str)
+ #     qq << "sig=#{sig}"
+ #     au = [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join("&")].join("")
+ #     p au
+ #     au
+ #   end
+ #   def make_url()
+ #     "http://api.friendster.com/v1/session?"
+ #   end
+ # end
+ #
+ # # require 'monkeyshines' ; require 'wuclan' ; require 'wukong' ; require 'addressable/uri' ; require 'rest_client' ; scrape_config = YAML.load(File.open(ENV['HOME']+'/.monkeyshines'))
+ # # load(ENV['HOME']+'/ics/wuclan/lib/wuclan/friendster/scrape/base.rb') ; Wuclan::Friendster::Scrape::Base.api_key = scrape_config[:friendster_api][:api_key] ; tokreq = Wuclan::Friendster::Scrape::TokenRequest.new(scrape_config[:friendster_api][:user_id]) ; tok = RestClient.post(tokreq.authed_url, {}).gsub(/\"/,"")
+ # # sessreq = Wuclan::Friendster::Scrape::SessionRequest.new(scrape_config[:friendster_api][:user_id])
+ # # sessreq.auth_token = '' ; sessreq.make_url! ; RestClient.post(sessreq.url+'&sig='+sessreq.url_sig[1], {})
+ # # # => "{"session_key":"....","uid":"...","expires":"..."}"
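
The signature here is the classic sorted-parameter MD5 scheme: sort the query pairs, concatenate path + "k=v" pairs + secret, and MD5 the result. A self-contained sketch of the same computation with made-up credentials (the api_key, api_secret, and uid values are placeholders):

    require 'digest/md5'

    api_key    = 'MY_API_KEY'     # placeholder
    api_secret = 'MY_API_SECRET'  # placeholder
    path       = '/v1/user'
    params     = { 'uid'     => '12345',
                   'api_key' => api_key,
                   'nonce'   => Time.now.utc.to_f.to_s,
                   'format'  => 'json' }

    pairs = params.sort.map{|k,v| k+'='+v }   # sorted "k=v" strings
    sig   = Digest::MD5.hexdigest([path, pairs, api_secret].flatten.join(""))
    url   = "http://api.friendster.com#{path}?#{pairs.join('&')}&sig=#{sig}"

Because the secret is folded into the digest but never into the query string, the server can recompute and verify sig without it ever crossing the wire.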
@@ -0,0 +1,14 @@
+ module Monkeyshines
+   module Store
+     extend FactoryModule
+     autoload :Base,                 'monkeyshines/store/base'
+     autoload :FlatFileStore,        'monkeyshines/store/flat_file_store'
+     autoload :ConditionalStore,     'monkeyshines/store/conditional_store'
+     autoload :ChunkedFlatFileStore, 'monkeyshines/store/chunked_flat_file_store'
+     autoload :KeyStore,             'monkeyshines/store/key_store'
+     autoload :TokyoTdbKeyStore,     'monkeyshines/store/tokyo_tdb_key_store'
+     autoload :TyrantTdbKeyStore,    'monkeyshines/store/tyrant_tdb_key_store'
+     autoload :TyrantRdbKeyStore,    'monkeyshines/store/tyrant_rdb_key_store'
+     autoload :ReadThruStore,        'monkeyshines/store/read_thru_store'
+   end
+ end
@@ -0,0 +1,29 @@
+ module Monkeyshines
+   module Store
+     class Base
+       attr_accessor :options
+       def initialize _options={}
+         self.options = _options
+         Log.info "Creating #{self.class}"
+       end
+
+       # Yield each stored record instantiated as klass (the leading key field is dropped)
+       def each_as klass, &block
+         self.each do |*args|
+           begin
+             item = klass.new *args[1..-1]
+           rescue Exception => e
+             Log.info [args, e.to_s, self].join("\t")
+             raise e
+           end
+           yield item
+         end
+       end
+
+       def log_line
+         nil
+       end
+
+     end
+   end
+ end
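
Store::Base#each_as wraps a subclass's #each: it drops the first yielded field (the key), builds klass from the rest, and logs then re-raises on a bad row. A sketch, assuming a concrete store whose #each yields each row's fields key-first (the record class and row layout here are assumptions):

    # Hypothetical sketch -- TweetRecord and the row layout are assumptions.
    TweetRecord = Struct.new(:created_at, :text)

    store = Monkeyshines::Store::FlatFileStore.new(:filename => 'tweets.tsv')
    store.each_as(TweetRecord) do |tweet|
      puts tweet.text
    end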
@@ -0,0 +1,37 @@
+ module Monkeyshines
+   module Store
+     class ChunkedFlatFileStore < Monkeyshines::Store::FlatFileStore
+       attr_accessor :filename_pattern, :chunk_monitor, :handle
+
+       DEFAULT_OPTIONS = {
+         :chunktime => 4*60*60, # default 4 hours
+         :pattern   => ":rootdir/:date/:handle+:timestamp-:pid.tsv",
+         :rootdir   => nil,
+         :filemode  => 'w',
+       }
+
+       def initialize _options
+         self.options = DEFAULT_OPTIONS.deep_merge(_options)
+         raise "You don't really want a chunk time this small: #{options[:chunktime]}" unless options[:chunktime] > 600
+         self.chunk_monitor    = Monkeyshines::Monitor::PeriodicMonitor.new( :time => options[:chunktime] )
+         self.handle           = options[:handle] || Monkeyshines::CONFIG[:handle]
+         self.filename_pattern = Monkeyshines::Utils::FilenamePattern.new(options[:pattern], :handle => handle, :rootdir => options[:rootdir])
+         super options.merge(:filename => filename_pattern.make())
+         self.mkdir!
+       end
+
+       def save *args
+         result = super *args
+         chunk_monitor.periodically do
+           new_filename = filename_pattern.make()
+           Log.info "Rotating chunked file #{filename} into #{new_filename}"
+           self.close
+           @filename = new_filename
+           self.mkdir!
+         end
+         result
+       end
+
+     end
+   end
+ end
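
ChunkedFlatFileStore behaves like a FlatFileStore whose output file rolls over every :chunktime seconds, with names stamped out by FilenamePattern. A configuration sketch (the :rootdir and :handle values are illustrative, and the arguments to #save are whatever FlatFileStore#save accepts):

    store = Monkeyshines::Store::ChunkedFlatFileStore.new(
      :rootdir   => '/data/ripd',     # illustrative path
      :handle    => 'com.twitter',    # illustrative handle
      :chunktime => 60*60)            # rotate hourly; must exceed 600 seconds

    # Each save checks the periodic monitor; once a chunk period has
    # elapsed, the store closes the current file and opens a fresh
    # timestamped one under :rootdir/:date/.
    store.save(record)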