monkeyshines 0.0.2

Files changed (85)
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
data/lib/monkeyshines/scrape_request/paginated.rb
@@ -0,0 +1,290 @@
+ require 'time'
+ require 'monkeyshines/utils/union_interval'
+ module Monkeyshines
+   module ScrapeRequestCore
+
+     #
+     # Paginated lets you make repeated requests to collect a timeline or
+     # collection of items.
+     #
+     # You will typically want to set the hard_request_limit and max_items for
+     # your request class, and max_total_items per request where it is known.
+     #
+     # A Paginated-compatible ScrapeRequest should inherit from or be compatible
+     # with +Monkeyshines::ScrapeRequest+ and additionally define
+     # * [#items]     list of individual items in the response; +nil+ if there was
+     #                an error, +[]+ if the response was well-formed but returned
+     #                no items.
+     # * [#num_items] number of items from this response
+     # * [#span]      the range of (typically) IDs within this scrape. Used to know
+     #                when we've reached results from a previous session.
+     #
+     module Paginated
+       #
+       # Soft limit on the number of pages to scrape.
+       #
+       # If we know max_total_items, use it to set the number of pages;
+       # otherwise, let it run up to the hard limit.
+       #
+       # Typically, use this to set an upper limit that you know beforehand, and
+       # use #is_last? to decide based on the results.
+       #
+       def max_pages
+         return hard_request_limit if (! max_total_items)
+         (max_total_items.to_f / max_items).ceil.clamp(0, hard_request_limit)
+       end
+
+       # Number of items returned in this request
+       def num_items
+         items ? items.length : 0
+       end
+
+       # Inject class variables into the including class
+       def self.included base
+         base.class_eval do
+           # Hard request limit: do not in any case exceed this number of requests
+           class_inheritable_accessor :hard_request_limit
+           # Max items per page the API might return
+           class_inheritable_accessor :max_items
+           # Total items in all requests, if known ahead of time -- e.g. a
+           # twitter_user's statuses_count can be used to set the max_total_items
+           # for TwitterUserTimelineRequests
+           attr_accessor :max_total_items
+         end
+       end
+     end # Paginated
+
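For orientation, here is a minimal sketch of a Paginated-compatible request class. The class name and limits are hypothetical (patterned on the twitter_user example in the comment above); class_inheritable_accessor is assumed to come from ActiveSupport, and #clamp from monkeyshines' core extensions.

    class TwitterUserTimelineRequest < Monkeyshines::ScrapeRequest
      include Monkeyshines::ScrapeRequestCore::Paginated
      self.hard_request_limit = 16    # never issue more than 16 requests per job
      self.max_items          = 200   # the API returns at most 200 items per page
    end

    req = TwitterUserTimelineRequest.new(url)
    req.max_total_items = 450   # e.g. from the user's statuses_count
    req.max_pages               # => (450 / 200.0).ceil = 3, clamped to 0..16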
+     module Paginating
+       #
+       # Generates a request for each page to be scraped.
+       #
+       # The job class must define a #request_for_page(page) method.
+       #
+       # * the request is generated
+       # * ... and yielded to the given block (which must return the fulfilled
+       #   scrape_request response)
+       # * the after_fetch method chain is invoked
+       #
+       # Scraping stops when is_last?(response, page) is true.
+       #
+       def each_request info=nil, &block
+         before_pagination()
+         (1..hard_request_limit).each do |page|
+           request  = request_for_page(page, info)
+           response = yield request
+           after_fetch(response, page)
+           break if is_last?(response, page)
+         end
+         after_pagination()
+       end
+
+       # Return true if the next request would be pointless (true if, perhaps, the
+       # response had no items, or the API page limit is reached)
+       def is_last? response, page
+         return true unless response   # a failed fetch: stop rather than raise on nil
+         ( (page >= response.max_pages) ||
+           (response.healthy? && partial_response?(response)) )
+       end
+
+       def partial_response? response
+         (response.num_items < response.max_items)
+       end
+
+       # Bookkeeping/setup preceding pagination
+       def before_pagination
+       end
+
+       # Finalize bookkeeping at the conclusion of the scrape_job
+       def after_pagination
+       end
+
+       # Feed back info from the fetch
+       def after_fetch response, page
+       end
+
+       # Inject class variables into the including class
+       def self.included base
+         base.class_eval do
+           # Hard request limit: do not in any case exceed this number of requests
+           class_inheritable_accessor :hard_request_limit
+         end
+       end
+     end # Paginating
+
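From the caller's side, the loop looks roughly like this; the job class and the fetcher's #get method are illustrative assumptions, with the block handing back the fulfilled request:

    job     = UserTimelineJob.new(user)   # includes Paginating, defines #request_for_page
    fetcher = Monkeyshines::Fetcher::HttpFetcher.new
    job.each_request do |req|
      fetcher.get(req)                    # must return the fulfilled response;
    end                                   # each_request halts when #is_last? is true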
+     #
+     # Scenario: you request paginated search results with a limit parameter (a
+     # max_id or min_id, for example).
+     #
+     # * request successive pages,
+     # * use info on the requested page to set the next limit parameter,
+     # * stop when max_pages is reached or a successful request gives fewer than
+     #   max_items.
+     #
+     # The first request leaves max_id blank; each succeeding request sets it
+     # just below the smallest ID seen so far:
+     #
+     #   req?min_id=1234&max_id=
+     #   => [ [8675, ...], ..., [8012, ...] ]   # 100 items
+     #   req?min_id=1234&max_id=8011
+     #   => [ [7581, ...], ..., [2044, ...] ]   # 100 items
+     #   req?min_id=1234&max_id=2043
+     #   => [ [2012, ...], ..., [1234, ...] ]   # 69 items
+     #
+     # * The search terminates when
+     #   ** max_requests requests have been made, or
+     #   ** the limit params interval is zero, or
+     #   ** a successful response with fewer than max_items is received.
+     #
+     # * You will want to save <req?min_id=8676&max_id=""> for a later scrape.
+     #
+     module PaginatedWithLimit
+       # Set up bookkeeping for pagination tracking
+       def before_pagination
+         self.started_at    = Time.now.utc
+         self.sess_span     = UnionInterval.new
+         self.sess_timespan = UnionInterval.new
+         super
+       end
+
+       #
+       # Feed back info from the scrape
+       #
+       def after_fetch response, page
+         super response, page
+         update_spans(response) if (response && response.items)
+       end
+
+       # Update intervals to include the new response
+       def update_spans response
+         self.sess_span     << response.span
+         self.sess_timespan << response.timespan
+       end
+
+       # Return true if the next request would be pointless: besides the checks
+       # in Paginating#is_last?, stop once this session's span overlaps the
+       # previous scrape's high-water mark.
+       def is_last? response, page
+         sess_span.include?(prev_max) || super(response, page)
+       end
+
+       def after_pagination
+         self.prev_max      = [prev_max, sess_span.max].compact.max
+         self.sess_span     = UnionInterval.new
+         self.sess_timespan = UnionInterval.new
+         super
+       end
+
+       # Inject class variables into the including class
+       def self.included base
+         base.class_eval do
+           # Span of items gathered in this scrape_job
+           attr_accessor :sess_span, :sess_timespan, :started_at
+         end
+       end
+     end # PaginatedWithLimit
+
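To make the span bookkeeping concrete, here is the scenario above with a hypothetical high-water mark, assuming UnionInterval#<< merges in a range and #include? tests membership:

    prev_max  = 2000                 # highest ID seen by the previous scrape_job
    sess_span = UnionInterval.new
    sess_span << (8012..8675)        # page 1: no overlap with prev_max yet
    sess_span << (2044..7581)        # page 2: still no overlap
    sess_span << (1234..2012)        # page 3: 2000 now falls inside the span
    sess_span.include?(prev_max)     # => true, so #is_last? halts the scrape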
+     module PaginatedWithRate
+       def before_pagination
+         self.sess_items ||= 0
+         super
+       end
+
+       #
+       # Feed back info from the scrape
+       #
+       def after_fetch response, page
+         super response, page
+         update_counts(response) if (response && response.items)
+       end
+
+       # Count the new items from this response among the session items
+       def update_counts response
+         self.sess_items += response.num_items
+       end
+
+       RATE_PARAMETERS = {
+         :max_session_timespan  => (60 * 60 * 24 * 5), # 5 days
+         :default_scrape_period => (60 * 60 * 2     ), # 2 hours
+         :max_resched_delay     => (60 * 60 * 24 * 1), # 1 day
+         :min_resched_delay     => (5),                # 5 seconds
+         :sess_weight_slowing   => 0.35, # how fast to converge when rate < average
+         :sess_weight_rising    => 1.0,  # how fast to converge when rate > average
+       }
+
+       #
+       # Recalculate the reschedule delay from the observed item rate. Copes with
+       # degenerate sessions: one result, no results, or results clustered at the
+       # center of the nominal timespan.
+       #
+       def recalculate_rate!
+         # If there's no good session timespan, fake one out
+         self.sess_timespan.max ||= Time.now.utc
+         self.sess_timespan.min ||= self.last_run
+         # Whatever its origin, limit the session timespan
+         if sess_timespan.size > RATE_PARAMETERS[:max_session_timespan]
+           sess_timespan.min = sess_timespan.max - RATE_PARAMETERS[:max_session_timespan]
+         end
+         # Find the session items rate; with fewer than two items, fall back to
+         # a conservative rate over the default scrape period
+         if self.sess_items.to_f < 2
+           self.sess_items = 2
+           sess_items_rate = self.sess_items.to_f / RATE_PARAMETERS[:default_scrape_period]
+         else
+           sess_items_rate = self.sess_items.to_f / sess_timespan.size.to_f
+         end
+         # Find and limit the previous items rate, seeding it (and the delay)
+         # when absent
+         self.prev_items_rate = self.prev_items_rate.to_f rescue 0.0
+         if self.prev_items_rate == 0
+           self.prev_items_rate = target_items_per_job.to_f / RATE_PARAMETERS[:default_scrape_period]
+           self.delay           = RATE_PARAMETERS[:default_scrape_period].to_f
+         end
+
+         # The new items rate is a weighted average of the old rate and this
+         # session's. If the new rate is faster than prev_items_rate we use a
+         # high weight (~1.0) to converge quickly; when the rate is slowing we
+         # use a lower weight so it decays gradually.
+         sess_wt = (sess_items_rate > prev_items_rate) ? RATE_PARAMETERS[:sess_weight_rising] : RATE_PARAMETERS[:sess_weight_slowing]
+         new_items_rate  = (prev_items_rate + (sess_items_rate * sess_wt)) / (1.0 + sess_wt)
+         new_total_items = prev_items.to_i + sess_items.to_i
+         since_start     = (Time.now.utc - self.started_at).to_f
+         new_period      = (target_items_per_job / new_items_rate)
+         new_delay       = new_period - since_start
+
+         Log.info(
+           %Q{resched\tit %4d\t%7.3f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%10d\t%s } %
+           [sess_items, sess_timespan.size.to_f, target_items_per_job / sess_items_rate, self.delay, new_period, new_delay, prev_max, self.key])
+
+         self.delay           = new_delay.to_f.clamp(RATE_PARAMETERS[:min_resched_delay], RATE_PARAMETERS[:max_resched_delay])
+         self.prev_items_rate = new_items_rate
+         self.prev_items      = new_total_items
+       end
+
+       #
+       # Recalculate the item rates using the accumulated responses
+       #
+       def after_pagination
+         recalculate_rate!
+         self.sess_items = 0
+         super
+       end
+
+       # Inject class variables into the including class
+       def self.included base
+         base.class_eval do
+           # Count of items gathered in this scrape_job session
+           attr_accessor :sess_items
+           # How many items we hope to pull in with every job
+           cattr_accessor :target_items_per_job
+         end
+       end
+     end # PaginatedWithRate
+   end
+ end
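A rough worked example of the rate averaging, with hypothetical numbers:

    # Previous rate 0.010 items/s; this session measured 0.004 items/s.
    # The rate is falling, so sess_weight_slowing (0.35) applies:
    new_items_rate = (0.010 + 0.004 * 0.35) / (1.0 + 0.35)   # ~0.0084 items/s
    # With target_items_per_job = 100, new_period = 100 / 0.0084, about 11,800 s
    # (~3.3 hours); the delay is that minus the time already elapsed, clamped
    # to min_resched_delay..max_resched_delay.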
data/lib/monkeyshines/scrape_request/raw_json_contents.rb
@@ -0,0 +1,16 @@
+ require 'json'
+ module Monkeyshines
+   module RawJsonContents
+     # Lazily parse the raw JSON in #contents, memoizing the result.
+     # Returns nil if there are no contents or if parsing fails.
+     def parsed_contents
+       return @parsed_contents if @parsed_contents
+       return nil unless contents
+       begin
+         @parsed_contents = JSON.load(contents.to_s)
+       rescue StandardError => e
+         warn "JSON not parsing: #{e.to_s[0..2000].gsub(/[\r\n]+/, '')}"
+         return nil
+       end
+       @parsed_contents
+     end
+   end
+ end
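Usage is a one-line mixin: include RawJsonContents in a request class whose #contents holds a raw JSON string (a sketch; the class and payload are hypothetical):

    class ApiScrapeRequest < Monkeyshines::ScrapeRequest
      include Monkeyshines::RawJsonContents
    end

    req = ApiScrapeRequest.new(url)
    req.contents          # => '{"id":8675,"text":"..."}'
    req.parsed_contents   # => {"id"=>8675, "text"=>"..."} -- memoized; nil on bad JSON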
data/lib/monkeyshines/scrape_request/signed_url.rb
@@ -0,0 +1,86 @@
+ require 'digest/md5'
+ require 'addressable/uri'
+ module Monkeyshines
+   module ScrapeRequestCore
+     module SignedUrl
+
+       # Merge the request params with the api_key, a nonce and the response
+       # format, sort them, and sign path + sorted params + api_secret with MD5.
+       # Returns the sorted query params and the signature.
+       def sign_url parsed_uri, request_key
+         qq = parsed_uri.query_values || {}
+         qq.merge!(request_key)
+         qq.merge!(
+           'api_key' => api_key,
+           'nonce'   => nonce,
+           'format'  => 'json')
+         qq  = qq.sort.map{|k,v| k+'='+v }
+         str = [parsed_uri.path, qq, api_secret].flatten.join('')
+         sig = Digest::MD5.hexdigest(str)
+         [qq, sig]
+       end
+
+       # Reassemble the URL with the sorted, signed query string attached
+       def authed_url(url, request_key)
+         parsed_uri = Addressable::URI.parse(url)
+         qq, sig = sign_url(parsed_uri, request_key)
+         [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join('&'), "&sig=#{sig}"].join('')
+       end
+
+       # A crude per-request nonce: the current UTC time as a float string
+       def nonce
+         Time.now.utc.to_f.to_s
+       end
+
+       def token_request_url
+         "http://api.friendster.com/v1/token?api_key=#{api_key}&nonce=#{nonce}&format=json"
+       end
+     end
+   end
+ end
+
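A sketch of the mixin in use, assuming the including class supplies api_key and api_secret; this is the classic pre-OAuth signing recipe of sorted params hashed with a shared secret:

    url = authed_url('http://api.friendster.com/v1/user', 'uid' => '12345')
    # => "http://api.friendster.com/v1/user?api_key=...&format=json&nonce=1251234567.89&uid=12345&sig=<MD5 of path + sorted params + secret>"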
+ #
+ # class TokenRequest < Base
+ #   def authed_url
+ #     qq = parsed_uri.query_values.merge(
+ #       'api_key'    => api_key,
+ #       'nonce'      => nonce,
+ #       # 'auth_token' => auth_token,
+ #       'format'     => 'json').sort.map{|k,v| k+'='+v }
+ #     p qq
+ #     str = [
+ #       parsed_uri.path,
+ #       qq,
+ #       api_secret].flatten.join("")
+ #     p str
+ #     sig = Digest::MD5.hexdigest(str)
+ #     qq << "sig=#{sig}"
+ #     au = [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join("&")].join("")
+ #     p au
+ #     au
+ #   end
+ # end
+ #
+ # class SessionRequest < Base
+ #   def authed_url(auth_token)
+ #     qq = parsed_uri.query_values.merge(
+ #       'api_key'    => api_key,
+ #       'nonce'      => nonce,
+ #       'auth_token' => auth_token,
+ #       'format'     => 'json').sort.map{|k,v| k+'='+v }
+ #     p qq
+ #     str = [
+ #       parsed_uri.path,
+ #       qq,
+ #       api_secret].flatten.join("")
+ #     p str
+ #     sig = Digest::MD5.hexdigest(str)
+ #     qq << "sig=#{sig}"
+ #     au = [parsed_uri.scheme, '://', parsed_uri.host, parsed_uri.path, '?', qq.join("&")].join("")
+ #     p au
+ #     au
+ #   end
+ #   def make_url()
+ #     "http://api.friendster.com/v1/session?"
+ #   end
+ # end
+ #
+ # # require 'monkeyshines' ; require 'wuclan' ; require 'wukong' ; require 'addressable/uri' ; require 'rest_client' ; scrape_config = YAML.load(File.open(ENV['HOME']+'/.monkeyshines'))
+ # # load(ENV['HOME']+'/ics/wuclan/lib/wuclan/friendster/scrape/base.rb') ; Wuclan::Friendster::Scrape::Base.api_key = scrape_config[:friendster_api][:api_key] ; tokreq = Wuclan::Friendster::Scrape::TokenRequest.new(scrape_config[:friendster_api][:user_id]) ; tok = RestClient.post(tokreq.authed_url, {}).gsub(/\"/,"")
+ # # sessreq = Wuclan::Friendster::Scrape::SessionRequest.new(scrape_config[:friendster_api][:user_id])
+ # # sessreq.auth_token = '' ; sessreq.make_url! ; RestClient.post(sessreq.url+'&sig='+sessreq.url_sig[1], {})
+ # # # => "{"session_key":"....","uid":"...","expires":"..."}"
data/lib/monkeyshines/store.rb
@@ -0,0 +1,14 @@
+ module Monkeyshines
+   module Store
+     extend FactoryModule
+     autoload :Base,                 'monkeyshines/store/base'
+     autoload :FlatFileStore,        'monkeyshines/store/flat_file_store'
+     autoload :ConditionalStore,     'monkeyshines/store/conditional_store'
+     autoload :ChunkedFlatFileStore, 'monkeyshines/store/chunked_flat_file_store'
+     autoload :KeyStore,             'monkeyshines/store/key_store'
+     autoload :TokyoTdbKeyStore,     'monkeyshines/store/tokyo_tdb_key_store'
+     autoload :TyrantTdbKeyStore,    'monkeyshines/store/tyrant_tdb_key_store'
+     autoload :TyrantRdbKeyStore,    'monkeyshines/store/tyrant_rdb_key_store'
+     autoload :ReadThruStore,        'monkeyshines/store/read_thru_store'
+   end
+ end
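The autoloads keep start-up cheap: no store file is loaded until its constant is first referenced. For example (filename hypothetical):

    require 'monkeyshines/store'
    # nothing under monkeyshines/store/* is loaded yet; this first reference
    # triggers the require, then instantiates the store:
    store = Monkeyshines::Store::FlatFileStore.new(:filename => 'dump.tsv')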
data/lib/monkeyshines/store/base.rb
@@ -0,0 +1,29 @@
+ module Monkeyshines
+   module Store
+     class Base
+       attr_accessor :options
+       def initialize _options={}
+         self.options = _options
+         Log.info "Creating #{self.class}"
+       end
+
+       # Iterate over the store, instantiating klass from each record's fields
+       # (dropping the first argument, typically the key) and yielding it.
+       def each_as klass, &block
+         self.each do |*args|
+           begin
+             item = klass.new(*args[1..-1])
+           rescue StandardError => e
+             Log.info [args, e.to_s, self].join("\t")
+             raise e
+           end
+           yield item
+         end
+       end
+
+       def log_line
+         nil
+       end
+
+     end
+   end
+ end
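A sketch of #each_as with a concrete store, assuming the store's #each yields the key followed by the record's fields:

    store = Monkeyshines::Store::FlatFileStore.new(:filename => 'requests.tsv')
    store.each_as(ScrapeRequest) do |req|   # builds ScrapeRequest.new(*fields) per record
      puts req.url
    end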
data/lib/monkeyshines/store/chunked_flat_file_store.rb
@@ -0,0 +1,37 @@
+ module Monkeyshines
+   module Store
+     class ChunkedFlatFileStore < Monkeyshines::Store::FlatFileStore
+       attr_accessor :filename_pattern, :chunk_monitor, :handle
+
+       DEFAULT_OPTIONS = {
+         :chunktime => 4*60*60, # default: rotate to a new file every 4 hours
+         :pattern   => ":rootdir/:date/:handle+:timestamp-:pid.tsv",
+         :rootdir   => nil,
+         :filemode  => 'w',
+       }
+
+       def initialize _options
+         self.options = DEFAULT_OPTIONS.deep_merge(_options)
+         raise "You don't really want a chunk time this small: #{options[:chunktime]}" unless options[:chunktime] > 600
+         self.chunk_monitor    = Monkeyshines::Monitor::PeriodicMonitor.new(:time => options[:chunktime])
+         self.handle           = options[:handle] || Monkeyshines::CONFIG[:handle]
+         self.filename_pattern = Monkeyshines::Utils::FilenamePattern.new(options[:pattern], :handle => handle, :rootdir => options[:rootdir])
+         super options.merge(:filename => filename_pattern.make())
+         self.mkdir!
+       end
+
+       # Save as usual; once the chunk monitor's interval has elapsed, close the
+       # current file and roll over to a freshly-named chunk.
+       def save *args
+         result = super(*args)
+         chunk_monitor.periodically do
+           new_filename = filename_pattern.make()
+           Log.info "Rotating chunked file #{filename} into #{new_filename}"
+           self.close
+           @filename = new_filename
+           self.mkdir!
+         end
+         result
+       end
+
+     end
+   end
+ end
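For example, a store that rolls its output every four hours (rootdir and handle hypothetical):

    store = Monkeyshines::Store::ChunkedFlatFileStore.new(
      :rootdir => '/data/ripd', :handle => 'com.twitter.search')
    store.save(request)
    # writes to /data/ripd/<date>/com.twitter.search+<timestamp>-<pid>.tsv,
    # rotating to a new file once :chunktime (4 hours by default) has elapsed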