monkeyshines 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +4 -0
- data/.gitignore +43 -0
- data/LICENSE +20 -0
- data/LICENSE.textile +20 -0
- data/README.textile +125 -0
- data/Rakefile +105 -0
- data/VERSION +1 -0
- data/examples/.gitignore +4 -0
- data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
- data/examples/rename_tree/rename_hdp_tree.rb +151 -0
- data/examples/rename_tree/rename_ripd_tree.rb +82 -0
- data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
- data/examples/shorturls/README.textile +111 -0
- data/examples/shorturls/bulkdump_shorturls.rb +46 -0
- data/examples/shorturls/bulkload_shorturls.rb +45 -0
- data/examples/shorturls/extract_urls.rb +12 -0
- data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
- data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
- data/examples/shorturls/old/shorturl_stats.rb +81 -0
- data/examples/shorturls/scrape_shorturls.rb +112 -0
- data/examples/shorturls/shorturl_request.rb +29 -0
- data/examples/shorturls/shorturl_sequence.rb +121 -0
- data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
- data/examples/shorturls/start_shorturl_cache.sh +2 -0
- data/lib/monkeyshines.rb +31 -0
- data/lib/monkeyshines/extensions.rb +16 -0
- data/lib/monkeyshines/fetcher.rb +10 -0
- data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
- data/lib/monkeyshines/fetcher/base.rb +44 -0
- data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
- data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
- data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
- data/lib/monkeyshines/monitor.rb +7 -0
- data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
- data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
- data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
- data/lib/monkeyshines/options.rb +59 -0
- data/lib/monkeyshines/recursive_runner.rb +26 -0
- data/lib/monkeyshines/repository/base.rb +57 -0
- data/lib/monkeyshines/repository/s3.rb +169 -0
- data/lib/monkeyshines/request_stream.rb +11 -0
- data/lib/monkeyshines/request_stream/base.rb +32 -0
- data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
- data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
- data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
- data/lib/monkeyshines/runner.rb +161 -0
- data/lib/monkeyshines/runner_core/options.rb +5 -0
- data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
- data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
- data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
- data/lib/monkeyshines/scrape_request.rb +136 -0
- data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
- data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
- data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
- data/lib/monkeyshines/store.rb +14 -0
- data/lib/monkeyshines/store/base.rb +29 -0
- data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
- data/lib/monkeyshines/store/conditional_store.rb +57 -0
- data/lib/monkeyshines/store/factory.rb +8 -0
- data/lib/monkeyshines/store/flat_file_store.rb +84 -0
- data/lib/monkeyshines/store/key_store.rb +51 -0
- data/lib/monkeyshines/store/null_store.rb +15 -0
- data/lib/monkeyshines/store/read_thru_store.rb +22 -0
- data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
- data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
- data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
- data/lib/monkeyshines/utils/factory_module.rb +106 -0
- data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
- data/lib/monkeyshines/utils/logger.rb +15 -0
- data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
- data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
- data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
- data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
- data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
- data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
- data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
- data/lib/monkeyshines/utils/trollop.rb +744 -0
- data/lib/monkeyshines/utils/union_interval.rb +52 -0
- data/lib/monkeyshines/utils/uri.rb +70 -0
- data/lib/monkeyshines/utils/uuid.rb +32 -0
- data/monkeyshines.gemspec +147 -0
- data/scrape_from_file.rb +44 -0
- data/spec/monkeyshines_spec.rb +7 -0
- data/spec/spec_helper.rb +9 -0
- metadata +183 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
module Monkeyshines
  module Fetcher
    #
    # Base URL fetcher.
    #
    # Subclasses must provide
    #   get(scrape_request)
    # returning the same scrape_request with its contents, scraped_at and
    # response fields appropriately filled in.
    #
    # Every instance method here is a do-nothing hook that returns nil;
    # subclasses override the ones they need.
    #
    class Base
      # Hash of settings handed to the constructor.
      attr_accessor :options

      #
      # Options hash configures any subclass behavior
      #
      def initialize _options={}
        self.options = _options
      end

      # Make request, return satisfied scrape_request.
      # The base implementation does nothing and returns nil.
      def get scrape_request
      end

      # Inscribes request with credentials. No-op in the base class.
      def authenticate req
      end

      # Based on the response code, sleep (in case servers are overheating) and
      # log response.
      #
      # The base implementation is a no-op. It previously called +sleep+ with
      # no argument, which suspends the calling thread forever.
      def backoff response
      end

      # A compact timestamp, created each time it's called.
      # NOTE(review): Time#to_flat is not core Ruby -- presumably provided by
      # the project's extensions; confirm it is loaded before calling this.
      def self.timestamp
        Time.now.utc.to_flat
      end

      # Release any persistent connections to the remote server.
      # No-op in the base class.
      def close
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Monkeyshines
  module Fetcher
    # Minimal stand-in for an HTTP response: status code, status message, body.
    FakeResponse = Struct.new(:code, :message, :body)

    #
    # Fetcher that never touches the network: every request is "satisfied"
    # with a canned 200/OK response.
    #
    class FakeFetcher < Base

      # Fake a satisfied scrape_request.
      #
      # The canned response body is a JSON object recording the url that was
      # asked for. Fills in response_code, response_message, response and
      # scraped_at on the request, then returns that same request.
      def get scrape_request
        body   = { :fetched => scrape_request.url }.to_json
        canned = FakeResponse.new('200', 'OK', body)
        scrape_request.response_code    = canned.code
        scrape_request.response_message = canned.message
        scrape_request.response         = canned
        scrape_request.scraped_at       = Time.now.utc.to_flat
        scrape_request
      end

    end
  end
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
Net::HTTP.version_1_2
|
3
|
+
module Monkeyshines
  module Fetcher
    #
    # Opens a persistent connection and makes repeated requests.
    #
    # * authentication
    # * backoff and logging on client or server errors
    #
    class HttpFetcher < Base

      #
      # Notes:
      #
      # On HTTP:
      # * "RFC 2616 (HTTP/1.1)":http://tools.ietf.org/html/rfc2616
      # * "Header Fields":http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
      # * "Notes on Keep-Alive":http://www.hpl.hp.com/personal/ange/archives/archives-95/http-wg-archive/1661.html
      #
      # * "right_http_connection is another HTTP lib":http://github.com/rightscale/right_http_connection/tree/master/lib/right_http_connection.rb

      # Seconds to sleep before the response-based backoff when the connection
      # is found closed after a request (i.e. the server is not persisting it).
      CNXN_SLEEP_TIME = 0.5
      # Default user agent presented to servers
      USER_AGENT = "Net::HTTP #{RUBY_VERSION}"
      # Port on which TLS is assumed when the caller does not say otherwise.
      HTTPS_PORT = 443
      attr_accessor :connection_opened_at, :username, :password, :http_req_options

      # Reads :username, :password and :user_agent from the options hash and
      # prepares the headers sent with every request.
      def initialize _options={}
        super _options
        self.username = options[:username]
        self.password = options[:password]
        self.http_req_options = {}
        self.http_req_options["User-Agent"] = options[:user_agent] || USER_AGENT
        self.http_req_options["Connection"] = "keep-alive"
      end

      #
      # Current session (starting a new one if necessary).
      # If the host, port or TLS setting has changed, closes the old
      # connection and opens a new one.
      #
      # host    -- server to connect to
      # port    -- TCP port; defaults to Net::HTTP.default_port. It used to be
      #            accepted but ignored, so every connection went to port 80.
      # use_ssl -- whether to speak TLS; when nil it is inferred from the port
      #            (true for 443) so existing two-argument callers keep working.
      #
      # Returns the started Net::HTTP session.
      #
      def http host, port=nil, use_ssl=nil
        port    ||= Net::HTTP.default_port
        use_ssl   = (port == HTTPS_PORT) if use_ssl.nil?
        same_target = (@host == host) && (@port == port) && (@use_ssl == use_ssl)
        return @http if @http && @http.started? && same_target
        close if @http && @http.started?
        @host    = host
        @port    = port
        @use_ssl = use_ssl
        @connection_opened_at = Time.now
        Log.info "Opening HTTP connection for #{@host} at #{@connection_opened_at}"
        @http = Net::HTTP.new(@host, @port)
        @http.use_ssl = true if @use_ssl
        @http.set_debug_output($stderr) if options[:debug_requests]
        @http.start
      end

      # Close the current session, if any
      def close
        if @http && @http.started?
          @http.finish
          Log.info "Closing HTTP connection for #{@host} from #{@connection_opened_at}"
        end
        @http = nil
      end

      # Build and dispatch a GET request for url_str; returns the response.
      def perform_request url_str
        url = URI.parse(url_str)
        # request_uri is the public way to get "path?query"; it also yields
        # "/" for an empty path, which the private path_query did not.
        req = Net::HTTP::Get.new(url.request_uri, http_req_options)
        authenticate req
        http(url.host, url.port, url.scheme == 'https').request req
      end

      # Add HTTP basic-auth credentials, but only when both are configured.
      def authenticate req
        req.basic_auth(username, password) if username && password
      end

      #
      # Based on the response code, sleep (in case servers are overheating) and
      # log response.
      #
      def backoff response
        # backoff when server isn't persisting connection
        # (also covers the case where there is no session object at all)
        sleep CNXN_SLEEP_TIME unless @http && @http.started?
        # Response-based sleep time
        sleep_time = 0
        case response
        when Net::HTTPSuccess            then return          # 2xx
        when Net::HTTPRedirection        then return          # 3xx
        when Net::HTTPBadRequest         then sleep_time = 5  # 400 (rate limit, probably)
        when Net::HTTPUnauthorized       then sleep_time = 0  # 401 (protected user, probably)
        when Net::HTTPForbidden          then sleep_time = 4  # 403 update limit
        when Net::HTTPNotFound           then sleep_time = 0  # 404 deleted
        when Net::HTTPServiceUnavailable then sleep_time = 9  # 503 Fail Whale
        when Net::HTTPServerError        then sleep_time = 2  # 5xx All other server errors
        else                                  sleep_time = 1
        end
        Log.warn "Received #{response.code}, sleeping #{sleep_time} ('#{response.message[0..200].gsub(%r{[\r\n\t]}, " ")}' from #{@host}+#{@connection_opened_at})"
        sleep sleep_time
      end

      #
      # Make request, return satisfied scrape_request.
      #
      # Ordinary errors and timeouts are logged and the connection is dropped
      # so the next call reconnects; the request is then returned with only
      # scraped_at filled in. Anything else is logged and re-raised.
      #
      def get scrape_request
        begin
          response = perform_request(scrape_request.url)
          scrape_request.response_code    = response.code
          scrape_request.response_message = response.message[0..200].gsub(/[\n\r\t]+/, ' ')
          scrape_request.response         = response
          backoff response
        rescue StandardError, Timeout::Error => e
          # SignalException is deliberately NOT rescued here: catching it made
          # Ctrl-C / TERM during a fetch look like a recoverable fetch error.
          # Timeout::Error stays listed explicitly because on old Rubies it is
          # not a StandardError.
          Log.warn ["Recovering from fetcher error:", e.to_s, scrape_request.inspect[0..2000].gsub(/[\n\r]+/, ' ')].join("\t")
          close # restart the connection
        rescue Exception => e
          Log.warn e
          raise
        end
        scrape_request.scraped_at = self.class.timestamp
        scrape_request
      end

      # Same as #get, but also logs the elapsed wall-clock seconds.
      def get_and_report_timing *args
        start = Time.now.to_f
        response = get(*args)
        Log.info( Time.now.to_f - start )
        response
      end
    end

  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'monkeyshines/fetcher/http_fetcher'
|
2
|
+
module Monkeyshines
  module Fetcher
    #
    # Requests the HEAD only, for cases where you don't need to know actual page
    # contents (e.g. you're looking for server info or scraping URL shorteners)
    #
    class HttpHeadFetcher < HttpFetcher

      #
      # Build and dispatch request.
      # We do a HEAD request only, no reason to get the body.
      #
      # Returns whatever the session's #request returns for the HEAD request.
      #
      def perform_request url_str
        url = URI.parse(url_str)
        # request_uri is the public way to get "path?query"; it also yields
        # "/" for an empty path, which the private path_query did not.
        req = Net::HTTP::Head.new(url.request_uri, http_req_options)
        authenticate req
        http(url.host, url.port).request req
      end

    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'monkeyshines/monitor/periodic_monitor'
|
2
|
+
module Monkeyshines
  module Monitor
    #
    # Mixin that rotates a store's output file.
    #
    # The including class is expected to provide #close, #filename=,
    # #dump_file, #save and #chunk_monitor; none of those are defined here.
    # NOTE(review): chunk_monitor is presumably a PeriodicMonitor -- confirm
    # against the including class.
    #
    module ChunkedStore
      # Object whose #make returns the next filename to write to.
      attr_accessor :file_pattern

      # Remembers the pattern and hands the first generated filename to the
      # including class's own initializer.
      def initialize file_pattern
        self.file_pattern = file_pattern
        super file_pattern.make
      end

      # Close the current file, switch to a freshly generated filename and
      # call dump_file.
      def close_and_reopen
        close
        self.filename = file_pattern.make
        dump_file
      end

      # #save has always invoked the rotation under this name, but the module
      # never defined it, so rotation raised NoMethodError unless the host
      # class happened to supply it. Defined as a delegating method (not an
      # alias) so that a host-class definition of either name still wins.
      def close_rename_and_open
        close_and_reopen
      end

      # Give the chunk monitor a chance to rotate the file, then save as usual.
      def save *args
        chunk_monitor.periodically{ close_rename_and_open }
        super(*args)
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Monkeyshines
  module Monitor

    #
    # A PeriodicMonitor whose periodic action is to write a log line.
    #
    # The block you pass is only invoked when the parent class decides it is
    # time to act, so it may do relatively expensive work without slowing
    # down every iteration.
    #
    class PeriodicLogger < PeriodicMonitor
      #
      # Call with a block that returns a string or array to log; nil entries
      # are dropped. The line is tab-separated and always starts with the
      # iteration count, the seconds since the monitor was created and the
      # iteration rate, followed by whatever the block returned.
      #
      # Ex: log if it has been at least 5 minutes since last announcement:
      #
      #   periodic_logger = Monkeyshines::Monitor::PeriodicLogger.new(:time => 300)
      #   loop do
      #     # ... stuff ...
      #     periodic_logger.periodically{ [morbenfactor, crunkosity, exuberance] }
      #   end
      #
      def periodically &block
        super do
          fields = [ "%10d"%iter, "%7.1f"%since, "%7.1f"%rate ]
          fields << (block ? block.call : nil)
          Log.info fields.flatten.compact.join("\t")
        end
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Monkeyshines
  module Monitor
    #
    # Accepts a lightweight call every iteration.
    #
    # Once either a time or an iteration criterion is met, executes the block
    # and resets the timer until next execution.
    #
    # Note that the +time_interval+ is measured *execution to execution* and not
    # in multiples of iter_interval. Say I set a time_interval of 300s, and
    # happen to iterate at 297s and 310s after start. Then the monitor will
    # execute at 310s, and the next execution will happen on or after 610s.
    #
    # Also note that when *either* criterion is met, *both* criteria are
    # reset. Say I set a time interval of 300s and an +iter_interval+ of 10_000;
    # and that at 250s I reach iteration 10_000. Then the monitor will execute
    # on or after 20_000 iteration or 550s, whichever happens first.
    #
    class PeriodicMonitor
      # Thresholds: seconds / iterations between executions (nil disables).
      attr_accessor :time_interval, :iter_interval
      # Bookkeeping: time and iteration of the last execution, the running
      # iteration count, and the creation time (times are epoch floats).
      attr_accessor :last_time, :last_iter, :iter, :started_at

      # options[:time]  -- seconds between executions
      # options[:iters] -- iterations between executions
      def initialize options={}
        self.started_at    = Time.now.utc.to_f
        self.last_time     = started_at
        self.iter          = 0
        self.last_iter     = 0
        self.time_interval = options[:time]
        self.iter_interval = options[:iters]
      end

      # True if at least +iter_interval+ iterations have elapsed since last
      # execution; nil when no iteration criterion is set.
      #
      # Counted from the last execution rather than with iter % iter_interval,
      # so that a time-triggered execution also resets the iteration
      # criterion, as the class documentation promises.
      def enough_iterations?
        (iter - last_iter.to_i) >= iter_interval if iter_interval
      end

      # True if more than +time_interval+ has elapsed since last execution;
      # nil when no time criterion is set.
      def enough_time? now
        (now - last_time) > time_interval if time_interval
      end

      # Time since monitor was created
      def since
        Time.now.utc.to_f - started_at
      end
      # Iterations per second
      def rate
        iter.to_f / since.to_f
      end

      #
      # if the interval conditions are met, executes block; otherwise just does
      # bookkeeping and returns.
      #
      # The block (optional) is called with the iteration count and the seconds
      # elapsed since the last execution. Returns the new last_time when an
      # execution happened, nil otherwise.
      #
      def periodically &block
        self.iter += 1
        now = Time.now.utc.to_f
        return unless enough_iterations? || enough_time?(now)
        block.call(iter, (now-last_time)) if block
        self.last_iter = iter
        self.last_time = now
      end
    end

  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'monkeyshines/utils/trollop'
|
2
|
+
module Monkeyshines

  # Command-line flags understood by every scraper. Each entry is the
  # argument list for Trollop's +opt+: name, description, settings.
  CMDLINE_OPTIONS = [
    [:handle,           "Identifying string for scrape",     { :type => String, :required => true } ],
    [:source_filename,  "URI for scrape store to load from", { :type => String } ],
    [:dest_filename,    "Filename for results",              { :type => String } ],
    [:log_dest,         "Log file location",                 { :type => String } ],
  ]

  #
  # Load the YAML file ~/.monkeyshines
  # and toss it into Monkeyshines::CONFIG
  #
  # With no arguments the whole file is merged. With +keys+, only the
  # sub-hashes stored under those top-level keys are merged (missing keys are
  # skipped). An empty config file is treated as an empty hash.
  #
  # Raises KeyError if HOME is unset and a system error if the file cannot be
  # read.
  #
  def self.load_global_options! *keys
    config_path  = File.join(ENV.fetch('HOME'), '.monkeyshines')
    # load_file opens and closes the file itself; the previous
    # YAML.load(File.open(..)) left the handle open until garbage collection.
    all_defaults = YAML.load_file(config_path) || {}
    if keys.blank?
      CONFIG.deep_merge! all_defaults
    else
      keys.each do |key|
        CONFIG.deep_merge!( all_defaults[key] || {} )
      end
    end
  end

  # Merge the parsed command-line values into Monkeyshines::CONFIG.
  def self.load_cmdline_options!
    Monkeyshines::CONFIG.deep_merge! options_from_cmdline
  end

  #
  # Takes the values set on the command line
  # and merges them into the options hash:
  #   --source-filename
  # sets the value for options[:source][:filename], etc.
  #
  # Keys ending in _given are skipped. The :handle value is stripped of
  # non-word characters and stored as a symbol.
  #
  def self.options_from_cmdline
    result = {}
    get_cmdline_args.each do |key, val|
      next if key.to_s =~ /_given$/
      # :source_filename => [:source, :filename, val]
      path = key.to_s.split(/_/).map(&:to_sym)
      result.deep_set(*(path + [val]))
    end
    result[:handle] = result[:handle].to_s.gsub(/\W/,'').to_sym
    result
  end

  # Use the trollop options defined in Monkeyshines::CMDLINE_OPTIONS
  # to extract command-line args
  def self.get_cmdline_args
    cmdline = Trollop::Parser.new
    Monkeyshines::CMDLINE_OPTIONS.each do |args|
      cmdline.opt(*args)
    end
    Trollop::do_parse_args(cmdline)
  end

end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'monkeyshines/runner'
|
2
|
+
|
3
|
+
module Monkeyshines
  #
  # Runner that feeds the follow-on requests of each result back into its
  # own source.
  #
  class RecursiveRunner < Monkeyshines::Runner
    # Deepest generation that may exist; see #bookkeep.
    GENERATION_LIMIT = 5

    #
    # Generate requests that ensue from this one
    #
    # if GENERATION_LIMIT is 5, requests at generation 4 *do* generate recursive
    # jobs, ones at generation 5 do not (so, generation 6 shouldn't exist)
    #
    # Runs the parent's bookkeeping first, coerces the result's generation to
    # an integer, and then puts every request yielded by
    # result.recursive_requests into the source.
    #
    def bookkeep result
      super result
      return unless result
      result.req_generation = result.req_generation.to_i
      return if result.req_generation >= GENERATION_LIMIT
      result.recursive_requests { |rec_req| source.put rec_req }
    end

  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'right_aws'
|
2
|
+
module Monkeyshines
  module Repository
    #
    # Abstract key/value repository. Every primitive here (exists?, put, get,
    # open, close, uri, size, timestamp, get_metadata) is an empty hook that
    # returns nil; subclasses supply the real behavior.
    #
    class Base

      # Whether +key+ is present. Hook: returns nil here.
      def exists?(key)
      end

      # Synonym for #exists?. Written as a delegating method rather than
      # alias_method so that a subclass overriding #exists? changes this too
      # (an alias stays bound to the empty base implementation).
      def include?(key)
        exists?(key)
      end

      # Store +val+ under +key+. Hook.
      def put key, val
      end

      # Fetch the value stored under +key+. Hook.
      def get key
      end

      # Hook.
      def open
      end

      # Hook.
      def close
      end

      # Location of +key+. Hook.
      def uri key
      end

      # The :md5 entry of the metadata for +key+ (nil when there is none).
      def md5 key
        metadata key, :md5
      end

      # Synonym for #md5; delegates so subclass overrides of #md5 apply.
      def checksum(key)
        md5(key)
      end

      # Hook.
      def size key
      end

      # Hook.
      def timestamp key
      end

      # By default,
      #   size+timestamp-md5
      # Ex:
      #   1251777182+20090222121200-577416a26499f6facf45973298be5276
      #
      # Takes the key whose version is wanted. This method used to take no
      # argument and call size/timestamp/md5 without their required key, so
      # every call raised ArgumentError.
      def version_id key
        "#{size(key)}+#{timestamp(key)}-#{md5(key)}"
      end

      # Metadata hashes by key, consulted before get_metadata.
      # NOTE(review): nothing in this class ever writes to it.
      CACHED_METADATA = {}

      # Metadata for +key+: the whole attribute hash, or just the +datum+
      # entry when one is named. Returns nil when no metadata is available
      # instead of failing on a nil lookup.
      def metadata key, datum=nil
        attrs = CACHED_METADATA[key] || get_metadata(key, datum)
        return attrs unless datum
        attrs && attrs[datum]
      end

      # Fetch metadata for +key+ from the backing store. Hook: returns nil.
      def get_metadata key, datum=nil
        #
      end

    end
  end
end
|