monkeyshines 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/.document +4 -0
  2. data/.gitignore +43 -0
  3. data/LICENSE +20 -0
  4. data/LICENSE.textile +20 -0
  5. data/README.textile +125 -0
  6. data/Rakefile +105 -0
  7. data/VERSION +1 -0
  8. data/examples/.gitignore +4 -0
  9. data/examples/bulk_urls/scrape_bulk_urls.rb +64 -0
  10. data/examples/rename_tree/rename_hdp_tree.rb +151 -0
  11. data/examples/rename_tree/rename_ripd_tree.rb +82 -0
  12. data/examples/rss_feeds/scrape_rss_feeds.rb +52 -0
  13. data/examples/shorturls/README.textile +111 -0
  14. data/examples/shorturls/bulkdump_shorturls.rb +46 -0
  15. data/examples/shorturls/bulkload_shorturls.rb +45 -0
  16. data/examples/shorturls/extract_urls.rb +12 -0
  17. data/examples/shorturls/multiplex_shorturl_cache.rb +32 -0
  18. data/examples/shorturls/old/multidump_and_fix_shorturls.rb +66 -0
  19. data/examples/shorturls/old/shorturl_stats.rb +81 -0
  20. data/examples/shorturls/scrape_shorturls.rb +112 -0
  21. data/examples/shorturls/shorturl_request.rb +29 -0
  22. data/examples/shorturls/shorturl_sequence.rb +121 -0
  23. data/examples/shorturls/shorturl_start_tyrant.sh +16 -0
  24. data/examples/shorturls/start_shorturl_cache.sh +2 -0
  25. data/lib/monkeyshines.rb +31 -0
  26. data/lib/monkeyshines/extensions.rb +16 -0
  27. data/lib/monkeyshines/fetcher.rb +10 -0
  28. data/lib/monkeyshines/fetcher/authed_http_fetcher.rb +35 -0
  29. data/lib/monkeyshines/fetcher/base.rb +44 -0
  30. data/lib/monkeyshines/fetcher/fake_fetcher.rb +19 -0
  31. data/lib/monkeyshines/fetcher/http_fetcher.rb +127 -0
  32. data/lib/monkeyshines/fetcher/http_head_fetcher.rb +23 -0
  33. data/lib/monkeyshines/monitor.rb +7 -0
  34. data/lib/monkeyshines/monitor/chunked_store.rb +23 -0
  35. data/lib/monkeyshines/monitor/periodic_logger.rb +33 -0
  36. data/lib/monkeyshines/monitor/periodic_monitor.rb +65 -0
  37. data/lib/monkeyshines/options.rb +59 -0
  38. data/lib/monkeyshines/recursive_runner.rb +26 -0
  39. data/lib/monkeyshines/repository/base.rb +57 -0
  40. data/lib/monkeyshines/repository/s3.rb +169 -0
  41. data/lib/monkeyshines/request_stream.rb +11 -0
  42. data/lib/monkeyshines/request_stream/base.rb +32 -0
  43. data/lib/monkeyshines/request_stream/edamame_queue.rb +54 -0
  44. data/lib/monkeyshines/request_stream/klass_request_stream.rb +39 -0
  45. data/lib/monkeyshines/request_stream/simple_request_stream.rb +22 -0
  46. data/lib/monkeyshines/runner.rb +161 -0
  47. data/lib/monkeyshines/runner_core/options.rb +5 -0
  48. data/lib/monkeyshines/runner_core/parsing_runner.rb +29 -0
  49. data/lib/monkeyshines/scrape_job/old_paginated.rb +343 -0
  50. data/lib/monkeyshines/scrape_job/recursive.rb +9 -0
  51. data/lib/monkeyshines/scrape_request.rb +136 -0
  52. data/lib/monkeyshines/scrape_request/paginated.rb +290 -0
  53. data/lib/monkeyshines/scrape_request/raw_json_contents.rb +16 -0
  54. data/lib/monkeyshines/scrape_request/signed_url.rb +86 -0
  55. data/lib/monkeyshines/store.rb +14 -0
  56. data/lib/monkeyshines/store/base.rb +29 -0
  57. data/lib/monkeyshines/store/chunked_flat_file_store.rb +37 -0
  58. data/lib/monkeyshines/store/conditional_store.rb +57 -0
  59. data/lib/monkeyshines/store/factory.rb +8 -0
  60. data/lib/monkeyshines/store/flat_file_store.rb +84 -0
  61. data/lib/monkeyshines/store/key_store.rb +51 -0
  62. data/lib/monkeyshines/store/null_store.rb +15 -0
  63. data/lib/monkeyshines/store/read_thru_store.rb +22 -0
  64. data/lib/monkeyshines/store/tokyo_tdb_key_store.rb +33 -0
  65. data/lib/monkeyshines/store/tyrant_rdb_key_store.rb +56 -0
  66. data/lib/monkeyshines/store/tyrant_tdb_key_store.rb +20 -0
  67. data/lib/monkeyshines/utils/factory_module.rb +106 -0
  68. data/lib/monkeyshines/utils/filename_pattern.rb +134 -0
  69. data/lib/monkeyshines/utils/logger.rb +15 -0
  70. data/lib/monkeyshines/utils/trollop-1.14/FAQ.txt +84 -0
  71. data/lib/monkeyshines/utils/trollop-1.14/History.txt +101 -0
  72. data/lib/monkeyshines/utils/trollop-1.14/Manifest.txt +7 -0
  73. data/lib/monkeyshines/utils/trollop-1.14/README.txt +40 -0
  74. data/lib/monkeyshines/utils/trollop-1.14/Rakefile +36 -0
  75. data/lib/monkeyshines/utils/trollop-1.14/lib/trollop.rb +744 -0
  76. data/lib/monkeyshines/utils/trollop-1.14/test/test_trollop.rb +1048 -0
  77. data/lib/monkeyshines/utils/trollop.rb +744 -0
  78. data/lib/monkeyshines/utils/union_interval.rb +52 -0
  79. data/lib/monkeyshines/utils/uri.rb +70 -0
  80. data/lib/monkeyshines/utils/uuid.rb +32 -0
  81. data/monkeyshines.gemspec +147 -0
  82. data/scrape_from_file.rb +44 -0
  83. data/spec/monkeyshines_spec.rb +7 -0
  84. data/spec/spec_helper.rb +9 -0
  85. metadata +183 -0
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'; $: << File.dirname(__FILE__)
3
+ #require 'rubygems'
4
+ # require 'wukong'
5
+ require 'monkeyshines'
6
+ # require 'monkeyshines/utils/uri'
7
+ # require 'monkeyshines/utils/filename_pattern'
8
+ # require 'monkeyshines/store/conditional_store'
9
+ # require 'monkeyshines/fetcher/http_head_fetcher'
10
+ # require 'trollop' # gem install trollop
11
+ # require 'shorturl_request'
12
+ require 'shorturl_sequence'
13
+
14
+ digits = { } ; (('0'..'9').to_a+('a'..'z').to_a).each do |ch| digits[ch] = 0 end
15
+
16
+ # (1..10000).each do |idx|
17
+ # s = ShorturlSequence.encode_integer idx, 36
18
+ # digits[s[0..0]] += 1
19
+ # end
20
+ # p digits
21
+ # puts digits.sort.map{|ch,ct| "%-7s\t%10d"%[ch,ct]}
22
+
23
# Minimal frequency counter: tallies how many times each value is pushed
# with <<, and prints the tallies ordered by value.
class Histo
  # Hash mapping each value seen so far to its occurrence count.
  attr_accessor :buckets

  def initialize
    self.buckets = {}
  end

  # Record one occurrence of +val+. Returns the updated count for +val+.
  def <<(val)
    buckets[val] = (buckets[val] || 0) + 1
  end

  # Print one "count<TAB>value" line per bucket to stdout, sorted by value.
  def dump
    buckets.sort.each { |val, count| puts format("%10d\t%s", count, val) }
  end
end
38
+
39
# Three histograms filled while scanning the request file:
#   len_histo -- keyed by length of the token after the base URL
#   num_histo -- keyed by decoded integer value, in units of one million
#   ltr_histo -- keyed by "<length>-<first character>" of the token
len_histo = Histo.new
num_histo = Histo.new
ltr_histo = Histo.new
# Count of lines that passed both filters below; used only for progress output.
iter = 0

# 123456789-123456789-
# http://bit.ly/
# http://tinyurl.com/
BASE_URL = "http://is.gd/"
RADIX = 62
# Service tag used in the input file name: BASE_URL with a leading "http://"
# removed, a trailing ".com" removed, and all non-word characters dropped.
# NOTE(review): BASE_URL ends in "/", so /\.com$/ cannot match a value such
# as "http://tinyurl.com/" -- confirm that is intended.
HANDLE = BASE_URL.gsub(%r{^http://},'').gsub(/\.com$/,'').gsub(/\W+/,'')
BASE_URL_LEN = BASE_URL.length
# Upper bound applied to the length of a whole input line (as read, before
# chomp): base URL length plus 2 plus 6.
MAX_TAIL_LEN = BASE_URL_LEN + 2 + 6
# RADIX**6; decoded values at or above this are skipped.
SIX_CHARS = RADIX**6
File.open("rawd/req/shorturl_requests-20090710-#{HANDLE}.tsv"
  ) do |reqfile|
  reqfile.each do |url|
    #decode
    next unless url.length <= MAX_TAIL_LEN
    # Everything after the first BASE_URL_LEN characters of the stripped line
    # ('' when the line is shorter than that).
    tail = url.chomp.strip[BASE_URL_LEN..-1] || ''
    # tail.downcase!
    # NOTE(review): the inline `rescue nil` turns ANY StandardError raised by
    # decode_str into nil, so such lines are skipped silently by the next
    # statement -- including errors caused by bugs rather than bad input.
    asnum = ShorturlSequence.decode_str tail, RADIX rescue nil # tail.to_i(36) rescue -1
    next unless asnum && asnum < SIX_CHARS
    # Integer division: bucket the decoded value by millions.
    size = (asnum / 1_000_000)
    len = tail.length
    # track stats
    len_histo << len
    num_histo << size
    ltr_histo << "%s-%s" % [len, tail[0..0]] # + (len > 1 ? '.'* (len-1) : '')
    # Progress marker: print the running count every million accepted lines.
    puts iter if ((iter += 1) % 1_000_000 == 0)

  end
end
puts "Integer magnitude of decoded (M):"
num_histo.dump
puts "Length of encoded:"
len_histo.dump
puts "First Letter:"
ltr_histo.dump


# puts tail.length # [tail.length, tail, tail[-1].to_i].join("\t")
# puts [asnum, tail, url].inspect
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+ $: << File.dirname(__FILE__)+'/../../lib'; $: << File.dirname(__FILE__)
3
+ require 'rubygems'
4
+ require 'wukong'
5
+ require 'monkeyshines'
6
+ #
7
+ require 'shorturl_request'
8
+ require 'shorturl_sequence'
9
+ require 'monkeyshines/utils/uri'
10
+ require 'monkeyshines/utils/filename_pattern'
11
+ require 'monkeyshines/store/conditional_store'
12
+ require 'monkeyshines/fetcher/http_head_fetcher'
13
+ require 'trollop' # gem install trollop
14
+
15
# ===========================================================================
#
# scrape_shorturls.rb --
#
# To scrape from a list of shortened urls:
#
#    ./scrape_shorturls.rb --base-url=tinyurl.com --from=request_urls.tsv
#
# To do a random scrape:
#
#    ./scrape_shorturls.rb --base-url=tinyurl.com --random
#       --min-limit= --max-limit= --encoding-radix=
#
#
opts = Trollop::options do
  opt :base_url,       "Host part of URL: eg tinyurl.com",                 :type => String, :required => true
  opt :log,            "Log file name; leave blank to use STDERR",         :type => String
  # input from file
  opt :from,           "Location of URLs to scrape",                       :type => String
  opt :skip,           "Initial lines to skip",                            :type => Integer
  # OR do a random walk
  opt :random,         "Generate and visit random URL suffixes"
  opt :min_limit,      "Smallest sequential URL to randomly visit",        :type => Integer # default in shorturl_sequence.rb
  opt :max_limit,      "Largest sequential URL to randomly visit",         :type => Integer # default in shorturl_sequence.rb
  opt :encoding_radix, "36 for most, 62 if URLs are case-sensitive",       :type => Integer, :default => 36
  # output storage
  opt :cache_loc,      "URI for cache server",                             :type => String
  opt :chunk_time,     "Frequency to rotate chunk files (in seconds)",     :type => Integer, :default => 60*60*4
  opt :dest_dir,       "Filename base for output, def /data/ripd",         :type => String,  :default => '/data/ripd'
  opt :dest_pattern,   "Pattern for dump file output",                     :default => ":dest_dir/:handle_prefix/:handle/:date/:handle+:timestamp-:pid.tsv"
end

# Short tag for the service, used in log/output names and for the cache
# lookup in RDB_PORTS. The scheme and any trailing slashes are removed first
# so that "tinyurl.com", "http://tinyurl.com" and "http://tinyurl.com/" all
# yield "tinyurl" (previously the latter two produced "httptinyurl" and
# "httptinyurlcom", which never match an RDB_PORTS key).
handle = opts[:base_url].sub(%r{\Ahttps?://}, '').sub(%r{/+\z}, '').gsub(/\.com$/,'').gsub(/\W+/,'')

# ******************** Log ********************
# WORK_DIR is not defined anywhere in this script; use it when some required
# library provides it, otherwise fall back to a "work" directory next to the
# script (presumably the intended location -- confirm against deployment).
work_dir = defined?(WORK_DIR) ? WORK_DIR : File.dirname(__FILE__)+'/work'
opts[:log] = (work_dir+"/log/shorturls_#{handle}-#{Time.now.to_flat}.log") if (opts[:log]=='')
periodic_log = Monkeyshines::Monitor::PeriodicLogger.new(:iters => 10000, :time => 30)

#
# ******************** Load from store or random walk ********************
#
if opts[:from]
  src_store = Monkeyshines::Store::FlatFileStore.new_from_command_line(opts, :filemode => 'r')
  src_store.skip!(opts[:skip].to_i) if opts[:skip]
elsif opts[:random]
  src_store = Monkeyshines::Store::RandomUrlStream.new_from_command_line(opts)
else
  Trollop::die "Need to either say --random or --from=filename"
end

#
# ******************** Store output ********************
#
# Track visited URLs with key-value database
#
RDB_PORTS  = { 'tinyurl' => "localhost:10042", 'bitly' => "localhost:10043", 'other' => "localhost:10044" }
cache_loc  = opts[:cache_loc] || RDB_PORTS[handle]
raise "Need a handle (bitly, tinyurl or other)." unless cache_loc
dest_cache = Monkeyshines::Store::TyrantRdbKeyStore.new(cache_loc)
# dest_cache = Monkeyshines::Store::MultiplexShorturlCache.new(RDB_PORTS)

#
# Store the data into flat files
#
dest_pattern = Monkeyshines::Utils::FilenamePattern.new(opts[:dest_pattern],
  :handle => 'shorturl-'+handle, :dest_dir => opts[:dest_dir])
dest_files   = Monkeyshines::Store::ChunkedFlatFileStore.new(dest_pattern,
  opts[:chunk_time].to_i, opts)

#
# Conditional store uses the key-value DB to boss around the flat files --
# requests are only made (and thus data is only output) if the url is missing
# from the key-value store.
#
dest_store = Monkeyshines::Store::ConditionalStore.new(dest_cache, dest_files)

#
# ******************** Fetcher ********************
#
fetcher = Monkeyshines::Fetcher::HttpHeadFetcher.new

#
# ******************** Do this thing ********************
#
Log.info "Beginning scrape itself"
begin
  src_store.each do |bareurl, *args|
    # prepare the request; these hosts are skipped outright. Dots are escaped
    # so that only the literal host names match.
    next if bareurl =~ %r{\Ahttp://(?:poprl\.com|short\.to|timesurl\.at|bkite\.com)}
    req = ShorturlRequest.new(bareurl, *args)

    # conditional store only calls fetcher if url key is missing.
    result = dest_store.set( req.url ) do
      response = fetcher.get(req)                                  # do the url fetch
      next unless response.response_code || response.contents      # don't store bad fetches
      [response.scraped_at, response]                              # timestamp into cache, result into flat file
    end
    periodic_log.periodically{ ["%7d"%dest_store.misses, 'misses', dest_store.size, req.response_code, result, req.url] }
  end
ensure
  # A random walk never ends on its own, so the stores are closed here to
  # make sure they are released when the run is interrupted or fails.
  dest_store.close
  fetcher.close
end
@@ -0,0 +1,29 @@
1
#
# One request against a URL-shortening service. Fields:
#   url               -- the short URL requested
#   scraped_at        -- timestamp of the fetch
#   response_code     -- response status code
#   response_message  -- response status message
#   contents          -- the redirect target (see #response=)
#
class ShorturlRequest < Struct.new(
    :url,
    :scraped_at,
    :response_code, :response_message,
    :contents
    )
  alias_method :short_url=,    :url=
  alias_method :short_url,     :url
  alias_method :expanded_url=, :contents=
  alias_method :expanded_url,  :contents

  #
  # All we care about is the redirect destination: stores the response's
  # "location" header as the contents.
  #
  def response= response
    self.contents = response["location"]
  end

  #
  # The major shortening services
  #
  # Do any of the mainstream shorteners use in-band characters besides \w
  # alphanum and - dash?  (idek.net uses a ~ and pastoid.com a + but they
  # are not popular enough to justify the annoyance of allowing extra
  # chars).
  #
  SHORTURL_HOSTS = %w[
    1link.in 4url.cc 6url.com adjix.com ad.vu bellypath.com bit.ly bkite.com
    budurl.com canurl.com chod.sk cli.gs decenturl.com dn.vc doiop.com
    dwarfurl.com easyuri.com easyurl.net ff.im go2cut.com gonext.org hulu.com
    hypem.com ifood.tv ilix.in is.gd ix.it jdem.cz jijr.com kissa.be kurl.us
    litturl.com lnkurl.com memurl.com metamark.net miklos.dk minilien.com
    minurl.org muhlink.com myurl.in myurl.us notlong.com ow.ly plexp.com
    poprl.com qurlyq.com redirx.com s3nt.com shorterlink.com shortlinks.co.uk
    short.to shorturl.com shrinklink.co.uk shrinkurl.us shrt.st shurl.net
    simurl.com shorl.com smarturl.eu snipr.com snipurl.com snurl.com sn.vc
    starturl.com surl.co.uk tighturl.com timesurl.at tiny123.com tiny.cc
    tinylink.com tinyurl.com tobtr.com traceurl.com tr.im tweetburner.com
    twitpwr.com twitthis.com twurl.nl u.mavrev.com ur1.ca url9.com urlborg.com
    urlbrief.com urlcover.com urlcut.com urlhawk.com url-press.com
    urlsmash.com urltea.com urlvi.be vimeo.com wlink.us xaddr.com xil.in
    xrl.us x.se xs.md yatuc.com yep.it yweb.com zi.ma w3t.org
  ].freeze

  # Matches "http://<host>/<at least one character>" for the hosts above.
  # Each host is regexp-escaped so that "." only matches a literal dot
  # (unescaped, "bit.ly" would also match e.g. "bitxly").
  SHORTURL_RE = %r{\Ahttp://(?:#{SHORTURL_HOSTS.map { |host| Regexp.escape(host) }.join('|')})/.}

  # Truthy (the match offset, 0) when +url+ is on a known shortener and has a
  # non-empty path; nil otherwise.
  def self.is_shorturl? url
    url.to_s =~ SHORTURL_RE
  end
end
@@ -0,0 +1,121 @@
1
# Base-62 integer <-> string conversion using the digit order
# 0-9, a-z, A-Z (so 10 => "a", 36 => "A", 61 => "Z").
module Base62
  # http://refactormycode.com/codes/125-base-62-encoding
  BASE62_CHARS = (('0'..'9').to_a + ('a'..'z').to_a + ('A'..'Z').to_a).freeze
  # Reverse lookup: digit character => numeric value (0..61).
  BASE62_MAP   = {}
  BASE62_CHARS.each_with_index { |ch, num| BASE62_MAP[ch] = num }
  BASE62_MAP.freeze

  # Encode a non-negative integer as a base-62 string.
  #
  # Raises ArgumentError for negative input (which previously produced an
  # empty string that could not be decoded back).
  def self.i_to_s i
    raise ArgumentError, "Can't base-62 encode negative integer #{i}" if i < 0
    return '0' if i == 0
    digits = []
    while i > 0
      i, remainder = i.divmod(62)
      digits.unshift BASE62_CHARS[remainder]   # least-significant digit first
    end
    digits.join
  end

  # Decode a base-62 string into an integer. An empty string decodes to 0.
  #
  # Raises ArgumentError when +str+ contains a character outside 0-9a-zA-Z
  # (previously this surfaced as a NoMethodError on nil).
  def self.s_to_i str
    str.each_char.inject(0) do |value, ch|
      digit = BASE62_MAP[ch]
      raise ArgumentError, "Invalid base-62 digit #{ch.inspect} in #{str.inspect}" if digit.nil?
      value * 62 + digit
    end
  end
end
24
+
25
# Converts between sequence numbers and the token part of a short URL,
# for services that use base 36 (case-insensitive) or base 62 tokens.
module ShorturlSequence
  # Encode integer +i+ as a token in the given +radix+ (36 or 62; an Integer
  # or its String form). Raises RuntimeError for any other radix.
  def self.encode_integer i, radix
    case radix.to_s
    when '36' then i.to_s(36)
    when '62' then Base62.i_to_s(i)
    else
      raise "Can't encode into base #{radix}"
    end
  end

  # Decode token +s+ in the given +radix+ (36 or 62) into an integer.
  # Trailing non-word characters (newline, slash, ...) are ignored.
  # Raises RuntimeError for any other radix.
  def self.decode_str s, radix
    s = s.gsub(%r{\W+\z},'')
    case radix.to_s
    when '36' then s.to_i(36)
    when '62' then Base62.s_to_i(s)   # was passed an undefined local, so every base-62 decode raised
    else
      raise "Can't decode from base #{radix}"
    end
  end
end
45
+
46
# A short URL, held as the service's base URL plus the token that follows it.
class Shorturl
  attr_accessor :base_url
  attr_accessor :token

  # token -- the part of the short URL after the service's base URL
  def initialize token
    self.token = token
  end
end

# A short URL whose token is a base-62 number.
#
# Inherits from Shorturl so that +token+, +base_url+ and the one-argument
# constructor exist; without that, none of the methods below could work.
class Shorturl62 < Shorturl
  # The token decoded as a base-62 integer.
  def to_i
    Base62.s_to_i token
  end

  # Same as #url.
  def to_s
    url
  end

  # The full short URL: base_url, a slash, then the token.
  def url
    "#{base_url}/#{token}"
  end
end

# Short URL on the is.gd service; the base URL is fixed.
class IsgdShorturl < Shorturl62
  def base_url
    'http://is.gd'
  end
end
73
+
74
#
# Source of short URLs built by walking the token sequence numbers from
# min_limit up to max_limit for one shortening service.
#
class Monkeyshines::Store::SequentialUrlStream
  # Upper sequence number used when no max_limit is given.
  DEFAULT_MAX_URLSTR = '1zzzzz'.to_i(36)
  # Token radix used per service when none is given explicitly.
  DEFAULT_RADIX = {
    'http://tinyurl.com/' => 36,
    'http://bit.ly/'      => 62,
    'http://is.gd/'       => 62,
  }
  attr_accessor :base_url, :min_limit, :span, :encoding_radix

  # base_url       -- service URL or bare host; normalized by fix_url
  # min_limit      -- smallest sequence number (default 0)
  # max_limit      -- largest sequence number (default DEFAULT_MAX_URLSTR)
  # encoding_radix -- 36 or 62; looked up in DEFAULT_RADIX when nil
  #
  # Raises RuntimeError when the radix ends up neither 36 nor 62.
  def initialize base_url, min_limit=0, max_limit=nil, encoding_radix=nil
    self.base_url       = self.class.fix_url(base_url)
    self.min_limit      = min_limit.to_i
    max_limit         ||= DEFAULT_MAX_URLSTR
    self.span           = max_limit.to_i - self.min_limit
    self.encoding_radix = (encoding_radix || DEFAULT_RADIX[self.base_url]).to_i
    raise "Please specify either encoding_radix of 36 or 62" unless [36, 62].include?(self.encoding_radix)
  end

  # Largest sequence number in range. Only the span is stored, so this is
  # derived from it (#each previously called this method when it did not
  # exist, and so always raised).
  def max_limit
    min_limit + span
  end

  # Normalize +url+ to "scheme://host/": prepend "http://" when no http or
  # https scheme is present, and append a trailing slash when missing.
  def self.fix_url url
    url = 'http://' + url unless url =~ %r{\Ahttps?://}
    url = url + '/'       unless (url[-1..-1]=='/')
    url
  end

  # The short URL for sequence number +idx+.
  def url_for idx
    base_url + ShorturlSequence.encode_integer(idx, encoding_radix)
  end

  # Yields each URL in range once, in sequence order, from min_limit through
  # max_limit inclusive. URLs (not bare sequence numbers) are yielded, in line
  # with RandomUrlStream.
  def each *args, &block
    (min_limit..max_limit).each do |idx|
      yield url_for(idx)
    end
  end

  # Build a stream from parsed command-line options (merged over
  # +default_opts+); reads :base_url, :min_limit, :max_limit, :encoding_radix.
  def self.new_from_command_line cmdline_opts, default_opts={}
    options = default_opts.merge(cmdline_opts)
    Trollop::die :base_url if options[:base_url].to_s.strip.empty?
    self.new(*options.values_at(:base_url, :min_limit, :max_limit, :encoding_radix))
  end
end
108
+
109
#
# Variant of SequentialUrlStream that picks sequence numbers at random
# instead of walking them in order.
#
class Monkeyshines::Store::RandomUrlStream < Monkeyshines::Store::SequentialUrlStream
  # An infinite stream of urls in range: yields a fresh random URL on every
  # pass and never finishes by itself.
  def each *args, &block
    loop { yield url_in_range }
  end

  # One random short URL: a random offset below span is added to min_limit,
  # encoded in the stream's radix and appended to base_url.
  def url_in_range
    offset = rand(span)
    token  = ShorturlSequence.encode_integer(offset + min_limit, encoding_radix)
    base_url + token
  end
end
@@ -0,0 +1,16 @@
1
#!/usr/bin/env bash

# Directory holding this script; quoted so paths containing spaces work.
script_dir=$(dirname "$0")
# Date stamp (YYYYMMDD) used in the log file names.
log_date=$(date "+%Y%m%d")

# nohup ttserver -port 10001 "$script_dir/distdb/shorturl_scrapes-tinyurl.tct#bnum=40000000#opts=l" 2>&1 >> log/ttserver-shorturl_scrapes-tinyurl+`date "+%Y%m%d"`.log &
# nohup ttserver -port 10002 "$script_dir/distdb/shorturl_scrapes-bitly.tct#bnum=20000000#opts=l"   2>&1 >> log/ttserver-shorturl_scrapes-bitly+`date "+%Y%m%d"`.log &
# nohup ttserver -port 10003 "$script_dir/distdb/shorturl_scrapes-other.tct#bnum=20000000#opts=l"   2>&1 >> log/ttserver-shorturl_scrapes-other+`date "+%Y%m%d"`.log &

#
# Start shorturl readthru cache TokyoTyrant servers
#
# Redirections apply left to right, so stdout is pointed at the log first and
# stderr is then duplicated onto it (">> file 2>&1"). The reverse order
# leaves stderr on whatever stdout was before the redirect.
#
nohup ttserver -port 10042 "$script_dir/distdb/shorturl_reqs-tinyurl.tch#bnum=40000000#opts=l" >> "log/ttserver-shorturl_reqs-tinyurl+${log_date}.log" 2>&1 &
nohup ttserver -port 10043 "$script_dir/distdb/shorturl_reqs-bitly.tch#bnum=20000000#opts=l"   >> "log/ttserver-shorturl_reqs-bitly+${log_date}.log" 2>&1 &
nohup ttserver -port 10044 "$script_dir/distdb/shorturl_reqs-other.tch#bnum=20000000#opts=l"   >> "log/ttserver-shorturl_reqs-other+${log_date}.log" 2>&1 &

# nohup ttserver -port 10069 "$script_dir/distdb/shorturl_reqs-foo.tch#bnum=40000000#opts=l" 2>&1 >> log/ttserver-shorturl_reqs-tinyurl+`date "+%Y%m%d"`.log &
@@ -0,0 +1,2 @@
1
# Start the shorturl cache TokyoTyrant server on port 10040, appending both
# stdout and stderr to a log under work/log next to this script.
# Paths are quoted so a script directory containing spaces still works.
# NOTE(review): `datename` is not a standard utility; it must be a command
# available on PATH that prints the log-name stamp -- confirm.
script_dir=$(dirname "$0")
ttserver -port 10040 "$script_dir/work/distdb/shorturl_cache.tct" >> "$script_dir/work/log/shorturl_cache-$(datename).log" 2>&1
@@ -0,0 +1,31 @@
1
+ require 'monkeyshines/extensions'
2
+ require 'monkeyshines/utils/logger'
3
+ require 'wukong'
4
+ require 'wukong/extensions/pathname'
5
+ require 'monkeyshines/utils/factory_module'
6
+ require 'monkeyshines/utils/uri'
7
+ require 'monkeyshines/utils/filename_pattern'
8
+ require 'monkeyshines/options'
9
+ require 'monkeyshines/scrape_request'
10
+
11
# Top-level namespace. Registers the library's main constants for lazy
# loading and holds the shared CONFIG hash.
module Monkeyshines
  # [constant, file registered to define it] -- each file is only loaded the
  # first time its constant is referenced.
  [
    [:ScrapeRequest,     'monkeyshines/scrape_request'],
    [:ScrapeRequestCore, 'monkeyshines/scrape_request'],
    [:RequestStream,     'monkeyshines/request_stream'],
    [:Store,             'monkeyshines/store'],
    [:Fetcher,           'monkeyshines/fetcher'],
    [:Monitor,           'monkeyshines/monitor'],
    [:Runner,            'monkeyshines/runner'],
    [:RawJsonContents,   'monkeyshines/scrape_request/raw_json_contents'],
  ].each do |const_name, path|
    autoload const_name, path
  end

  # Dumping ground for configuration values; left alone if already defined.
  CONFIG = {} unless defined?(CONFIG)

end
25
+
26
+ #
27
+ # A convenient logger.
28
+ #
29
+ # Define NO_MONKEYSHINES_LOG (or define Log yourself) to prevent its creation
30
+ #
31
+ Log = Monkeyshines.logger unless (defined?(Log) || defined?(NO_MONKEYSHINES_LOG))
@@ -0,0 +1,16 @@
1
class Numeric
  # Restrict the receiver to the range [min, max]. Either bound may be nil,
  # meaning "no limit on that side". The lower bound is checked first, and
  # the bound object itself is returned when the receiver is at or beyond it.
  def clamp min, max
    if min && (self <= min)
      min
    elsif max && (self >= max)
      max
    else
      self
    end
  end
end
8
+
9
+
10
class Hash
  # Fold any number of hashes into one by calling deep_merge left to right,
  # starting from an empty hash. With no arguments the result is {}.
  def self.deep_sum *args
    merged = {}
    args.each do |options|
      merged = merged.deep_merge(options)
    end
    merged
  end
end
@@ -0,0 +1,10 @@
1
module Monkeyshines
  # Namespace for the fetcher classes. Extended with FactoryModule; each
  # fetcher below is registered for lazy loading from its own file.
  module Fetcher
    extend FactoryModule
    [
      [:Base,            'monkeyshines/fetcher/base'],
      [:FakeFetcher,     'monkeyshines/fetcher/fake_fetcher'],
      [:HttpFetcher,     'monkeyshines/fetcher/http_fetcher'],
      [:HttpHeadFetcher, 'monkeyshines/fetcher/http_head_fetcher'],
    ].each do |const_name, path|
      autoload const_name, path
    end

  end
end
@@ -0,0 +1,35 @@
1
+ require 'net/http'
2
+ Net::HTTP.version_1_2
3
module Monkeyshines
  module Fetcher

    #
    # Skeleton for a fetcher that authenticates its requests. Every method
    # except #authenticate has an empty body and therefore returns nil.
    #
    class AuthedHttpFetcher
      # Class-level accessor for authentication parameters.
      # NOTE(review): cattr_accessor is not core Ruby -- presumably supplied by
      # an extension library loaded elsewhere; confirm.
      cattr_accessor :auth_params

      # Placeholder: no implementation yet.
      def get_request_token
      end

      # Placeholder: no implementation yet.
      def authorize
      end

      # Placeholder: no implementation yet.
      def get_access_token
      end

      # Placeholder: no implementation yet.
      def api_key
      end
      # Placeholder: no implementation yet.
      def api_secret
      end
      # Placeholder: no implementation yet.
      def session_key
      end

      # authenticate request
      #
      # Calls get_session_key when session_key is nil/false.
      # NOTE(review): get_session_key is not defined in this class as shown,
      # and session_key here always returns nil, so this call would fail
      # unless a subclass or another file defines it -- confirm.
      def authenticate req
        get_session_key unless session_key
      end


    end

  end
end