pioneer 0.0.1.alpha

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ tmp/*
6
+ *.log
7
+ test.rb
data/.travis.yml ADDED
@@ -0,0 +1,2 @@
1
+ rvm:
2
+ - 1.9.2
data/CHANGELOG ADDED
@@ -0,0 +1,11 @@
1
+ ## v0.0.1
2
+
3
+ * Initial release
4
+
5
+ ## v0.0.2
6
+
7
+ * added some options and logging
8
+
9
+ ## v0.0.1alpha
10
+
11
+ * Heh, but it is still in dev. So let's call it alpha
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Fetch gems over TLS; plain http:// sources can be tampered with in transit.
source "https://rubygems.org"

gem "rake"

# Specify your gem's dependencies in pioneer.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1 @@
1
+ This is the Licence. Isn't it?
data/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # Pioneer
2
+
3
+ Pioneer is a simple async HTTP crawler based on em-synchrony
4
+
5
+ And it is very alpha right now.
6
+
7
+ # Install
8
+
9
+ ```bash
10
+ gem install pioneer
11
+ ```
12
+
13
+ # Usage
14
+
15
+ To use `Pioneer` you should specify a class with two methods: `locations` and `processing(req)`.
16
+
17
+ The first should return an enumerable object (the URLs to crawl); the second receives the request object for each URL.
18
+
19
+ ```ruby
20
+ class Crawler < Pioneer::Base
21
+ def locations
22
+ ["http://www.amazon.com", "http://www.apple.com"]
23
+ end
24
+
25
+ def processing(req)
26
+ File.open(req.url, "w+") do |f|
27
+ f << req.response.response
28
+ end
29
+ end
30
+ end
31
+
32
+ Crawler.new.start
33
+ ```
34
+
35
+ In this example we are saving two files with html of those two sites.
36
+
37
+ `start` method will start iterating over urls and return an Array of what `processing` method returns.
38
+
39
+ # Handling request, response errors and statuses
40
+
41
+ In case of request or response error `Pioneer` will raise an error. Or we can catch them this way:
42
+
43
+ ```ruby
44
+ class Crawler < Pioneer::Base
45
+ def locations
46
+ ["http://www.amazon.com", "http://www.apple.com"]
47
+ end
48
+
49
+ def processing(req)
50
+ File.open(req.url, "w+") do |f|
51
+ f << req.response.response
52
+ end
53
+ end
54
+
55
+ def if_request_error(req)
56
+ puts "Request error: #{req.error}"
57
+ end
58
+
59
+ def if_response_error(req)
60
+ puts "Response error: #{req.response.error}"
61
+ end
62
+
63
+ def if_status_301(req)
64
+ puts "He is trying to redirect me"
65
+ end
66
+ end
67
+ ```
68
+
69
+ You can also define `if_status_not_200` to handle every status other than 200, or `if_status_XXX` for any specific status code.
70
+
71
+ # Overriding behavior
72
+
73
+ You can override all methods on the fly:
74
+
75
+ ```ruby
76
+ crawler = Pioneer::Crawler.new # base simple crawler
77
+ crawler.locations = [url1, url2]
78
+ crawler.processing = proc{ |req| req.response.response_header.status }
79
+ crawler.if_status_404 = proc{ |req| "Oups" }
80
+ ```
81
+
82
+
83
+ ... to be continued
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
require "bundler/gem_tasks"
require 'rake/testtask'

# Running plain `rake` runs the spec suite.
task :default => :spec

# Defines the :spec task: adds spec/ to the load path and runs every
# *_spec.rb file found beneath it, without verbose output.
desc 'Tests'
Rake::TestTask.new(:spec) do |t|
  t.libs << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.verbose = false
end
@@ -0,0 +1,39 @@
1
module EventMachine
  module Synchrony

    # Fiber-based iterator that spaces out the start of each iteration block:
    # consecutive blocks start at least +timeout+ seconds apart. With a
    # timeout of 0 no delay is inserted.
    class FiberPeriodicTimerIterator < EM::Synchrony::Iterator

      # set timeout and start point
      # each Fiber will be executed not earlier than once per timeout
      #
      # list        - the collection to iterate over
      # concurrency - passed through to the parent iterator
      # timeout     - minimum number of seconds between block starts
      def initialize(list, concurrency = 1, timeout = 0)
        @timeout = timeout
        @next_start = Time.now
        super(list, concurrency)
      end

      # execute each iterator block within its own fiber at particular time offset
      # and auto-advance the iterator after each call
      def each(foreach = nil, after = nil, &blk)
        fe = proc do |obj, iter|
          Fiber.new do
            # This is the instance method below, not Kernel#sleep.
            sleep
            (foreach || blk).call(obj)
            iter.next
          end.resume
        end
        super(fe, after)
      end

      # Sleep if the last request was recent (less than the timeout period).
      #
      # @next_start holds the earliest moment the next block may start. Each
      # call claims that slot and moves @next_start forward by @timeout, so
      # callers arriving together are staggered one timeout apart.
      def sleep
        return unless @timeout > 0

        # Sample the clock once so the wait and the next slot are computed
        # from the same instant.
        now = Time.now
        sleep_time = @next_start - now
        sleep_time = 0 if sleep_time < 0
        @next_start = now + sleep_time + @timeout
        EM::Synchrony.sleep(sleep_time) if sleep_time > 0
      end

    end
  end
end
@@ -0,0 +1,314 @@
1
module EventMachine
  # A simple iterator for concurrent asynchronous work.
  #
  # Unlike ruby's built-in iterators, the end of the current iteration cycle is signaled manually,
  # instead of happening automatically after the yielded block finishes executing. For example:
  #
  #   (0..10).each{ |num| }
  #
  # becomes:
  #
  #   EM::Iterator.new(0..10).each{ |num,iter| iter.next }
  #
  # This is especially useful when doing asynchronous work via reactor libraries and
  # functions. For example, given a sync and async http api:
  #
  #   response = sync_http_get(url); ...
  #   async_http_get(url){ |response| ... }
  #
  # a synchronous iterator such as:
  #
  #   responses = urls.map{ |url| sync_http_get(url) }
  #   ...
  #   puts 'all done!'
  #
  # could be written as:
  #
  #   EM::Iterator.new(urls).map(proc{ |url,iter|
  #     async_http_get(url){ |res|
  #       iter.return(res)
  #     }
  #   }, proc{ |responses|
  #     ...
  #     puts 'all done!'
  #   })
  #
  # Now, you can take advantage of the asynchronous api to issue requests in parallel. For example,
  # to fetch 10 urls at a time, simply pass in a concurrency of 10:
  #
  #   EM::Iterator.new(urls, 10).each do |url,iter|
  #     async_http_get(url){ iter.next }
  #   end
  #

  # Support for Enumerable in Ruby 1.9+
  #
  # Pulls items one at a time from an external Enumerator built from the list.
  module IteratorWithEnumerable
    # Returns an Enumerator over +list+.
    # Raises ArgumentError unless +list+ responds to #each.
    def setup_list(list)
      raise ArgumentError, 'argument must be an Enumerable' unless list.respond_to?(:each)
      list.to_enum
    end

    # The item fetched by the most recent successful call to #next?.
    def next_item
      @next_item
    end

    # We can't check just next_item as far as it can return nil in two cases:
    # when our enumerator is stopped and when it stores nil value
    #
    # Advances the enumerator, remembering the fetched item for #next_item.
    # Returns true if an item was fetched, false once the enumerator is
    # exhausted. Any other error propagates to the caller.
    def next?
      @next_item = @list.next
      true
    rescue StopIteration
      false
    end
  end

  # Ruby 1.8 uses continuations in Enumerable, so we should use Arrays
  module IteratorWithArray
    # Returns an Array built from a copy of +list+.
    # Raises ArgumentError unless +list+ responds to #to_a.
    def setup_list(list)
      raise ArgumentError, 'argument must be an array' unless list.respond_to?(:to_a)
      list.dup.to_a
    end

    # Removes and returns the next item.
    def next_item
      @list.shift
    end

    # True while items remain. Checks emptiness rather than truthiness so
    # that nil/false items do not end the iteration early.
    def next?
      !@list.empty?
    end
  end

  class Iterator
    include IteratorWithEnumerable if defined? Fiber
    include IteratorWithArray unless defined? Fiber

    # Create a new parallel async iterator with specified concurrency.
    #
    #   i = EM::Iterator.new(1..100, 10)
    #
    # will create an iterator over the range that processes 10 items at a time. Iteration
    # is started via #each, #map or #inject
    #
    def initialize(list, concurrency = 1)
      @list = setup_list(list)
      @concurrency = concurrency

      @started = false
      @ended = false
    end

    # Change the concurrency of this iterator. Workers will automatically be spawned or destroyed
    # to accommodate the new concurrency level.
    #
    def concurrency=(val)
      old = @concurrency
      @concurrency = val

      spawn_workers if val > old && @started && !@ended
    end
    attr_reader :concurrency

    # Iterate over a set of items using the specified block or proc.
    #
    #   EM::Iterator.new(1..100).each do |num, iter|
    #     puts num
    #     iter.next
    #   end
    #
    # An optional second proc is invoked after the iteration is complete.
    #
    #   EM::Iterator.new(1..100).each(
    #     proc{ |num,iter| iter.next },
    #     proc{ puts 'all done' }
    #   )
    #
    # Raises ArgumentError when neither a proc nor a block is given, and
    # RuntimeError when the iterator has already been started.
    # Returns self.
    #
    def each(foreach = nil, after = nil, &blk)
      raise ArgumentError, 'proc or block required for iteration' unless foreach ||= blk
      raise RuntimeError, 'cannot iterate over an iterator more than once' if @started || @ended

      @started = true
      @pending = 0
      @workers = 0

      # Fires the completion callback once the list is exhausted and every
      # handed-out item has been acknowledged.
      all_done = proc {
        after.call if after && @ended && @pending == 0
      }

      @process_next = proc {
        # p [:process_next, :pending=, @pending, :workers=, @workers, :ended=, @ended, :concurrency=, @concurrency, :list=, @list]
        if @ended || @workers > @concurrency
          # Iteration is over, or concurrency was lowered: retire this worker.
          @workers -= 1
        elsif next?
          item = next_item
          @pending += 1

          is_done = false
          on_done = proc {
            raise RuntimeError, 'already completed this iteration' if is_done
            is_done = true

            @pending -= 1

            if @ended
              all_done.call
            else
              EM.next_tick(@process_next)
            end
          }
          # Expose the completion proc to user code as iter.next.
          class << on_done
            alias :next :call
          end

          foreach.call(item, on_done)
        else
          @ended = true
          @workers -= 1
          all_done.call
        end
      }

      spawn_workers

      self
    end

    # Collect the results of an asynchronous iteration into an array.
    #
    #   EM::Iterator.new(%w[ pwd uptime uname date ], 2).map(proc{ |cmd,iter|
    #     EM.system(cmd){ |output,status|
    #       iter.return(output)
    #     }
    #   }, proc{ |results|
    #     p results
    #   })
    #
    def map(foreach, after)
      index = 0

      inject([], proc { |results, item, iter|
        # Capture this item's position so results keep the list order even
        # when callbacks complete out of order.
        i = index
        index += 1

        is_done = false
        on_done = proc { |res|
          raise RuntimeError, 'already returned a value for this iteration' if is_done
          is_done = true

          results[i] = res
          iter.return(results)
        }
        class << on_done
          alias :return :call
          def next
            raise NoMethodError, 'must call #return on a map iterator'
          end
        end

        foreach.call(item, on_done)
      }, proc { |results|
        after.call(results)
      })
    end

    # Inject the results of an asynchronous iteration onto a given object.
    #
    #   EM::Iterator.new(%w[ pwd uptime uname date ], 2).inject({}, proc{ |hash,cmd,iter|
    #     EM.system(cmd){ |output,status|
    #       hash[cmd] = status.exitstatus == 0 ? output.strip : nil
    #       iter.return(hash)
    #     }
    #   }, proc{ |results|
    #     p results
    #   })
    #
    def inject(obj, foreach, after)
      each(proc { |item, iter|
        is_done = false
        on_done = proc { |res|
          raise RuntimeError, 'already returned a value for this iteration' if is_done
          is_done = true

          # The returned value becomes the accumulator for later items.
          obj = res
          iter.next
        }
        class << on_done
          alias :return :call
          def next
            raise NoMethodError, 'must call #return on an inject iterator'
          end
        end

        foreach.call(obj, item, on_done)
      }, proc {
        after.call(obj)
      })
    end

    private

    # Spawn workers to consume items from the iterator's enumerator based on the current concurrency level.
    #
    def spawn_workers
      EM.next_tick(start_worker = proc {
        if @workers < @concurrency && !@ended
          # p [:spawning_worker, :workers=, @workers, :concurrency=, @concurrency, :ended=, @ended]
          @workers += 1
          @process_next.call
          EM.next_tick(start_worker)
        end
      })
      nil
    end
  end
end
270
+
271
# Manual smoke test: runs only when this file is executed directly.
# It starts the EventMachine reactor and exercises #each, #map, #inject and
# a mid-flight concurrency change, printing the results.
if __FILE__ == $0
  $:.unshift File.join(File.dirname(__FILE__), '..')
  require 'eventmachine'

  # TODO: real tests
  # TODO: pass in one object instead of two? .each{ |iter| puts iter.current; iter.next }
  # TODO: support iter.pause/resume/stop/break/continue?
  # TODO: create some exceptions instead of using RuntimeError
  # TODO: support proc instead of enumerable? EM::Iterator.new(proc{ return queue.pop })

  EM.run {
    EM::Iterator.new(1..50).each { |num, iter| p num; iter.next }
    EM::Iterator.new([1, 2, 3], 10).each { |num, iter| p num; iter.next }

    i = EM::Iterator.new(1..100, 5)
    i.each(proc { |num, iter|
      p num.to_s
      iter.next
    }, proc {
      p :done
    })
    # Lower, then raise, the concurrency while the iteration is in flight.
    EM.add_timer(0.03) {
      i.concurrency = 1
    }
    EM.add_timer(0.04) {
      i.concurrency = 3
    }

    EM::Iterator.new(100..150).map(proc { |num, iter|
      EM.add_timer(0.01) { iter.return(num) }
    }, proc { |results|
      p results
    })

    EM::Iterator.new(%w[ pwd uptime uname date ], 2).inject({}, proc { |hash, cmd, iter|
      EM.system(cmd) { |output, status|
        hash[cmd] = status.exitstatus == 0 ? output.strip : nil
        iter.return(hash)
      }
    }, proc { |results|
      p results
    })
  }
end
data/lib/pioneer.rb ADDED
@@ -0,0 +1,16 @@
1
+ # Eventmachine
2
+ require "em-synchrony"
3
+ require "em-synchrony/em-http"
4
+ require "em-synchrony/fiber_iterator"
5
+ # patch - to remove! maybe pull to em-synchrony?
6
+ require "patch/iterator"
7
+ require "patch/fiber_periodic_timer_iterator"
8
+ # other
9
+ require "logger"
10
+ require 'uri'
11
+ # Code
12
+ require "pioneer/version"
13
+ require "pioneer/base"
14
+ require "pioneer/request"
15
+ require "pioneer/http_header"
16
+ require "pioneer/crawler"
@@ -0,0 +1,83 @@
1
+ # encoding: utf-8
2
module Pioneer
  class UndefinedLocations < RuntimeError; end
  class LocationsNotEnumerable < RuntimeError; end
  class UndefinedProcessing < RuntimeError; end
  class LocationsNotEnumerator < RuntimeError; end
  class HttpRequestError < RuntimeError; end
  class HttpResponseError < RuntimeError; end

  # Base class for crawlers. A subclass must define `locations` (returning
  # something that responds to `each`) and `processing(req)`.
  #
  # `locations`, `processing` and any `if_XXX` handler can also be replaced
  # per instance through setter-style calls, e.g.
  #
  #   crawler.locations = [url1, url2]
  #   crawler.processing = proc { |req| ... }
  class Base
    # Setter names that method_missing turns into per-instance overrides.
    OVERRIDABLE_SETTER = /\A(?:locations|processing|if_[^=]+)=\z/

    attr_reader :name, :concurrency, :sleep, :log_level, :redirects
    # `redirect` is kept for backward compatibility; it reads the same
    # value as `redirects`.
    alias_method :redirect, :redirects

    # opts:
    #   :name          - used for the log file name (default "crawler")
    #   :concurrency   - passed to the iterator in #start (default 10)
    #   :sleep         - delay passed to the iterator in #start (default 0)
    #   :log_enabled   - set to false to send log output to /dev/null (default true)
    #   :log_level     - Logger level (default Logger::DEBUG)
    #   :random_header - when truthy, http_opts[:head] is a random browser header
    #   :header        - explicit value for http_opts[:head]; wins over :random_header
    #   :redirects     - value for http_opts[:redirects]
    #
    # Raises UndefinedLocations / UndefinedProcessing when the instance lacks
    # those methods, and LocationsNotEnumerator when `locations` does not
    # respond to `each`.
    def initialize(opts = {})
      raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless respond_to?(:locations)
      raise UndefinedProcessing, "you should specify `processing` method in your `self.class`" unless respond_to?(:processing)
      raise LocationsNotEnumerator, "you should specify `locations` to return Enumerator" unless locations.respond_to?(:each)

      @name = opts[:name] || "crawler"
      @concurrency = opts[:concurrency] || 10
      @sleep = opts[:sleep] || 0 # sleep is the inverse of RPS (1/RPS): the delay between request starts.
      # Logger is enabled by default. `|| true` would ignore an explicit
      # false, so only fall back to true when the option is absent/nil.
      log_enabled = opts[:log_enabled]
      @log_enabled = log_enabled.nil? ? true : log_enabled
      @log_level = opts[:log_level] || Logger::DEBUG
      @random_header = opts[:random_header] || false
      @header = opts[:header] || nil
      @redirects = opts[:redirects] || nil
    end

    # Runs the crawl inside an EM.synchrony reactor and returns an Array
    # with the result of Request#perform for every location.
    # Raises LocationsNotEnumerable when `locations` does not respond to `each`.
    def start
      raise LocationsNotEnumerable, "location should respond to `each`" unless locations.respond_to?(:each)

      result = []
      EM.synchrony do
        # Using FiberPeriodicTimerIterator that implements RPS (request per second feature)
        # In case @sleep is 0 it behaves like standard FiberIterator
        EM::Synchrony::FiberPeriodicTimerIterator.new(locations, concurrency, sleep).map do |url|
          result << Request.new(url, self).perform
        end
        EM.stop
      end
      result
    end

    # Memoized Logger writing to "<name>.log", or to /dev/null when logging
    # is disabled.
    def logger
      @logger ||= begin
        logger = @log_enabled ? Logger.new("#{name}.log") : Logger.new("/dev/null")
        logger.level = log_level
        logger
      end
    end

    # Options hash built from this crawler's settings. Only the keys that
    # were configured are present.
    def http_opts
      opts = {}
      # An explicit header takes precedence over a random one.
      head = @header || (random_header if @random_header)
      opts[:head] = head if head
      opts[:redirects] = @redirects if @redirects
      opts
    end

    def random_header
      HttpHeader.random
    end

    # we should override only our methods: locations, processing, if_XXX
    #
    # `obj.locations = value`, `obj.processing = value` and `obj.if_XXX = value`
    # define a singleton method of the same name without the trailing `=`.
    # Anything else falls through to the default NoMethodError.
    def method_missing(method_name, *args, &block)
      name = method_name.to_s
      if name =~ OVERRIDABLE_SETTER
        override_method(name.chomp("=").to_sym, args.first)
      else
        super
      end
    end

    # Keeps respond_to? consistent with the setters method_missing accepts.
    def respond_to_missing?(method_name, include_private = false)
      (method_name.to_s =~ OVERRIDABLE_SETTER) ? true : super
    end

    # Defines `method_name` on this instance only.
    # A Proc is invoked with whatever arguments the method receives; any
    # other value is simply returned by the new method.
    def override_method(method_name, arg)
      if Proc === arg
        define_singleton_method(method_name) { |*call_args| arg.call(*call_args) }
      else
        define_singleton_method(method_name) { arg }
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Pioneer
  # basic crawler :)
  #
  # Concrete Base subclass with no-op defaults: no locations and a
  # `processing` that returns nil.
  class Crawler < Base
    # No URLs by default.
    def locations
      []
    end

    # Ignores the request and returns nil.
    def processing(req)
      nil
    end
  end
end
@@ -0,0 +1,273 @@
1
+ module Pioneer
2
+ module HttpHeader
3
+ extend self
4
+
5
# Returns a request header Hash whose 'User-Agent' is picked at random
# from `headers`; the other fields are fixed.
def random
  {
    'Referer' => 'http://www.google.com/',
    'User-Agent' => headers.sample,
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Connection' => 'keep-alive'
  }
end
14
+
15
+ # Get more on http://www.useragentstring.com/pages/useragentstring.php
16
+ def headers
17
+ [
18
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0a2) Gecko/20111101 Firefox/9.0a2',
19
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0',
20
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110613 Firefox/6.0a2',
21
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110612 Firefox/6.0a2',
22
+ 'Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0',
23
+ 'Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0',
24
+ 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/6.0 FirePHP/0.6',
25
+ 'Mozilla/5.0 (Windows NT 5.0; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
26
+ 'Mozilla/5.0 (X11; Linux i686 on x86_64; rv:5.0a2) Gecko/20110524 Firefox/5.0a2',
27
+ 'Mozilla/5.0 (Windows NT 6.1; U; ru; rv:5.0.1.6) Gecko/20110501 Firefox/5.0.1 Firefox/5.0.1',
28
+ 'mozilla/3.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/5.0.1',
29
+ 'Mozilla/5.0 (X11; U; Linux i586; de; rv:5.0) Gecko/20100101 Firefox/5.0',
30
+ 'Mozilla/5.0 (X11; U; Linux amd64; rv:5.0) Gecko/20100101 Firefox/5.0 (Debian)',
31
+ 'Mozilla/5.0 (X11; U; Linux amd64; en-US; rv:5.0) Gecko/20110619 Firefox/5.0',
32
+ 'Mozilla/5.0 (X11; Linux) Gecko Firefox/5.0',
33
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0) Gecko/20100101 Firefox/5.0 FirePHP/0.5',
34
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0) Gecko/20100101 Firefox/5.0 Firefox/5.0',
35
+ 'Mozilla/5.0 (X11; Linux x86_64) Gecko Firefox/5.0',
36
+ 'Mozilla/5.0 (X11; Linux ppc; rv:5.0) Gecko/20100101 Firefox/5.0',
37
+ 'Mozilla/5.0 (X11; Linux AMD64) Gecko Firefox/5.0',
38
+ 'Mozilla/5.0 (X11; FreeBSD amd64; rv:5.0) Gecko/20100101 Firefox/5.0',
39
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0',
40
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20110619 Firefox/5.0',
41
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20100101 Firefox/5.0',
42
+ 'Mozilla/5.0 (Windows NT 6.1.1; rv:5.0) Gecko/20100101 Firefox/5.0',
43
+ 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0',
44
+ 'Mozilla/5.0 (Windows NT 5.1; U; rv:5.0) Gecko/20100101 Firefox/5.0',
45
+ 'Mozilla/5.0 (Windows NT 5.1; rv:2.0.1) Gecko/20100101 Firefox/5.0',
46
+ 'Mozilla/5.0 (Windows NT 5.0; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0',
47
+ 'Mozilla/5.0 (Windows NT 5.0; rv:5.0) Gecko/20100101 Firefox/5.0',
48
+ 'Mozilla/5.0 (U; Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0',
49
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:2.2a1pre) Gecko/20110324 Firefox/4.2a1pre',
50
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:2.2a1pre) Gecko/20100101 Firefox/4.2a1pre',
51
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.2a1pre) Gecko/20110324 Firefox/4.2a1pre',
52
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.2a1pre) Gecko/20110323 Firefox/4.2a1pre',
53
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.2a1pre) Gecko/20110208 Firefox/4.2a1pre',
54
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b9pre) Gecko/20110111 Firefox/4.0b9pre',
55
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b9pre) Gecko/20101228 Firefox/4.0b9pre',
56
+ 'Mozilla/5.0 (Windows NT 5.1; rv:2.0b9pre) Gecko/20110105 Firefox/4.0b9pre',
57
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b8pre) Gecko/20101114 Firefox/4.0b8pre',
58
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b8pre) Gecko/20101213 Firefox/4.0b8pre',
59
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b8pre) Gecko/20101128 Firefox/4.0b8pre',
60
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b8pre) Gecko/20101114 Firefox/4.0b8pre',
61
+ 'Mozilla/5.0 (Windows NT 5.1; rv:2.0b8pre) Gecko/20101127 Firefox/4.0b8pre',
62
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0',
63
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
64
+ 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
65
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
66
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6',
67
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2',
68
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.10 Chromium/15.0.874.120 Chrome/15.0.874.120 Safari/535.2',
69
+ 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2',
70
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2',
71
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2',
72
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2',
73
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.860.0 Safari/535.2',
74
+ 'Chrome/15.0.860.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/15.0.860.0',
75
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.04 Chromium/14.0.813.0 Chrome/14.0.813.0 Safari/535.1',
76
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
77
+ 'Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
78
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
79
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1',
80
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.814.0 Chrome/14.0.814.0 Safari/535.1',
81
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.814.0 Safari/535.1',
82
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1',
83
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.803.0 Chrome/14.0.803.0 Safari/535.1',
84
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1',
85
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1',
86
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1',
87
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1',
88
+ 'Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41',
89
+ 'Mozilla/5.0 ArchLinux (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
90
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/13.0.782.41 Chrome/13.0.782.41 Safari/535.1',
91
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
92
+ 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
93
+ 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
94
+ 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
95
+ 'Mozilla/5.0 (Windows NT 5.2; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
96
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
97
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
98
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
99
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1',
100
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1',
101
+ 'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1',
102
+ 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1',
103
+ 'Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00',
104
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
105
+ 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52',
106
+ 'Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51',
107
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51',
108
+ 'Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50',
109
+ 'Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50',
110
+ 'Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11',
111
+ 'Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11',
112
+ 'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11',
113
+ 'Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01',
114
+ 'Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01',
115
+ 'Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01',
116
+ 'Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01',
117
+ 'Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01',
118
+ 'Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01',
119
+ 'Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01',
120
+ 'Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01',
121
+ 'Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01',
122
+ 'Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01',
123
+ 'Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01',
124
+ 'Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01',
125
+ 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01',
126
+ 'Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01',
127
+ 'Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01',
128
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01',
129
+ 'Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10',
130
+ 'Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10',
131
+ 'Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10',
132
+ 'Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1',
133
+ 'Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00',
134
+ 'Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00',
135
+ 'Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00',
136
+ 'Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00',
137
+ 'Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00',
138
+ 'Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00',
139
+ 'Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00',
140
+ 'Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00',
141
+ 'Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00',
142
+ 'Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.7.39 Version/11.00',
143
+ 'Opera/9.80 (Windows NT 5.1; U; MRA 5.5 (build 02842); ru) Presto/2.7.62 Version/11.00',
144
+ 'Opera/9.80 (Windows NT 5.1; U; it) Presto/2.7.62 Version/11.00',
145
+ 'Mozilla/5.0 (Windows NT 6.0; U; ja; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.00',
146
+ 'Mozilla/5.0 (Windows NT 5.1; U; pl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.00',
147
+ 'Mozilla/5.0 (Windows NT 5.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.00',
148
+ 'Mozilla/4.0 (compatible; MSIE 8.0; X11; Linux x86_64; pl) Opera 11.00',
149
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; fr) Opera 11.00',
150
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; ja) Opera 11.00',
151
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; en) Opera 11.00',
152
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; pl) Opera 11.00',
153
+ 'Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.6.31 Version/10.70',
154
+ 'Mozilla/5.0 (Windows NT 5.2; U; ru; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.70',
155
+ 'Mozilla/5.0 (Windows NT 5.1; U; zh-cn; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.70',
156
+ 'Opera/9.80 (X11; Linux i686; U; en-GB) Presto/2.5.24 Version/10.53',
157
+ 'Mozilla/5.0 (Windows NT 5.1; U; zh-cn; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.53',
158
+ 'Mozilla/5.0 (Windows NT 5.1; U; Firefox/5.0; en; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.53',
159
+ 'Mozilla/5.0 (Windows NT 5.1; U; Firefox/4.5; en; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.53',
160
+ 'Mozilla/5.0 (Windows NT 5.1; U; Firefox/3.5; en; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 10.53',
161
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; ko) Opera 10.53',
162
+ 'Opera/9.80 (X11; U; Linux i686; en-US; rv:1.9.2.3) Presto/2.2.15 Version/10.10',
163
+ 'Opera/9.80 (X11; Linux x86_64; U; it) Presto/2.2.15 Version/10.10',
164
+ 'Opera/9.80 (Windows NT 6.1; U; de) Presto/2.2.15 Version/10.10',
165
+ 'Opera/9.80 (Windows NT 6.0; U; Gecko/20100115; pl) Presto/2.2.15 Version/10.10',
166
+ 'Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.15 Version/10.10',
167
+ 'Opera/9.80 (Windows NT 5.1; U; de) Presto/2.2.15 Version/10.10',
168
+ 'Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.2.15 Version/10.10',
169
+ 'Mozilla/5.0 (Windows NT 6.0; U; tr; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 10.10',
170
+ 'Mozilla/4.0 (compatible; MSIE 6.0; X11; Linux i686; de) Opera 10.10',
171
+ 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 6.0; tr) Opera 10.10',
172
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
173
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
174
+ 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
175
+ 'Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)',
176
+ 'Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)',
177
+ 'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))',
178
+ 'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)',
179
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)',
180
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)',
181
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7',
182
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)',
183
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)',
184
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)',
185
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)',
186
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; Tablet PC 2.0; InfoPath.3; .NET4.0C; .NET4.0E)',
187
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0',
188
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; yie8)',
189
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; .NET4.0C; Tablet PC 2.0)',
190
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; FunWebProducts)',
191
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/13.0.782.215)',
192
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/11.0.696.57)',
193
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) chromeframe/10.0.648.205',
194
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; chromeframe/11.0.696.57)',
195
+ 'Mozilla/5.0 ( ; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
196
+ 'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0; FDM; MSIECrawler; Media Center PC 5.0)',
197
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)',
198
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)',
199
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322)',
200
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727)',
201
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
202
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; SLCC1; .NET CLR 1.1.4322)',
203
+ 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 3.0.04506.30)',
204
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.0; Trident/4.0; FBSMTWB; .NET CLR 2.0.34861; .NET CLR 3.0.3746.3218; .NET CLR 3.5.33652; msn OptimizedIE8;ENUS)',
205
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.2; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)',
206
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)',
207
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8',
208
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C)',
209
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)',
210
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.2)',
211
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 3.0)',
212
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; msn OptimizedIE8;ZHCN)',
213
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; MS-RTC LM 8; InfoPath.3; .NET4.0C; .NET4.0E) chromeframe/8.0.552.224',
214
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; MS-RTC LM 8; .NET4.0C; .NET4.0E; Zune 4.7; InfoPath.3)',
215
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; MS-RTC LM 8; .NET4.0C; .NET4.0E; Zune 4.7)',
216
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; MS-RTC LM 8)',
217
+ 'Mozilla/4.0(compatible; MSIE 7.0b; Windows NT 6.0)',
218
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)',
219
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)',
220
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; Media Center PC 3.0; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1)',
221
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; FDM; .NET CLR 1.1.4322)',
222
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; InfoPath.1; .NET CLR 2.0.50727)',
223
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; InfoPath.1)',
224
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; Alexa Toolbar; .NET CLR 2.0.50727)',
225
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; Alexa Toolbar)',
226
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
227
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.40607)',
228
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322)',
229
+ 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.0.3705; Media Center PC 3.1; Alexa Toolbar; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
230
+ 'Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; en-US)',
231
+ 'Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 6.0; el-GR)',
232
+ 'Mozilla/5.0 (Windows; U; MSIE 7.0; Windows NT 5.2)',
233
+ 'Mozilla/5.0 (MSIE 7.0; Macintosh; U; SunOS; X11; gu; SV1; InfoPath.2; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
234
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0; WOW64; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; c .NET CLR 3.0.04506; .NET CLR 3.5.30707; InfoPath.1; el-GR)',
235
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; c .NET CLR 3.0.04506; .NET CLR 3.5.30707; InfoPath.1; el-GR)',
236
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0; fr-FR)',
237
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0; en-US)',
238
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.2; WOW64; .NET CLR 2.0.50727)',
239
+ 'Mozilla/5.0 (compatible; MSIE 7.0; Windows 98; SpamBlockerUtility 6.3.91; SpamBlockerUtility 6.2.91; .NET CLR 4.1.89;GB)',
240
+ 'Mozilla/4.79 [en] (compatible; MSIE 7.0; Windows NT 5.0; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 1.1.4322; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)',
241
+ 'Mozilla/4.0 (Windows; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
242
+ 'Mozilla/4.0 (Mozilla/4.0; MSIE 7.0; Windows NT 5.1; FDM; SV1; .NET CLR 3.0.04506.30)',
243
+ 'Mozilla/4.0 (Mozilla/4.0; MSIE 7.0; Windows NT 5.1; FDM; SV1)',
244
+ 'Mozilla/4.0 (compatible;MSIE 7.0;Windows NT 6.0)',
245
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)',
246
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; MS-RTC LM 8; .NET4.0C; .NET4.0E; InfoPath.3)',
247
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; chromeframe/12.0.742.100)',
248
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
249
+ 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)',
250
+ 'Mozilla/5.0 (Windows; U; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
251
+ 'Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
252
+ 'Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4325)',
253
+ 'Mozilla/5.0 (compatible; MSIE 6.0; Windows NT 5.1)',
254
+ 'Mozilla/45.0 (compatible; MSIE 6.0; Windows NT 5.1)',
255
+ 'Mozilla/4.08 (compatible; MSIE 6.0; Windows NT 5.1)',
256
+ 'Mozilla/4.01 (compatible; MSIE 6.0; Windows NT 5.1)',
257
+ 'Mozilla/4.0 (X11; MSIE 6.0; i686; .NET CLR 1.1.4322; .NET CLR 2.0.50727; FDM)',
258
+ 'Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 6.0)',
259
+ 'Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 5.2)',
260
+ 'Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 5.0)',
261
+ 'Mozilla/4.0 (Windows; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)',
262
+ 'Mozilla/4.0 (MSIE 6.0; Windows NT 5.1)',
263
+ 'Mozilla/4.0 (MSIE 6.0; Windows NT 5.0)',
264
+ 'Mozilla/4.0 (compatible;MSIE 6.0;Windows 98;Q312461)',
265
+ 'Mozilla/4.0 (Compatible; Windows NT 5.1; MSIE 6.0) (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
266
+ 'Mozilla/4.0 (compatible; U; MSIE 6.0; Windows NT 5.1) (Compatible; ; ; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)',
267
+ 'Mozilla/4.0 (compatible; U; MSIE 6.0; Windows NT 5.1)',
268
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3; Tablet PC 2.0)',
269
+ 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB6.5; QQDownload 534; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; SLCC2; .NET CLR 2.0.50727; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729)'
270
+ ]
271
+ end
272
+ end
273
+ end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
module Pioneer
  # A single HTTP GET issued on behalf of a crawler (the "pioneer").
  #
  # The crawler object supplies the logger, the HTTP options and the
  # +processing+ callback, and may optionally respond to error/status hooks
  # (+if_request_error+, +if_response_error+, +if_status_NNN+,
  # +if_status_not_200+) that are looked up with +respond_to?+.
  class Request
    attr_reader :pioneer, :url, :result, :response, :error

    # @param url [String] target location; "http://" is prepended when the
    #   string does not already start with an http(s) scheme
    # @param pioneer [Object] crawler that owns this request
    def initialize(url, pioneer)
      @pioneer = pioneer
      # Anchor the check at the start of the string: an unanchored /http/
      # would also match "example.com/http" and leave it without a scheme.
      url = "http://" + url unless url =~ %r{\Ahttps?://}i
      @url = escape_url(url)
    end

    # Runs the request and stores whatever the handler chain returns.
    #
    # @return [Object] the value produced by +processing+ or by one of the hooks
    def perform
      pioneer.logger.info("going to #{url}")
      @result = handle_request_error_or_return_result
    end

    # Issues the GET. Any exception raised while doing so is logged as fatal
    # and handed to the crawler's +if_request_error+ hook when it has one.
    #
    # @raise [HttpRequestError] when the request raised and no hook is defined
    def handle_request_error_or_return_result
      begin
        @response = EventMachine::HttpRequest.new(url).get(pioneer.http_opts)
      rescue => e
        @error = "Request totaly failed. Url: #{url}, error: #{e.message}"
        pioneer.logger.fatal(error)
        return pioneer.send(:if_request_error, self) if pioneer.respond_to?(:if_request_error)

        raise HttpRequestError, @error
      end
      handle_response_error_or_return_result
    end

    # Checks the response object for a transport-level error.
    #
    # The message is stored in @error (not a local) so that the
    # +if_response_error+ hook can read it through +req.error+.
    #
    # @raise [HttpResponseError] when the response has an error and no hook is defined
    def handle_response_error_or_return_result
      if response.error
        @error = "Response for #{url} get an error: #{response.error}"
        pioneer.logger.error(@error)
        return pioneer.send(:if_response_error, self) if pioneer.respond_to?(:if_response_error)

        raise HttpResponseError, @error
      end
      handle_status_or_return_result
    end

    # Dispatches on the HTTP status: 200 goes to +processing+; anything else
    # is logged and passed to the most specific hook the crawler responds to.
    #
    # @return [Object, nil] nil when the status is not 200 and no hook exists
    def handle_status_or_return_result
      status = response.response_header.status
      case status
      when 200
        pioneer.processing(self)
      else
        pioneer.logger.error("This #{url} returns this http status: #{status}")
        status_hook = "if_status_#{status}"
        if pioneer.respond_to?(status_hook.to_sym)
          pioneer.send(status_hook, self)
        elsif pioneer.respond_to?(:if_status_not_200)
          pioneer.send(:if_status_not_200, self)
        end
      end
    end

    private

    # Percent-encodes characters that are not valid in a URI.
    # URI.escape no longer exists on Ruby 3.0+, so the parser object that
    # implements the same escaping is called directly.
    def escape_url(raw)
      parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
      parser.escape(raw)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Pioneer
  # Gem version string, read by pioneer.gemspec. Frozen so the shared
  # constant cannot be mutated in place.
  VERSION = "0.0.1.alpha".freeze
end
data/pioneer.gemspec ADDED
@@ -0,0 +1,24 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for pioneer. The version comes from lib/pioneer/version.rb
# and the packaged file lists are taken from `git ls-files`.
$:.push File.expand_path("../lib", __FILE__)
require "pioneer/version"

Gem::Specification.new do |s|
  s.name        = "pioneer"
  s.version     = Pioneer::VERSION
  s.authors     = ["Petr"]
  s.email       = ["pedro.yanoviches@gmail.com"]
  s.homepage    = ""
  s.summary     = "HTTP crawler"
  s.description = "Simple async HTTP crawler based on em-synchrony"

  s.rubyforge_project = "pioneer"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Development dependencies: the specs require yajl and nokogiri.
  s.add_development_dependency "yajl-ruby"
  s.add_development_dependency "nokogiri"

  # Runtime dependencies: Pioneer::Request calls EventMachine::HttpRequest,
  # which is provided by em-http-request; em-synchrony does not install it.
  s.add_runtime_dependency "em-synchrony"
  s.add_runtime_dependency "em-http-request"
end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# Placeholder spec group for Pioneer::Base; it contains no examples yet.
describe(Pioneer::Base) {}
@@ -0,0 +1,66 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+ require 'yajl'
4
+ #
5
+ # TODO:
6
+ # Rewrite real live examples with StubServer
7
+ #
8
+
9
+ describe Pioneer::Request do
10
+ before do
11
+ @pioneer1 = CustomCrawler1.new(name: "Custom crawler 1")
12
+ @pioneer2 = Pioneer::Crawler.new(name: "Base crawler 2")
13
+ @pioneer3 = Pioneer::Crawler.new(name: "Base crawler 3")
14
+ end
15
+
16
+ it "should return two 200 response statuses" do
17
+ @pioneer1.start.must_equal [200, 200]
18
+ end
19
+
20
+ it "should redefine methods" do
21
+ processing = proc{ |req| req.response.response_header.status + 1 }
22
+ @pioneer2.processing = processing
23
+ @pioneer2.locations = ["www.apple.com", "www.amazon.com"]
24
+ @pioneer2.start.must_equal [201, 201]
25
+ @pioneer2.locations = ["www.ru.erro"]
26
+ if_response_error = proc{ |req| "fail" }
27
+ @pioneer2.if_response_error = if_response_error
28
+ @pioneer2.start.must_equal ["fail"]
29
+ end
30
+
31
+ it "should execute if_status_xxx" do
32
+ redirector = proc{ |req| "redirected" }
33
+ error404 = proc{ |req| "notfound" }
34
+ @pioneer2.locations = ["google.com/redirectmeplease", "http://www.amazon.com/notfoundpage"]
35
+ @pioneer2.if_status_301 = redirector
36
+ @pioneer2.if_status_404 = error404
37
+ @pioneer2.start.must_equal ["redirected", "notfound"]
38
+ end
39
+
40
+ it "should execute if_status_not_200 if another colback is not defined" do
41
+ not_200 = proc{ "something goes wrong" }
42
+ redirector = proc{ |req| "redirected" }
43
+ @pioneer3.locations = ["google.com/redirectmeplease", "http://www.amazon.com/notfoundpage"]
44
+ @pioneer3.if_status_301 = redirector
45
+ @pioneer3.if_status_not_200 = not_200
46
+ @pioneer3.start.must_equal ["redirected", "something goes wrong"]
47
+ end
48
+
49
+ # LAST FM API TEST
50
+ it "should return similar artists for a number of them" do
51
+ @lastfm_pioneer = LastfmCrawler.new(sleep: 0.25)
52
+ @lastfm_pioneer.start.sort.must_equal LastfmEnum.const_get(:ARTISTS).sort
53
+ end
54
+
55
+ it "should use headers" do
56
+ @crawler1 = KinopoiskCrawler.new(random_header: false)
57
+ @crawler2 = KinopoiskCrawler.new(random_header: false, redirects: 1)
58
+ @crawler3 = KinopoiskCrawler.new(random_header: true)
59
+ # this one will redirect
60
+ @crawler1.start.must_equal [nil]
61
+ # this one will return some restrictions (it need real headres)
62
+ (@crawler2.start.first < 10000).must_equal true
63
+ # and this one will fire up
64
+ (@crawler3.start.first > 10000).must_equal true
65
+ end
66
+ end
@@ -0,0 +1,51 @@
1
+ require 'pioneer'
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'nokogiri'
5
+
6
# Crawler fixture: requests the same page twice and yields each HTTP status.
class CustomCrawler1 < Pioneer::Base
  # @return [Array<String>] the two URLs to visit
  def locations
    %w[http://www.ru http://www.ru]
  end

  # @param req [Pioneer::Request] the finished request
  # @return the status taken from the response header
  def processing(req)
    header = req.response.response_header
    header.status
  end
end
16
+
17
# LastFM test
#
# Enumerable that yields one Last.fm "artist.getsimilar" API URL per entry
# in ARTISTS.
class LastfmEnum
  include Enumerable

  ARTISTS = ["Cher", "Madonna", "Rolling Stones", "The Beatles", "Muse"].freeze

  # Yields the API URL for every artist, in ARTISTS order.
  # Without a block an Enumerator is returned instead of raising
  # LocalJumpError.
  #
  # NOTE(review): the artist name is interpolated as-is (spaces included) and
  # the API key is hard-coded in the URL.
  #
  # @yieldparam url [String]
  # @return [Array<String>, Enumerator] ARTISTS when a block is given
  def each
    return enum_for(:each) unless block_given?

    ARTISTS.each do |artist|
      yield "http://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&artist=#{artist}&api_key=b25b959554ed76058ac220b7b2e0a026&format=json"
    end
  end
end
30
+
31
# Crawler fixture that walks the Last.fm URLs produced by LastfmEnum.
class LastfmCrawler < Pioneer::Base
  # @return [LastfmEnum] a fresh enumerable of API URLs
  def locations
    LastfmEnum.new
  end

  # Parses the JSON body and pulls the artist name out of the
  # "similarartists" -> "@attr" section.
  #
  # @param req [Pioneer::Request] the finished request
  def processing(req)
    payload = Yajl::Parser.parse(req.response.response)
    attributes = payload["similarartists"]["@attr"]
    attributes["artist"]
  end
end
41
+
42
# Kinopoisk
#
# Crawler fixture that fetches one film page and reports the body size.
class KinopoiskCrawler < Pioneer::Base
  # @return [Array<String>] the single URL to visit
  def locations
    %w[http://www.kinopoisk.ru/level/1/film/614667/]
  end

  # @param req [Pioneer::Request] the finished request
  # @return the size of the response body
  def processing(req)
    body = req.response.response
    body.size
  end
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pioneer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.alpha
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Petr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-22 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: yajl-ruby
16
+ requirement: &74894120 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *74894120
25
+ - !ruby/object:Gem::Dependency
26
+ name: em-synchrony
27
+ requirement: &74893910 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *74893910
36
+ description: Simple async HTTP crawler based on em-synchrony
37
+ email:
38
+ - pedro.yanoviches@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - .gitignore
44
+ - .travis.yml
45
+ - CHANGELOG
46
+ - Gemfile
47
+ - LICENSE
48
+ - README.md
49
+ - Rakefile
50
+ - lib/patch/fiber_periodic_timer_iterator.rb
51
+ - lib/patch/iterator.rb
52
+ - lib/pioneer.rb
53
+ - lib/pioneer/base.rb
54
+ - lib/pioneer/crawler.rb
55
+ - lib/pioneer/http_header.rb
56
+ - lib/pioneer/request.rb
57
+ - lib/pioneer/version.rb
58
+ - pioneer.gemspec
59
+ - spec/pioneer/base_spec.rb
60
+ - spec/pioneer/request_spec.rb
61
+ - spec/spec_helper.rb
62
+ - tmp/just_for_test/railscasts.txt
63
+ homepage: ''
64
+ licenses: []
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ! '>'
79
+ - !ruby/object:Gem::Version
80
+ version: 1.3.1
81
+ requirements: []
82
+ rubyforge_project: pioneer
83
+ rubygems_version: 1.8.15
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: HTTP crawler
87
+ test_files: []
88
+ has_rdoc: