rider 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1 @@
+ Dir['tasks/**/*.rake'].each { |rake| load rake }
data/bin/crawl ADDED
@@ -0,0 +1,21 @@
+ #!/usr/bin/env ruby
+
+ require 'lib/rider'
+
+ queue_name = ARGV[0]
+ queue = Rider::Queue.new(queue_name)
+ puts "Crawling URLs from #{queue.filename}"
+
+ # will crawl all URLs
+ crawler = Rider::Crawler.new(//, queue)
+
+ crawler.each_document do |uri, metadata, contents|
+   puts "-"*60
+   puts "URL: #{uri.to_s}"
+   puts "Metadata: #{metadata.inspect}"
+   puts "Contents excerpt: #{contents[0..250]}"
+   puts
+ end
+
+ puts
+ puts "Crawl finished"
data/lib/rider.rb ADDED
@@ -0,0 +1,36 @@
+ $:.unshift File.dirname(__FILE__)
+
+ require 'rubygems'
+ require 'logger'
+ require 'mechanize'
+ require 'timeout'
+ require 'yaml'
+
+ require 'rider/queue'
+ require 'rider/part_queue'
+ require 'rider/crawler'
+
+ $KCODE = 'u'
+
+ module Rider
+   VERSION = '0.1'
+   LOGGER = Logger.new(STDOUT)
+   LOGGER.level = Logger::DEBUG
+
+
+   def log
+     LOGGER
+   end
+   module_function :log
+
+   def to_absolute(uri, link)
+     link = URI.encode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))
+     return nil if link.nil? or link.empty?
+
+     relative = URI(link)
+     absolute = uri.merge(relative)
+
+     absolute.path = '/' if absolute.path.nil? or absolute.path.empty?
+     return absolute
+   end
+ end
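to_absolute resolves a possibly relative link against a base URI, stripping any #fragment first. A minimal illustration with invented values; note that only log is declared with module_function, so as written to_absolute has to be mixed in rather than called as Rider.to_absolute:

    include Rider

    base = URI.parse('http://example.com/articles/index.html')
    to_absolute(base, 'colors.html#top').to_s
    # => "http://example.com/articles/colors.html" (fragment stripped, path merged)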
data/lib/rider/crawler.rb ADDED
@@ -0,0 +1,102 @@
+ require 'hpricot'
+
+ module Rider
+   class Crawler
+     # Creates a new Crawler, with the specified +mask+ (a Regexp) and queue (a +Rider::Queue+ instance).
+     def initialize(mask, queue)
+       @mask = mask
+       @queue = queue
+       @seen_urls = []
+       @www = WWW::Mechanize.new do |a|
+         a.log = Logger.new("tmp/www.log")
+         a.pluggable_parser.default = Hpricot
+       end
+     end
+
+     # Returns true if +url+ passes the +mask+.
+     def match_mask?(url)
+       @mask.match(url) != nil
+     end
+
+     # Crawls documents and passes their URL, response headers, and data to the supplied block.
+     def each_document
+       while doc_data = next_document()
+         follow_urls = yield(doc_data) || []
+         add_follow_urls(follow_urls)
+       end
+     end
+
+     def add_follow_urls(urls)
+       urls.each { |url| @queue.push(url) if follow_url?(url) }
+     end
+
+     def follow_url?(url)
+       match_mask?(url) and !seen_url?(url)
+     end
+
+     SKIPPABLE_EXCEPTIONS = [Errno::ETIMEDOUT, WWW::Mechanize::ResponseCodeError, Errno::EHOSTUNREACH, SocketError,
+                             Errno::ECONNREFUSED, Timeout::Error, Net::HTTPBadResponse, Hpricot::ParseError]
+     # Returns the next retrievable document from the next valid URL in the queue.
+     def next_document
+       begin
+         url = next_url()
+         return nil if url.nil?
+         doc_data = get(url)
+         saw_url(url)
+         return doc_data
+       rescue Exception=>ex
+         if SKIPPABLE_EXCEPTIONS.include?(ex.class)
+           Rider.log.debug("EXCEPTION: #{ex.inspect}, skipping...")
+           retry # go on to the next document
+         else
+           raise ex
+         end
+       end
+     end
+
+     # Gets the document at the specified +url+. Returns an Array [uri, metadata, contents]
+     def get(url)
+       uri = URI.parse(url)
+       Timeout::timeout(8, Timeout::Error) do
+         case uri.scheme
+         when 'http'
+           get_http(uri)
+         when 'file'
+           get_file(uri)
+         else
+           raise(ArgumentError, "don't know how to get #{url}")
+         end
+       end
+     end
+
+     def get_file(uri)
+       filename = uri.gsub(/^file:\/\//, '')
+       [uri, {}, File.read(filename)]
+     end
+
+     def get_http(uri)
+       page = @www.get(uri)
+       meta = page.response
+       [uri, meta, page]
+     end
+
+     # Retrieves the next URL in the queue that matches the +mask+.
+     def next_url
+       while url = @queue.shift
+         return url if valid_url?(url)
+       end
+     end
+
+     def valid_url?(url)
+       !seen_url?(url) && match_mask?(url)
+     end
+
+     def seen_url?(url)
+       @seen_urls.include?(url)
+     end
+
+     def saw_url(url)
+       @seen_urls << url
+     end
+   end
+ end
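The crawl loop's contract: each_document yields [uri, metadata, contents] and treats the block's return value as the list of URLs to enqueue next (via add_follow_urls, which applies the mask and the seen-URL filter). A sketch under those assumptions; the link extraction assumes an HTML response that Mechanize parsed into a page object:

    queue = Rider::Queue.new('web.q')
    crawler = Rider::Crawler.new(/http:\/\/localhost/, queue)

    crawler.each_document do |uri, metadata, contents|
      puts "fetched #{uri}"
      # Return follow-up URLs; documents that don't expose links yield none.
      contents.respond_to?(:links) ? contents.links.map { |link| link.href.to_s } : []
    end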
data/lib/rider/part_queue.rb ADDED
@@ -0,0 +1,85 @@
+ module Rider
+   class HostPartitionedQueue
+     attr_reader :name
+
+     def initialize(name)
+       @name = name
+       clear
+     end
+
+     def push(url)
+       host = get_host(url)
+       @hosts << host unless @hosts.include?(host)
+       @urls_by_host[host] ||= []
+       @urls_by_host[host] << url
+       return true
+     end
+
+     def shift
+       if empty?
+         Rider.log.debug("Q #{name} POP nil")
+         return nil
+       end
+       host = @hosts[@current_host_index]
+       url = @urls_by_host[host].shift
+
+       if @urls_by_host[host].empty?
+         @hosts.delete_at(@current_host_index)
+         @urls_by_host.delete(host)
+         # no need to increment @current_host_index since we just effectively pushed every element down by one
+         # by deleting from @hosts, UNLESS it was the last item in the array, in which case that index doesn't
+         # exist anymore
+         increment_current_host_index if @current_host_index == @hosts.length
+       else
+         increment_current_host_index
+       end
+       return url
+     end
+
+     def clear
+       @urls_by_host = {}
+       @hosts = []
+       @current_host_index = 0
+     end
+
+     def empty?
+       @hosts.empty?
+     end
+
+     def ==(another_queue)
+       another_queue.instance_variable_get("@urls_by_host") == @urls_by_host &&
+         another_queue.instance_variable_get("@hosts") == @hosts &&
+         another_queue.instance_variable_get("@current_host_index") == @current_host_index
+     end
+
+     def serialize
+       File.open(filename, 'w') do |file|
+         file.write(self.to_yaml)
+       end
+     end
+
+     def self.unserialize(name)
+       filename = "tmp/#{name}.q"
+       return nil unless File.exist?(filename)
+       YAML.load_file("tmp/#{name}.q")
+     end
+
+     private
+     def get_host(url)
+       URI.parse(url).host
+     end
+
+     def increment_current_host_index
+       if @hosts.length == 0
+         @current_host_index = 0
+       else
+         # increment by one but go back to 0 if it exceeds the length of the array
+         @current_host_index = (@current_host_index + 1) % @hosts.length
+       end
+     end
+
+     def filename
+       "tmp/#{name}.q"
+     end
+   end
+ end
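The host partitioning exists for politeness: shift rotates across distinct hosts so consecutive requests don't hammer one server. In brief (host names invented; the behavior mirrors the "alternate among hosts" spec further down):

    q = Rider::HostPartitionedQueue.new('demo')
    %w(http://a.example/1 http://a.example/2 http://b.example/1).each { |u| q.push(u) }
    q.shift  # => "http://a.example/1"
    q.shift  # => "http://b.example/1"  (next host, not a.example's next URL)
    q.shift  # => "http://a.example/2"
    q.shift  # => nil (queue drained)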
data/lib/rider/queue.rb ADDED
@@ -0,0 +1,40 @@
+ module Rider
+   class Rider::Queue
+     attr_reader :filename
+     def initialize(filename)
+       raise(ArgumentError, "queues must have a filename") if !filename or filename.empty?
+       @filename = filename
+     end
+
+     def push(item)
+       Rider.log.debug("Q #{filename} PUSH #{item}")
+       File.open(filename, "a") do |file|
+         file.puts(item)
+       end
+       return true
+     end
+
+     def shift
+       if empty?
+         Rider.log.debug("Q #{filename} SHIFT nil")
+         return nil
+       end
+       lines = File.readlines(filename)
+       item = lines.shift.strip
+       File.open(filename, "w") do |file|
+         file.write(lines.join)
+       end
+       Rider.log.debug("Q #{filename} SHIFT #{item}")
+       return item
+     end
+
+     def clear
+       File.unlink(filename) if File.exist?(filename)
+       return true
+     end
+
+     def empty?
+       !File.exist?(filename) or File.open(filename).read == ""
+     end
+   end
+ end
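Rider::Queue is a plain newline-delimited file, so the crawl frontier survives process restarts; shift rewrites the file minus its first line, which is simple but O(n) per pop. A minimal sketch (path illustrative; the directory must already exist):

    q = Rider::Queue.new('tmp/demo.q')
    q.push('http://localhost/a')   # appends a line to tmp/demo.q
    q.push('http://localhost/b')
    q.shift                        # => "http://localhost/a" (file rewritten)
    File.read('tmp/demo.q')        # => "http://localhost/b\n"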
data/spec/crawler_spec.rb ADDED
@@ -0,0 +1,94 @@
+ require 'spec/spec_helper'
+
+ describe Rider::Crawler do
+   before do
+     @queue = Rider::Queue.new('web')
+     @crawler = Rider::Crawler.new(/http:\/\/localhost/, @queue)
+   end
+
+   describe "when checking URLs against mask" do
+     it "should return true for a URL that matches the mask" do
+       @crawler.match_mask?("http://localhost/some/path").should == true
+     end
+
+     it "should return false for a URL that does not match the mask" do
+       @crawler.match_mask?("http://example.com/some/path").should == false
+     end
+   end
+
+   describe "when checking URL validity" do
+     before do
+       @urls = %w(http://example.com/invalid http://localhost/valid http://localhost/valid/unseen)
+     end
+
+     it "should return URLs matching the mask" do
+       @urls.select { |url| @crawler.valid_url?(url) }.should == ["http://localhost/valid", "http://localhost/valid/unseen"]
+     end
+
+     it "should return only unseen URLs" do
+       @crawler.saw_url('http://localhost/valid')
+       @urls.select { |url| @crawler.valid_url?(url) }.should == ['http://localhost/valid/unseen']
+     end
+   end
+
+   describe "when determining URLs to follow" do
+     it "should follow URLs that match the mask" do
+       @crawler.follow_url?('http://localhost/abc').should == true
+     end
+
+     it "should not follow URLs that don't match the mask" do
+       @crawler.follow_url?('http://invalid.com').should == false
+     end
+
+     it "should follow URLs that haven't been seen"
+     it "should not follow URLs that have been seen already"
+   end
+
+   describe "when getting the next document" do
+
+   end
+
+   describe "when getting documents" do
+     it "should raise an error for schemes other than http and file" do
+       lambda { @crawler.get('ftp://example.com') }.should raise_error(ArgumentError)
+     end
+
+     describe "when getting file:// documents" do
+       before do
+         @filename = File.expand_path(File.join(File.dirname(__FILE__), 'data', 'apples.html'))
+         @file_uri = 'file://' + @filename
+       end
+
+       it "should return an array whose first element is the uri" do
+         @crawler.get_file(@file_uri)[0].should == @file_uri
+       end
+
+       it "should return an array whose second element is blank metadata" do
+         @crawler.get_file(@file_uri)[1].should == {}
+       end
+
+       it "should return an array whose third element is the file contents" do
+         @crawler.get_file(@file_uri)[2].should == File.read(@filename)
+       end
+     end
+
+     describe "when getting http:// documents" do
+       before do
+         @doc_uri = 'http://localhost/simplewikipedia/articles/a/l/g/Algebra.html'
+       end
+
+       it "should return an array whose first element is the uri" do
+         @crawler.get_http(@doc_uri)[0].should == @doc_uri
+       end
+
+       it "should return an array whose second element is the response headers" do
+         meta = @crawler.get_http(@doc_uri)[1]
+         meta['Content-type'].should == 'text/html'
+       end
+
+       it "should return an array whose third element is the document contents" do
+         @crawler.get_http(@doc_uri)[2].match(/Algebra is taught in school/).should_not == nil
+       end
+     end
+   end
+ end
data/spec/data/apples.html ADDED
@@ -0,0 +1,23 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+   <title>Apples</title>
+
+ </head>
+
+ <body>
+
+ <h1>Apples</h1>
+
+ <p>
+   Some apples are <a href="colors.html">red</a>.
+   Some are <a href="colors.html">green</a>.
+   They <a href="prices.html">do not cost much money</a>.
+   You can <a href="http://buyapples.com/">buy apples</a>.
+ </p>
+
+ </body>
+ </html>
data/spec/data/colors.html ADDED
@@ -0,0 +1,24 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+   <title>Colors</title>
+
+ </head>
+
+ <body>
+
+ <h1>Colors</h1>
+
+ <dl>
+   <dt>Red</dt>
+   <dd><a href="apples.html">Apples</a></dd>
+
+   <dt>Green</dt>
+   <dd><a href="apples.html">Apples</a> or <a href="prices.html">dollars</a>.</dd>
+ </dl>
+
+ </body>
+ </html>
data/spec/data/fruits.html ADDED
@@ -0,0 +1,17 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+   <title>Fruits</title>
+
+ </head>
+
+ <body>
+
+ <p><a href="apples.html">Apples</a> are a fruit.</a></p>
+
+
+ </body>
+ </html>
data/spec/data/notitle.html ADDED
@@ -0,0 +1,14 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+   <title></title>
+
+ </head>
+
+ <body>
+
+ </body>
+ </html>
data/spec/data/prices.html ADDED
@@ -0,0 +1,34 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+   <title>Prices</title>
+
+ </head>
+
+ <body>
+
+ <h1>Prices</h1>
+
+ <table>
+   <tr>
+     <th>Item</th>
+     <th>Price</th>
+   </tr>
+
+   <tr>
+     <td><a href="apples.html">Apples</a></td>
+     <td>$0.35</td>
+   </tr>
+
+   <tr>
+     <td><a href="colors.html">Colors</a></td>
+     <td>N/A</td>
+   </tr>
+ </table>
+
+
+ </body>
+ </html>
data/spec/data/tiny.html ADDED
@@ -0,0 +1 @@
+ <html><head><title>asdf</title></head><body>asdf</body></html>
data/spec/part_queue_spec.rb ADDED
@@ -0,0 +1,40 @@
+ require 'spec/spec_helper'
+ require 'spec/queue_spec'
+
+ describe Rider::HostPartitionedQueue do
+   it_should_behave_like "queue"
+
+   before do
+     @q = Rider::HostPartitionedQueue.new('test')
+   end
+
+   it "should alternate among hosts when shifting" do
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+     [@q.shift, @q.shift, @q.shift, @q.shift].should ==
+       %w(http://example.com/path1 http://example.net/ http://localhost/path http://example.com/path2)
+   end
+
+   it "should return the same host if only one distinct host exists" do
+     %w(http://example.com/path1 http://example.com/path2 http://example.com/path3).each { |u| @q.push(u) }
+     [@q.shift, @q.shift, @q.shift].should == %w(http://example.com/path1 http://example.com/path2 http://example.com/path3)
+   end
+
+   it "should be equal to another queue with the same objects and state" do
+     @q2 = Rider::HostPartitionedQueue.new('test2')
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q2.push(u) }
+     @q.should == @q2
+   end
+
+   describe "when serializing" do
+     it "should write and read itself back" do
+       %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+       @q.serialize
+       Rider::HostPartitionedQueue.unserialize('test').should == @q
+     end
+
+     it "should return nil if asked to unserialize from a nonexistent file" do
+       Rider::HostPartitionedQueue.unserialize('nonexistent').should == nil
+     end
+   end
+ end
data/spec/queue_spec.rb ADDED
@@ -0,0 +1,43 @@
+ require 'spec/spec_helper'
+
+ shared_examples_for "queue" do
+   it "must not have a blank or nil name" do
+     lambda { Rider::Queue.new(nil) }.should raise_error(ArgumentError)
+     lambda { Rider::Queue.new('') }.should raise_error(ArgumentError)
+   end
+
+   it "should be empty after clearing" do
+     @q.clear
+     @q.empty?.should == true
+   end
+
+   it "should push then shift one item" do
+     @q.push('blue')
+     @q.shift.should == 'blue'
+   end
+
+   describe "when empty" do
+     it "should return nil if shifted" do
+       @q.shift.should == nil
+     end
+   end
+
+   it "should not clobber the queue upon initialization"
+ end
+
+ describe Rider::Queue do
+   before do
+     @q = Rider::Queue.new('tmp/colors.q')
+     @q.clear
+   end
+
+   after do
+     @q.clear
+   end
+
+   it "should push then shift multiple items" do
+     %w(red green orange).each { |color| @q.push(color) }
+     puts "POP x 3"
+     [@q.shift, @q.shift, @q.shift].should == %w(red green orange)
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1 @@
+ require 'lib/rider'
data/tasks/deployment.rake ADDED
@@ -0,0 +1,25 @@
+ namespace "doc" do
+   desc "Generate RDoc docs"
+   task :generate do
+     # Using rake/rdoctask invoked old rdoc 1.x for some reason, but this invokes rdoc 2.x
+     sh "rdoc --all --title 'Rider - Ruby Web crawler' --line-numbers --inline-source --force-update --all --charset utf-8 --main README README lib/"
+   end
+
+   desc "Upload docs to site"
+   task :upload do
+     sh "tar czfv rider-rdoc.tgz doc/"
+     puts
+     puts "Going to upload..."
+     puts
+     sh "scp rider-rdoc.tgz cardinal.stanford.edu:WWW/rider/"
+     sh "ssh cardinal.stanford.edu 'cd WWW/rider;tar xzfv rider-rdoc.tgz'"
+     sh "rm rider-rdoc.tgz"
+     puts
+     puts "Upload complete"
+   end
+
+   desc "Generate & upload"
+   task :update=>[:generate, :upload]
+ end
+
+
data/tasks/environment.rake ADDED
@@ -0,0 +1,7 @@
+ task :ruby_env do
+   RUBY_APP = if RUBY_PLATFORM =~ /java/
+     "jruby"
+   else
+     "ruby"
+   end unless defined? RUBY_APP
+ end
data/tasks/rspec.rake ADDED
@@ -0,0 +1,9 @@
+ require 'spec'
+ require 'spec/rake/spectask'
+
+
+ desc "Run the specs under spec/"
+ Spec::Rake::SpecTask.new do |t|
+   t.spec_opts = ['--colour', '--diff']
+   t.spec_files = FileList['spec/**/*_spec.rb']
+ end
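For reference, Spec::Rake::SpecTask.new defaults its task name to :spec under RSpec 1.x, so the suite runs from the project root with:

    rake spec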
metadata ADDED
@@ -0,0 +1,95 @@
+ --- !ruby/object:Gem::Specification
+ name: rider
+ version: !ruby/object:Gem::Version
+   version: "0.2"
+ platform: ruby
+ authors:
+ - Quinn Slack
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-10-07 00:00:00 -03:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: hpricot
+   type: :runtime
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.8.1
+     version:
+ - !ruby/object:Gem::Dependency
+   name: mechanize
+   type: :runtime
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.3
+     version:
+ description: Ruby Web crawler
+ email: me@rafaelss.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - bin/crawl
+ - lib/rider/crawler.rb
+ - lib/rider/part_queue.rb
+ - lib/rider/queue.rb
+ - lib/rider.rb
+ - Rakefile
+ - README
+ - spec/crawler_spec.rb
+ - spec/data/apples.html
+ - spec/data/colors.html
+ - spec/data/fruits.html
+ - spec/data/notitle.html
+ - spec/data/prices.html
+ - spec/data/tiny.html
+ - spec/part_queue_spec.rb
+ - spec/queue_spec.rb
+ - spec/spec_helper.rb
+ - tasks/deployment.rake
+ - tasks/environment.rake
+ - tasks/rspec.rake
+ has_rdoc: true
+ homepage: http://qslack.com/
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.5
+ signing_key:
+ specification_version: 3
+ summary: Ruby Web crawler
+ test_files:
+ - spec/crawler_spec.rb
+ - spec/part_queue_spec.rb
+ - spec/queue_spec.rb