rider 0.2

data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1 @@
+ Dir['tasks/**/*.rake'].each { |rake| load rake }
data/bin/crawl ADDED
@@ -0,0 +1,21 @@
+ #!/usr/bin/env ruby
+
+ require 'lib/rider'
+
+ queue_name = ARGV[0]
+ queue = Rider::Queue.new(queue_name)
+ puts "Crawling URLs from #{queue.filename}"
+
+ # the empty mask // matches everything, so this crawls every URL in the queue
+ crawler = Rider::Crawler.new(//, queue)
+
+ crawler.each_document do |uri, metadata, contents|
+   puts "-" * 60
+   puts "URL: #{uri}"
+   puts "Metadata: #{metadata.inspect}"
+   puts "Contents excerpt: #{contents[0..250]}"
+   puts
+ end
+
+ puts
+ puts "Crawl finished"
data/lib/rider.rb ADDED
@@ -0,0 +1,36 @@
+ $:.unshift File.dirname(__FILE__)
+
+ require 'rubygems'
+ require 'logger'
+ require 'mechanize'
+ require 'timeout'
+ require 'yaml'
+
+ require 'rider/queue'
+ require 'rider/part_queue'
+ require 'rider/crawler'
+
+ $KCODE = 'u'
+
+ module Rider
+   VERSION = '0.2'
+   LOGGER = Logger.new(STDOUT)
+   LOGGER.level = Logger::DEBUG
+
+   def log
+     LOGGER
+   end
+   module_function :log
+
+   def to_absolute(uri, link)
+     link = URI.encode(link.to_s.gsub(/#[a-zA-Z0-9_-]*$/, ''))
+     return nil if link.empty?
+
+     relative = URI(link)
+     absolute = uri.merge(relative)
+
+     absolute.path = '/' if absolute.path.nil? or absolute.path.empty?
+     return absolute
+   end
+   module_function :to_absolute
+ end
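
A quick sketch of what to_absolute does with relative links and fragments, assuming it is exposed as a module function like log (the example URLs are arbitrary):

    require 'lib/rider'

    base = URI.parse('http://example.com/fruits/apples.html')
    Rider.to_absolute(base, 'colors.html').to_s
    # => "http://example.com/fruits/colors.html"
    Rider.to_absolute(base, '/prices.html#cheap').to_s
    # => "http://example.com/prices.html" (the fragment is stripped before resolving)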
data/lib/rider/crawler.rb ADDED
@@ -0,0 +1,102 @@
+ require 'hpricot'
+
+ module Rider
+   class Crawler
+     # Creates a new Crawler with the specified +mask+ (a Regexp) and +queue+ (a Rider::Queue instance).
+     def initialize(mask, queue)
+       @mask = mask
+       @queue = queue
+       @seen_urls = []
+       @www = WWW::Mechanize.new do |a|
+         a.log = Logger.new("tmp/www.log")
+         a.pluggable_parser.default = Hpricot
+       end
+     end
+
+     # Returns true if +url+ matches the +mask+.
+     def match_mask?(url)
+       @mask.match(url) != nil
+     end
+
+     # Crawls documents, yielding each one's URL, response metadata, and contents; any URLs the block returns are queued.
+     def each_document
+       while doc_data = next_document()
+         follow_urls = yield(doc_data) || []
+         add_follow_urls(follow_urls)
+       end
+     end
+
+     def add_follow_urls(urls)
+       urls.each { |url| @queue.push(url) if follow_url?(url) }
+     end
+
+     def follow_url?(url)
+       match_mask?(url) and !seen_url?(url)
+     end
+
+     SKIPPABLE_EXCEPTIONS = [Errno::ETIMEDOUT, WWW::Mechanize::ResponseCodeError, Errno::EHOSTUNREACH, SocketError,
+                             Errno::ECONNREFUSED, Timeout::Error, Net::HTTPBadResponse, Hpricot::ParseError]
+     # Returns the next retrievable document from the next valid URL in the queue, skipping URLs that raise a skippable exception.
+     def next_document
+       begin
+         url = next_url()
+         return nil if url.nil?
+         doc_data = get(url)
+         saw_url(url)
+         return doc_data
+       rescue Exception => ex
+         if SKIPPABLE_EXCEPTIONS.include?(ex.class)
+           Rider.log.debug("EXCEPTION: #{ex.inspect}, skipping...")
+           retry # go on to the next document
+         else
+           raise ex
+         end
+       end
+     end
+
+     # Gets the document at the specified +url+. Returns an Array [uri, metadata, contents].
+     def get(url)
+       uri = URI.parse(url)
+       Timeout::timeout(8, Timeout::Error) do
+         case uri.scheme
+         when 'http'
+           get_http(uri)
+         when 'file'
+           get_file(uri)
+         else
+           raise(ArgumentError, "don't know how to get #{url}")
+         end
+       end
+     end
+
+     def get_file(uri)
+       filename = uri.to_s.gsub(/^file:\/\//, '')
+       [uri, {}, File.read(filename)]
+     end
+
+     def get_http(uri)
+       page = @www.get(uri)
+       meta = page.response
+       [uri, meta, page]
+     end
+
+     # Retrieves the next URL in the queue that is unseen and matches the +mask+.
+     def next_url
+       while url = @queue.shift
+         return url if valid_url?(url)
+       end
+     end
+
+     def valid_url?(url)
+       !seen_url?(url) && match_mask?(url)
+     end
+
+     def seen_url?(url)
+       @seen_urls.include?(url)
+     end
+
+     def saw_url(url)
+       @seen_urls << url
+     end
+   end
+ end
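
The block passed to each_document may return an Array of URLs; follow_url? then drops anything that fails the mask or was already seen before the rest are pushed onto the queue. A hedged sketch of a small localhost crawl (the queue file, mask, and href-scanning regexp are illustrative, and Mechanize's log expects an existing tmp/ directory):

    require 'lib/rider'

    queue = Rider::Queue.new('tmp/demo.q')
    queue.push('http://localhost/index.html')

    crawler = Rider::Crawler.new(/^http:\/\/localhost\//, queue)
    crawler.each_document do |uri, metadata, contents|
      puts "fetched #{uri}"
      # Return candidate links; the crawler keeps only unseen, mask-matching URLs.
      contents.to_s.scan(/href="([^"]+)"/).flatten.map do |href|
        Rider.to_absolute(uri, href).to_s
      end
    end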
data/lib/rider/part_queue.rb ADDED
@@ -0,0 +1,85 @@
+ module Rider
+   class HostPartitionedQueue
+     attr_reader :name
+
+     def initialize(name)
+       @name = name
+       clear
+     end
+
+     def push(url)
+       host = get_host(url)
+       @hosts << host unless @hosts.include?(host)
+       @urls_by_host[host] ||= []
+       @urls_by_host[host] << url
+       return true
+     end
+
+     def shift
+       if empty?
+         Rider.log.debug("Q #{name} SHIFT nil")
+         return nil
+       end
+       host = @hosts[@current_host_index]
+       url = @urls_by_host[host].shift
+
+       if @urls_by_host[host].empty?
+         @hosts.delete_at(@current_host_index)
+         @urls_by_host.delete(host)
+         # No need to increment @current_host_index: deleting from @hosts effectively
+         # pushed every later element down by one. But if the deleted host was the last
+         # item in the array, that index no longer exists, so it must wrap back to 0.
+         increment_current_host_index if @current_host_index == @hosts.length
+       else
+         increment_current_host_index
+       end
+       return url
+     end
+
+     def clear
+       @urls_by_host = {}
+       @hosts = []
+       @current_host_index = 0
+     end
+
+     def empty?
+       @hosts.empty?
+     end
+
+     def ==(another_queue)
+       another_queue.instance_variable_get("@urls_by_host") == @urls_by_host &&
+         another_queue.instance_variable_get("@hosts") == @hosts &&
+         another_queue.instance_variable_get("@current_host_index") == @current_host_index
+     end
+
+     def serialize
+       File.open(filename, 'w') do |file|
+         file.write(self.to_yaml)
+       end
+     end
+
+     def self.unserialize(name)
+       filename = "tmp/#{name}.q"
+       return nil unless File.exist?(filename)
+       YAML.load_file(filename)
+     end
+
+     private
+     def get_host(url)
+       URI.parse(url).host
+     end
+
+     def increment_current_host_index
+       if @hosts.empty?
+         @current_host_index = 0
+       else
+         # advance by one, wrapping back to 0 past the end of the array
+         @current_host_index = (@current_host_index + 1) % @hosts.length
+       end
+     end
+
+     def filename
+       "tmp/#{name}.q"
+     end
+   end
+ end
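
Shifting round-robins across hosts, so a crawl never issues consecutive requests to the same server while another host still has URLs waiting. A short trace of the alternation (URLs invented):

    q = Rider::HostPartitionedQueue.new('demo')
    q.push('http://example.com/a')
    q.push('http://example.com/b')
    q.push('http://example.net/c')

    q.shift  # => "http://example.com/a"
    q.shift  # => "http://example.net/c"  (example.net is now drained and removed)
    q.shift  # => "http://example.com/b"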
data/lib/rider/queue.rb ADDED
@@ -0,0 +1,40 @@
+ module Rider
+   class Queue
+     attr_reader :filename
+     def initialize(filename)
+       raise(ArgumentError, "queues must have a filename") if !filename or filename.empty?
+       @filename = filename
+     end
+
+     def push(item)
+       Rider.log.debug("Q #{filename} PUSH #{item}")
+       File.open(filename, "a") do |file|
+         file.puts(item)
+       end
+       return true
+     end
+
+     def shift
+       if empty?
+         Rider.log.debug("Q #{filename} SHIFT nil")
+         return nil
+       end
+       lines = File.readlines(filename)
+       item = lines.shift.strip
+       File.open(filename, "w") do |file|
+         file.write(lines.join)
+       end
+       Rider.log.debug("Q #{filename} SHIFT #{item}")
+       return item
+     end
+
+     def clear
+       File.unlink(filename) if File.exist?(filename)
+       return true
+     end
+
+     def empty?
+       !File.exist?(filename) or File.zero?(filename)
+     end
+   end
+ end
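
Rider::Queue keeps all of its state in the file, one item per line: push appends a line and shift rewrites the file without the first line, so a crawl can be stopped and resumed. A brief sketch (tmp/colors.q is the same file name the specs use):

    q = Rider::Queue.new('tmp/colors.q')
    q.push('red')    # appends "red\n" to tmp/colors.q
    q.push('green')
    q.shift          # => "red"; the file now contains only "green\n"
    q.clear          # deletes the file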
data/spec/crawler_spec.rb ADDED
@@ -0,0 +1,94 @@
+ require 'spec/spec_helper'
+
+ describe Rider::Crawler do
+   before do
+     @queue = Rider::Queue.new('web')
+     @crawler = Rider::Crawler.new(/http:\/\/localhost/, @queue)
+   end
+
+   describe "when checking URLs against the mask" do
+     it "should return true for a URL that matches the mask" do
+       @crawler.match_mask?("http://localhost/some/path").should == true
+     end
+
+     it "should return false for a URL that does not match the mask" do
+       @crawler.match_mask?("http://example.com/some/path").should == false
+     end
+   end
+
+   describe "when checking URL validity" do
+     before do
+       @urls = %w(http://example.com/invalid http://localhost/valid http://localhost/valid/unseen)
+     end
+
+     it "should return URLs matching the mask" do
+       @urls.select { |url| @crawler.valid_url?(url) }.should == ["http://localhost/valid", "http://localhost/valid/unseen"]
+     end
+
+     it "should return only unseen URLs" do
+       @crawler.saw_url('http://localhost/valid')
+       @urls.select { |url| @crawler.valid_url?(url) }.should == ['http://localhost/valid/unseen']
+     end
+   end
+
+   describe "when determining URLs to follow" do
+     it "should follow URLs that match the mask" do
+       @crawler.follow_url?('http://localhost/abc').should == true
+     end
+
+     it "should not follow URLs that don't match the mask" do
+       @crawler.follow_url?('http://invalid.com').should == false
+     end
+
+     it "should follow URLs that haven't been seen"
+     it "should not follow URLs that have been seen already"
+   end
+
+   describe "when getting the next document" do
+
+   end
+
+   describe "when getting documents" do
+     it "should raise an error for schemes other than http and file" do
+       lambda { @crawler.get('ftp://example.com') }.should raise_error(ArgumentError)
+     end
+
+     describe "when getting file:// documents" do
+       before do
+         @filename = File.expand_path(File.join(File.dirname(__FILE__), 'data', 'apples.html'))
+         @file_uri = 'file://' + @filename
+       end
+
+       it "should return an array whose first element is the uri" do
+         @crawler.get_file(@file_uri)[0].should == @file_uri
+       end
+
+       it "should return an array whose second element is blank metadata" do
+         @crawler.get_file(@file_uri)[1].should == {}
+       end
+
+       it "should return an array whose third element is the file contents" do
+         @crawler.get_file(@file_uri)[2].should == File.read(@filename)
+       end
+     end
+
+     describe "when getting http:// documents" do
+       before do
+         @doc_uri = 'http://localhost/simplewikipedia/articles/a/l/g/Algebra.html'
+       end
+
+       it "should return an array whose first element is the uri" do
+         @crawler.get_http(@doc_uri)[0].should == @doc_uri
+       end
+
+       it "should return an array whose second element is the response metadata" do
+         meta = @crawler.get_http(@doc_uri)[1]
+         meta['Content-type'].should == 'text/html'
+       end
+
+       it "should return an array whose third element is the page contents" do
+         @crawler.get_http(@doc_uri)[2].match(/Algebra is taught in school/).should_not == nil
+       end
+     end
+   end
+ end
data/spec/data/apples.html ADDED
@@ -0,0 +1,23 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <title>Apples</title>
+
+ </head>
+
+ <body>
+
+ <h1>Apples</h1>
+
+ <p>
+ Some apples are <a href="colors.html">red</a>.
+ Some are <a href="colors.html">green</a>.
+ They <a href="prices.html">do not cost much money</a>.
+ You can <a href="http://buyapples.com/">buy apples</a>.
+ </p>
+
+ </body>
+ </html>
data/spec/data/colors.html ADDED
@@ -0,0 +1,24 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <title>Colors</title>
+
+ </head>
+
+ <body>
+
+ <h1>Colors</h1>
+
+ <dl>
+ <dt>Red</dt>
+ <dd><a href="apples.html">Apples</a></dd>
+
+ <dt>Green</dt>
+ <dd><a href="apples.html">Apples</a> or <a href="prices.html">dollars</a>.</dd>
+ </dl>
+
+ </body>
+ </html>
data/spec/data/fruits.html ADDED
@@ -0,0 +1,17 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <title>Fruits</title>
+
+ </head>
+
+ <body>
+
+ <p><a href="apples.html">Apples</a> are a fruit.</p>
+
+
+ </body>
+ </html>
data/spec/data/notitle.html ADDED
@@ -0,0 +1,14 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <title></title>
+
+ </head>
+
+ <body>
+
+ </body>
+ </html>
data/spec/data/prices.html ADDED
@@ -0,0 +1,34 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+   "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <title>Prices</title>
+
+ </head>
+
+ <body>
+
+ <h1>Prices</h1>
+
+ <table>
+ <tr>
+ <th>Item</th>
+ <th>Price</th>
+ </tr>
+
+ <tr>
+ <td><a href="apples.html">Apples</a></td>
+ <td>$0.35</td>
+ </tr>
+
+ <tr>
+ <td><a href="colors.html">Colors</a></td>
+ <td>N/A</td>
+ </tr>
+ </table>
+
+
+ </body>
+ </html>
data/spec/data/tiny.html ADDED
@@ -0,0 +1 @@
+ <html><head><title>asdf</title></head><body>asdf</body></html>
data/spec/part_queue_spec.rb ADDED
@@ -0,0 +1,40 @@
+ require 'spec/spec_helper'
+ require 'spec/queue_spec'
+
+ describe Rider::HostPartitionedQueue do
+   it_should_behave_like "queue"
+
+   before do
+     @q = Rider::HostPartitionedQueue.new('test')
+   end
+
+   it "should alternate among hosts when shifting" do
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+     [@q.shift, @q.shift, @q.shift, @q.shift].should ==
+       %w(http://example.com/path1 http://example.net/ http://localhost/path http://example.com/path2)
+   end
+
+   it "should return the same host if only one distinct host exists" do
+     %w(http://example.com/path1 http://example.com/path2 http://example.com/path3).each { |u| @q.push(u) }
+     [@q.shift, @q.shift, @q.shift].should == %w(http://example.com/path1 http://example.com/path2 http://example.com/path3)
+   end
+
+   it "should be equal to another queue with the same objects and state" do
+     @q2 = Rider::HostPartitionedQueue.new('test2')
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+     %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q2.push(u) }
+     @q.should == @q2
+   end
+
+   describe "when serializing" do
+     it "should write and read itself back" do
+       %w(http://example.com/path1 http://example.com/path2 http://example.net/ http://localhost/path).each { |u| @q.push(u) }
+       @q.serialize
+       Rider::HostPartitionedQueue.unserialize('test').should == @q
+     end
+
+     it "should return nil if asked to unserialize from a nonexistent file" do
+       Rider::HostPartitionedQueue.unserialize('nonexistent').should == nil
+     end
+   end
+ end
data/spec/queue_spec.rb ADDED
@@ -0,0 +1,42 @@
+ require 'spec/spec_helper'
+
+ shared_examples_for "queue" do
+   it "must not have a blank or nil name" do
+     lambda { Rider::Queue.new(nil) }.should raise_error(ArgumentError)
+     lambda { Rider::Queue.new('') }.should raise_error(ArgumentError)
+   end
+
+   it "should be empty after clearing" do
+     @q.clear
+     @q.empty?.should == true
+   end
+
+   it "should push then shift one item" do
+     @q.push('blue')
+     @q.shift.should == 'blue'
+   end
+
+   describe "when empty" do
+     it "should return nil if shifted" do
+       @q.shift.should == nil
+     end
+   end
+
+   it "should not clobber the queue upon initialization"
+ end
+
+ describe Rider::Queue do
+   before do
+     @q = Rider::Queue.new('tmp/colors.q')
+     @q.clear
+   end
+
+   after do
+     @q.clear
+   end
+
+   it "should push then shift multiple items" do
+     %w(red green orange).each { |color| @q.push(color) }
+     [@q.shift, @q.shift, @q.shift].should == %w(red green orange)
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1 @@
+ require 'lib/rider'
data/tasks/deployment.rake ADDED
@@ -0,0 +1,23 @@
+ namespace "doc" do
+   desc "Generate RDoc docs"
+   task :generate do
+     # Using rake/rdoctask invoked the old RDoc 1.x for some reason, but this invokes RDoc 2.x
+     sh "rdoc --all --title 'Rider - Ruby Web crawler' --line-numbers --inline-source --force-update --charset utf-8 --main README README lib/"
+   end
+
+   desc "Upload docs to site"
+   task :upload do
+     sh "tar czfv rider-rdoc.tgz doc/"
+     puts
+     puts "Going to upload..."
+     puts
+     sh "scp rider-rdoc.tgz cardinal.stanford.edu:WWW/rider/"
+     sh "ssh cardinal.stanford.edu 'cd WWW/rider; tar xzfv rider-rdoc.tgz'"
+     sh "rm rider-rdoc.tgz"
+     puts
+     puts "Upload complete"
+   end
+
+   desc "Generate & upload"
+   task :update => [:generate, :upload]
+ end
data/tasks/environment.rake ADDED
@@ -0,0 +1,7 @@
+ task :ruby_env do
+   RUBY_APP = if RUBY_PLATFORM =~ /java/
+     "jruby"
+   else
+     "ruby"
+   end unless defined? RUBY_APP
+ end
data/tasks/rspec.rake ADDED
@@ -0,0 +1,8 @@
+ require 'spec'
+ require 'spec/rake/spectask'
+
+ desc "Run the specs under spec/"
+ Spec::Rake::SpecTask.new do |t|
+   t.spec_opts = ['--colour', '--diff']
+   t.spec_files = FileList['spec/**/*_spec.rb']
+ end
metadata ADDED
@@ -0,0 +1,95 @@
+ --- !ruby/object:Gem::Specification
+ name: rider
+ version: !ruby/object:Gem::Version
+   version: "0.2"
+ platform: ruby
+ authors:
+ - Quinn Slack
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-10-07 00:00:00 -03:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: hpricot
+   type: :runtime
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.8.1
+     version:
+ - !ruby/object:Gem::Dependency
+   name: mechanize
+   type: :runtime
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 0.9.3
+     version:
+ description: Ruby Web crawler
+ email: me@rafaelss.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files: []
+
+ files:
+ - bin/crawl
+ - lib/rider/crawler.rb
+ - lib/rider/part_queue.rb
+ - lib/rider/queue.rb
+ - lib/rider.rb
+ - Rakefile
+ - README
+ - spec/crawler_spec.rb
+ - spec/data/apples.html
+ - spec/data/colors.html
+ - spec/data/fruits.html
+ - spec/data/notitle.html
+ - spec/data/prices.html
+ - spec/data/tiny.html
+ - spec/part_queue_spec.rb
+ - spec/queue_spec.rb
+ - spec/spec_helper.rb
+ - tasks/deployment.rake
+ - tasks/environment.rake
+ - tasks/rspec.rake
+ has_rdoc: true
+ homepage: http://qslack.com/
+ licenses: []
+
+ post_install_message:
+ rdoc_options: []
+
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ requirements: []
+
+ rubyforge_project:
+ rubygems_version: 1.3.5
+ signing_key:
+ specification_version: 3
+ summary: Ruby Web crawler
+ test_files:
+ - spec/crawler_spec.rb
+ - spec/part_queue_spec.rb
+ - spec/queue_spec.rb