cobweb 0.0.32 → 0.0.33
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +3 -7
- data/lib/cobweb.rb +17 -9
- data/lib/content_link_parser.rb +11 -1
- data/lib/crawl_job.rb +15 -14
- data/spec/cobweb/cobweb_crawler_spec.rb +29 -30
- data/spec/cobweb/cobweb_spec.rb +130 -198
- data/spec/cobweb/content_link_parser_spec.rb +118 -81
- data/spec/spec_helper.rb +61 -0
- metadata +20 -20
data/README.textile
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
|
2
|
-
h1. Cobweb v0.0.
|
2
|
+
h1. Cobweb v0.0.33
|
3
3
|
|
4
4
|
h2. Intro
|
5
5
|
|
@@ -95,13 +95,9 @@ Simple get that obey's the options supplied in new.
|
|
95
95
|
bq. crawler.head("http://www.google.com/")
|
96
96
|
|
97
97
|
|
98
|
-
|
98
|
+
h3. Contributing/Testing
|
99
99
|
|
100
|
-
|
101
|
-
|
102
|
-
* remove resque requirement - I'd like the crawler to work independant of resque as a normal standalone crawler
|
103
|
-
* fix crawl finished notification
|
104
|
-
* improve internal link control
|
100
|
+
Feel free to contribute small or large bits of code, just please make sure that there are rspec test for the features your submitting. We also test on travis at http://travis-ci.org/#!/stewartmckee/cobweb if you want to see the state of the project.
|
105
101
|
|
106
102
|
h2. License
|
107
103
|
|
data/lib/cobweb.rb
CHANGED
@@ -19,7 +19,7 @@ class Cobweb
|
|
19
19
|
# investigate using event machine for single threaded crawling
|
20
20
|
|
21
21
|
def self.version
|
22
|
-
"0.0.
|
22
|
+
"0.0.33"
|
23
23
|
end
|
24
24
|
|
25
25
|
def method_missing(method_sym, *arguments, &block)
|
@@ -97,7 +97,7 @@ class Cobweb
|
|
97
97
|
# check if it has already been cached
|
98
98
|
if redis.get(unique_id) and @options[:cache]
|
99
99
|
puts "Cache hit for #{url}" unless @options[:quiet]
|
100
|
-
content = Marshal.load(redis.get(unique_id))
|
100
|
+
content = deep_symbolize_keys(Marshal.load(redis.get(unique_id)))
|
101
101
|
else
|
102
102
|
# this url is valid for processing so lets get on with it
|
103
103
|
uri = Addressable::URI.parse(url.strip)
|
@@ -169,7 +169,7 @@ class Cobweb
|
|
169
169
|
content[:body] = Base64.encode64(response.body)
|
170
170
|
end
|
171
171
|
content[:location] = response["location"]
|
172
|
-
content[:headers] = response.to_hash
|
172
|
+
content[:headers] = deep_symbolize_keys(response.to_hash)
|
173
173
|
# parse data for links
|
174
174
|
link_parser = ContentLinkParser.new(content[:url], content[:body])
|
175
175
|
content[:links] = link_parser.link_data
|
@@ -252,7 +252,7 @@ class Cobweb
|
|
252
252
|
# check if it has already been cached
|
253
253
|
if redis.get("head-#{unique_id}") and @options[:cache]
|
254
254
|
puts "Cache hit for #{url}" unless @options[:quiet]
|
255
|
-
content = Marshal.load(redis.get("head-#{unique_id}"))
|
255
|
+
content = deep_symbolize_keys(Marshal.load(redis.get("head-#{unique_id}")))
|
256
256
|
else
|
257
257
|
print "Retrieving #{url }... " unless @options[:quiet]
|
258
258
|
uri = Addressable::URI.parse(url.strip)
|
@@ -336,9 +336,17 @@ class Cobweb
|
|
336
336
|
content
|
337
337
|
end
|
338
338
|
end
|
339
|
+
|
340
|
+
def deep_symbolize_keys(hash)
|
341
|
+
hash.keys.each do |key|
|
342
|
+
value = hash[key]
|
343
|
+
hash.delete(key)
|
344
|
+
hash[key.to_sym] = value
|
345
|
+
if hash[key.to_sym].instance_of? Hash
|
346
|
+
hash[key.to_sym] = deep_symbolize_keys(hash[key.to_sym])
|
347
|
+
end
|
348
|
+
end
|
349
|
+
hash
|
350
|
+
end
|
351
|
+
|
339
352
|
end
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
data/lib/content_link_parser.rb
CHANGED
@@ -18,7 +18,10 @@ class ContentLinkParser
|
|
18
18
|
@options[:tags][:images] = [["img[src]", "src"]]
|
19
19
|
@options[:tags][:related] = [["link[rel]", "href"]]
|
20
20
|
@options[:tags][:scripts] = [["script[src]", "src"]]
|
21
|
-
@options[:tags][:styles] = [["link[rel='stylesheet'][href]", "href"], ["style[@type^='text/css']",
|
21
|
+
@options[:tags][:styles] = [["link[rel='stylesheet'][href]", "href"], ["style[@type^='text/css']", lambda{|array,tag|
|
22
|
+
first_regex =/url\((['"]?)(.*?)\1\)/
|
23
|
+
tag.content.scan(first_regex) {|match| array << Addressable::URI.parse(match[1]).to_s}
|
24
|
+
}]]
|
22
25
|
|
23
26
|
#clear the default tags if required
|
24
27
|
@options[:tags] = {} if @options[:ignore_default_tags]
|
@@ -67,6 +70,13 @@ class ContentLinkParser
|
|
67
70
|
rescue
|
68
71
|
end
|
69
72
|
end
|
73
|
+
elsif attribute.instance_of? Proc
|
74
|
+
@doc.css(selector).each do |tag|
|
75
|
+
begin
|
76
|
+
attribute.call(array, tag)
|
77
|
+
rescue
|
78
|
+
end
|
79
|
+
end
|
70
80
|
end
|
71
81
|
end
|
72
82
|
|
data/lib/crawl_job.rb
CHANGED
@@ -35,19 +35,21 @@ class CrawlJob
|
|
35
35
|
# set the base url if this is the first page
|
36
36
|
set_base_url @redis, content, content_request
|
37
37
|
|
38
|
-
|
38
|
+
if within_queue_limits?(content_request[:crawl_limit])
|
39
|
+
internal_links = all_links_from_content(content).map{|link| link.to_s}
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
# reject the link if we've crawled it or queued it
|
42
|
+
internal_links.reject!{|link| @redis.sismember("crawled", link)}
|
43
|
+
internal_links.reject!{|link| @redis.sismember("queued", link)}
|
43
44
|
|
44
|
-
|
45
|
-
|
45
|
+
# select the link if its internal
|
46
|
+
internal_links.select!{|link| internal_link?(link)}
|
46
47
|
|
47
|
-
|
48
|
-
|
48
|
+
internal_links.each do |link|
|
49
|
+
enqueue_content(content_request, link) if within_queue_limits?(content_request[:crawl_limit])
|
50
|
+
end
|
49
51
|
end
|
50
|
-
|
52
|
+
|
51
53
|
# enqueue to processing queue
|
52
54
|
Resque.enqueue(const_get(content_request[:processing_queue]), content.merge({:internal_urls => internal_patterns, :redis_options => content_request[:redis_options], :source_id => content_request[:source_id], :crawl_id => content_request[:crawl_id]}))
|
53
55
|
puts "#{content_request[:url]} has been sent for processing." if content_request[:debug]
|
@@ -74,11 +76,10 @@ class CrawlJob
|
|
74
76
|
Resque.enqueue(const_get(content_request[:crawl_finished_queue]), stats.merge({:redis_options => content_request[:redis_options], :crawl_id => content_request[:crawl_id], :source_id => content_request[:source_id]}))
|
75
77
|
|
76
78
|
end
|
77
|
-
|
78
|
-
else
|
79
|
-
puts "Crawl Limit Exceeded by #{@crawl_counter - content_request[:crawl_limit].to_i} objects" if content_request[:debug]
|
80
79
|
end
|
81
80
|
else
|
81
|
+
@redis.srem "queued", content_request[:url]
|
82
|
+
decrement_queue_counter
|
82
83
|
puts "Already crawled #{content_request[:url]}" if content_request[:debug]
|
83
84
|
end
|
84
85
|
|
@@ -87,11 +88,11 @@ class CrawlJob
|
|
87
88
|
private
|
88
89
|
|
89
90
|
def self.within_crawl_limits?(crawl_limit)
|
90
|
-
crawl_limit.nil? or @crawl_counter
|
91
|
+
crawl_limit.nil? or @crawl_counter < crawl_limit.to_i
|
91
92
|
end
|
92
93
|
|
93
94
|
def self.within_queue_limits?(crawl_limit)
|
94
|
-
within_crawl_limits?(crawl_limit) and (crawl_limit.nil? or @queue_counter
|
95
|
+
within_crawl_limits?(crawl_limit) and (crawl_limit.nil? or (@queue_counter + @crawl_counter) < crawl_limit.to_i)
|
95
96
|
end
|
96
97
|
|
97
98
|
def self.set_base_url(redis, content, content_request)
|
@@ -17,41 +17,40 @@ describe CobwebCrawler do
|
|
17
17
|
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
|
21
|
+
it "should generate a cobweb_crawler object" do
|
22
|
+
CobwebCrawler.new.should be_an_instance_of CobwebCrawler
|
23
|
+
end
|
24
|
+
|
25
|
+
describe "crawl" do
|
26
|
+
it "should crawl a site" do
|
27
|
+
|
28
|
+
# temporary tests to run crawler - proper specs to follow.. honest
|
29
|
+
|
30
|
+
crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
|
31
|
+
|
32
|
+
statistics = crawler.crawl("http://rockwellcottage.heroku.com/")
|
33
|
+
|
34
|
+
statistics.should_not be_nil
|
35
|
+
statistics.should be_an_instance_of Hash
|
36
|
+
|
25
37
|
end
|
26
38
|
|
27
|
-
|
28
|
-
it "should crawl a site" do
|
29
|
-
|
30
|
-
# temporary tests to run crawler - proper specs to follow.. honest
|
31
|
-
|
32
|
-
crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
|
33
|
-
|
34
|
-
statistics = crawler.crawl("http://rockwellcottage.heroku.com/")
|
35
|
-
|
36
|
-
ap statistics
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
it "should take a block" do
|
39
|
+
it "should take a block" do
|
41
40
|
|
42
|
-
|
41
|
+
# temporary tests to run crawler - proper specs to follow.. honest
|
43
42
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
ap statistics
|
52
|
-
|
43
|
+
crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
|
44
|
+
|
45
|
+
statistics = crawler.crawl("http://www.rockwellcottage.com/") do |content, statistics|
|
46
|
+
content[:url].should_not be_nil
|
47
|
+
statistics[:average_length].should_not be_nil
|
53
48
|
end
|
54
|
-
|
49
|
+
|
50
|
+
statistics.should_not be_nil
|
51
|
+
statistics.should be_an_instance_of Hash
|
52
|
+
|
53
|
+
end
|
55
54
|
end
|
56
55
|
|
57
56
|
end
|
data/spec/cobweb/cobweb_spec.rb
CHANGED
@@ -3,253 +3,185 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
|
3
3
|
describe Cobweb do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
|
7
6
|
@base_url = "http://www.baseurl.com/"
|
8
|
-
|
9
|
-
@default_headers = {"Cache-Control" => "private, max-age=0",
|
10
|
-
"Date" => "Wed, 10 Nov 2010 09:06:17 GMT",
|
11
|
-
"Expires" => "-1",
|
12
|
-
"Content-Type" => "text/html; charset=UTF-8",
|
13
|
-
"Content-Encoding" => "gzip",
|
14
|
-
"Transfer-Encoding" => "chunked",
|
15
|
-
"Server" => "gws",
|
16
|
-
"X-XSS-Protection" => "1; mode=block"}
|
17
|
-
|
18
7
|
@cobweb = Cobweb.new :quiet => true, :cache => nil
|
19
|
-
end
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
@
|
46
|
-
|
47
|
-
@mock_http_response.stub!(:code).and_return(200)
|
48
|
-
@mock_http_response.stub!(:content_type).and_return("text/html")
|
49
|
-
@mock_http_response.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
50
|
-
@mock_http_response.stub!(:[]).with("location").and_return(@default_headers["location"])
|
51
|
-
@mock_http_response.stub!(:content_length).and_return(1024)
|
52
|
-
@mock_http_response.stub!(:body).and_return("asdf")
|
53
|
-
@mock_http_response.stub!(:to_hash).and_return(@default_headers)
|
54
|
-
|
55
|
-
@mock_http_redirect_response.stub!(:code).and_return(301)
|
56
|
-
@mock_http_redirect_response.stub!(:content_type).and_return("text/html")
|
57
|
-
@mock_http_redirect_response.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
58
|
-
@mock_http_redirect_response.stub!(:[]).with("location").and_return("http://redirected-to.com/redirect2.html")
|
59
|
-
@mock_http_redirect_response.stub!(:content_length).and_return(2048)
|
60
|
-
@mock_http_redirect_response.stub!(:body).and_return("redirected body")
|
61
|
-
@mock_http_redirect_response.stub!(:to_hash).and_return(@default_headers)
|
62
|
-
|
63
|
-
@mock_http_redirect_response2.stub!(:code).and_return(301)
|
64
|
-
@mock_http_redirect_response2.stub!(:content_type).and_return("text/html")
|
65
|
-
@mock_http_redirect_response2.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
66
|
-
@mock_http_redirect_response2.stub!(:[]).with("location").and_return("http://redirected-to.com/redirected.html")
|
67
|
-
@mock_http_redirect_response2.stub!(:content_length).and_return(2048)
|
68
|
-
@mock_http_redirect_response2.stub!(:body).and_return("redirected body")
|
69
|
-
@mock_http_redirect_response2.stub!(:to_hash).and_return(@default_headers)
|
70
|
-
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should generate a cobweb object" do
|
11
|
+
Cobweb.new.should be_an_instance_of Cobweb
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should setup with defaults" do
|
15
|
+
cobweb = Cobweb.new
|
16
|
+
|
17
|
+
options = cobweb.instance_eval("@options")
|
18
|
+
|
19
|
+
options[:follow_redirects].should == true
|
20
|
+
options[:redirect_limit].should == 10
|
21
|
+
options[:processing_queue].should == CobwebProcessJob
|
22
|
+
options[:crawl_finished_queue].should == CobwebFinishedJob
|
23
|
+
options[:quiet].should == true
|
24
|
+
options[:debug].should == false
|
25
|
+
options[:cache].should == 300
|
26
|
+
options[:timeout].should == 10
|
27
|
+
options[:redis_options].should == {}
|
28
|
+
options[:internal_urls].should == []
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "get" do
|
33
|
+
it "should return a hash with default values" do
|
34
|
+
@cobweb.get(@base_url).should be_an_instance_of Hash
|
71
35
|
end
|
72
36
|
|
73
|
-
it "should
|
74
|
-
|
37
|
+
it "should return a hash with default values without quiet option" do
|
38
|
+
@cobweb.get(@base_url).should be_an_instance_of Hash
|
75
39
|
end
|
76
40
|
|
77
|
-
it "should
|
78
|
-
cobweb
|
79
|
-
|
80
|
-
options = cobweb.instance_eval("@options")
|
81
|
-
ap options
|
82
|
-
|
83
|
-
options[:follow_redirects].should == true
|
84
|
-
options[:redirect_limit].should == 10
|
85
|
-
options[:processing_queue].should == CobwebProcessJob
|
86
|
-
options[:crawl_finished_queue].should == CobwebFinishedJob
|
87
|
-
options[:quiet].should == true
|
88
|
-
options[:debug].should == false
|
89
|
-
options[:cache].should == 300
|
90
|
-
options[:timeout].should == 10
|
91
|
-
options[:redis_options].should == {}
|
92
|
-
options[:internal_urls].should == []
|
93
|
-
|
41
|
+
it "should raise exception if there is no url" do
|
42
|
+
lambda {@cobweb.get(nil)}.should raise_error("url cannot be nil")
|
94
43
|
end
|
95
44
|
|
96
|
-
describe "
|
97
|
-
it "should return
|
98
|
-
@cobweb.get(@base_url).should
|
45
|
+
describe "content object" do
|
46
|
+
it "should return the url" do
|
47
|
+
@cobweb.get(@base_url)[:url].should == @base_url
|
48
|
+
end
|
49
|
+
it "should return correct content-type" do
|
50
|
+
@mock_http_response.stub!(:content_type).and_return("image/jpeg")
|
51
|
+
@cobweb.get(@base_url)[:mime_type].should == "image/jpeg"
|
52
|
+
end
|
53
|
+
it "should return correct status-code" do
|
54
|
+
@mock_http_response.stub!(:code).and_return(404)
|
55
|
+
@cobweb.get(@base_url)[:status_code].should == 404
|
56
|
+
end
|
57
|
+
it "should return correct status-code" do
|
58
|
+
@mock_http_response.stub!(:code).and_return(404)
|
59
|
+
@cobweb.get(@base_url)[:status_code].should == 404
|
60
|
+
end
|
61
|
+
it "should return correct character_set" do
|
62
|
+
@cobweb.get(@base_url)[:character_set].should == "UTF-8"
|
63
|
+
end
|
64
|
+
it "should return correct content_length" do
|
65
|
+
@cobweb.get(@base_url)[:length].should == 1024
|
66
|
+
end
|
67
|
+
it "should return correct content_body" do
|
68
|
+
@cobweb.get(@base_url)[:body].should == "asdf"
|
69
|
+
end
|
70
|
+
it "should return correct location" do
|
71
|
+
@cobweb.get(@base_url)[:location].should == nil
|
72
|
+
|
73
|
+
@mock_http_response.stub!(:[]).with("location").and_return("http://google.com/")
|
74
|
+
@cobweb.get(@base_url)[:location].should == "http://google.com/"
|
75
|
+
end
|
76
|
+
it "should return correct headers" do
|
77
|
+
@cobweb.get(@base_url)[:headers].should == @default_headers
|
78
|
+
end
|
79
|
+
it "should return correct a hash of links" do
|
80
|
+
@cobweb.get(@base_url)[:links].should be_an_instance_of Hash
|
81
|
+
end
|
82
|
+
it "should return the response time for the url" do
|
83
|
+
@cobweb.get(@base_url)[:response_time].should be_an_instance_of Float
|
84
|
+
end
|
85
|
+
end
|
86
|
+
describe "with redirect" do
|
87
|
+
|
88
|
+
before(:each) do
|
89
|
+
@base_url = "http://redirect-me.com/redirect.html"
|
90
|
+
@cobweb = Cobweb.new(:follow_redirects => true, :quiet => true, :cache => nil)
|
91
|
+
|
92
|
+
@mock_http_response.stub!(:[]).with("location").and_return("http://google.com/")
|
93
|
+
@mock_http_redirect_response.stub!(:[]).with("location").and_return("http://redirected-to.com/redirect2.html")
|
94
|
+
@mock_http_redirect_response2.stub!(:[]).with("location").and_return("http://redirected-to.com/redirected.html")
|
95
|
+
|
99
96
|
end
|
100
97
|
|
101
|
-
it "should
|
102
|
-
|
98
|
+
it "should flow through redirect" #do
|
99
|
+
|
100
|
+
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
101
|
+
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
102
|
+
#
|
103
|
+
#content = @cobweb.get(@base_url)
|
104
|
+
#content.should be_an_instance_of HashHelper
|
105
|
+
#ap content
|
106
|
+
#content[:url].should == "http://redirect-me.com/redirect.html"
|
107
|
+
#content[:redirect_through].length.should == 2
|
108
|
+
#content[:mime_type].should == "text/html"
|
109
|
+
#content[:body].should == "asdf"
|
110
|
+
|
111
|
+
#end
|
112
|
+
it "should return the path followed" #do
|
113
|
+
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
114
|
+
#
|
115
|
+
#content = @cobweb.get(@base_url)
|
116
|
+
#content[:redirect_through].should == ["http://redirected-to.com/redirect2.html", "http://redirected-to.com/redirected.html"]
|
117
|
+
|
118
|
+
#end
|
119
|
+
it "should not follow with redirect disabled" do
|
120
|
+
@cobweb = Cobweb.new(:follow_redirects => false, :cache => nil)
|
121
|
+
@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
122
|
+
|
123
|
+
content = @cobweb.get(@base_url)
|
124
|
+
content[:url].should == "http://redirect-me.com/redirect.html"
|
125
|
+
content[:redirect_through].should be_nil
|
126
|
+
content[:status_code].should == 301
|
127
|
+
content[:mime_type].should == "text/html"
|
128
|
+
content[:body].should == "redirected body"
|
129
|
+
|
103
130
|
end
|
131
|
+
end
|
132
|
+
|
133
|
+
describe "with cache" do
|
104
134
|
|
105
|
-
|
106
|
-
|
135
|
+
before(:each) do
|
136
|
+
@cobweb = Cobweb.new :quiet => true, :cache => 200
|
107
137
|
end
|
108
138
|
|
109
139
|
describe "content object" do
|
110
140
|
it "should return the url" do
|
111
141
|
@cobweb.get(@base_url)[:url].should == @base_url
|
142
|
+
@cobweb.get(@base_url)[:url].should == @base_url
|
112
143
|
end
|
113
144
|
it "should return correct content-type" do
|
114
145
|
@mock_http_response.stub!(:content_type).and_return("image/jpeg")
|
115
146
|
@cobweb.get(@base_url)[:mime_type].should == "image/jpeg"
|
147
|
+
@cobweb.get(@base_url)[:mime_type].should == "image/jpeg"
|
116
148
|
end
|
117
149
|
it "should return correct status-code" do
|
118
150
|
@mock_http_response.stub!(:code).and_return(404)
|
119
151
|
@cobweb.get(@base_url)[:status_code].should == 404
|
152
|
+
@cobweb.get(@base_url)[:status_code].should == 404
|
120
153
|
end
|
121
154
|
it "should return correct status-code" do
|
122
155
|
@mock_http_response.stub!(:code).and_return(404)
|
123
156
|
@cobweb.get(@base_url)[:status_code].should == 404
|
157
|
+
@cobweb.get(@base_url)[:status_code].should == 404
|
124
158
|
end
|
125
159
|
it "should return correct character_set" do
|
126
160
|
@cobweb.get(@base_url)[:character_set].should == "UTF-8"
|
161
|
+
@cobweb.get(@base_url)[:character_set].should == "UTF-8"
|
127
162
|
end
|
128
163
|
it "should return correct content_length" do
|
129
164
|
@cobweb.get(@base_url)[:length].should == 1024
|
165
|
+
@cobweb.get(@base_url)[:length].should == 1024
|
130
166
|
end
|
131
167
|
it "should return correct content_body" do
|
132
168
|
@cobweb.get(@base_url)[:body].should == "asdf"
|
133
|
-
|
134
|
-
it "should return correct location" do
|
135
|
-
@cobweb.get(@base_url)[:location].should == nil
|
136
|
-
|
137
|
-
@mock_http_response.stub!(:[]).with("location").and_return("http://google.com/")
|
138
|
-
@cobweb.get(@base_url)[:location].should == "http://google.com/"
|
169
|
+
@cobweb.get(@base_url)[:body].should == "asdf"
|
139
170
|
end
|
140
171
|
it "should return correct headers" do
|
141
172
|
@cobweb.get(@base_url)[:headers].should == @default_headers
|
173
|
+
@cobweb.get(@base_url)[:headers].should == @default_headers
|
142
174
|
end
|
143
175
|
it "should return correct a hash of links" do
|
144
176
|
@cobweb.get(@base_url)[:links].should be_an_instance_of Hash
|
177
|
+
@cobweb.get(@base_url)[:links].should be_an_instance_of Hash
|
145
178
|
end
|
146
179
|
it "should return the response time for the url" do
|
147
180
|
@cobweb.get(@base_url)[:response_time].should be_an_instance_of Float
|
148
|
-
|
149
|
-
end
|
150
|
-
describe "with redirect" do
|
151
|
-
|
152
|
-
before(:each) do
|
153
|
-
@base_url = "http://redirect-me.com/redirect.html"
|
154
|
-
@cobweb = Cobweb.new(:follow_redirects => true, :quiet => true, :cache => nil)
|
155
|
-
end
|
156
|
-
|
157
|
-
it "should flow through redirect" #do
|
158
|
-
|
159
|
-
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
160
|
-
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
161
|
-
#
|
162
|
-
#content = @cobweb.get(@base_url)
|
163
|
-
#content.should be_an_instance_of HashHelper
|
164
|
-
#ap content
|
165
|
-
#content[:url].should == "http://redirect-me.com/redirect.html"
|
166
|
-
#content[:redirect_through].length.should == 2
|
167
|
-
#content[:mime_type].should == "text/html"
|
168
|
-
#content[:body].should == "asdf"
|
169
|
-
|
170
|
-
#end
|
171
|
-
it "should return the path followed" #do
|
172
|
-
#@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
173
|
-
#
|
174
|
-
#content = @cobweb.get(@base_url)
|
175
|
-
#content[:redirect_through].should == ["http://redirected-to.com/redirect2.html", "http://redirected-to.com/redirected.html"]
|
176
|
-
|
177
|
-
#end
|
178
|
-
it "should not follow with redirect disabled" do
|
179
|
-
@cobweb = Cobweb.new(:follow_redirects => false, :cache => nil)
|
180
|
-
@mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
181
|
-
|
182
|
-
content = @cobweb.get(@base_url)
|
183
|
-
content[:url].should == "http://redirect-me.com/redirect.html"
|
184
|
-
content[:redirect_through].should be_nil
|
185
|
-
content[:status_code].should == 301
|
186
|
-
content[:mime_type].should == "text/html"
|
187
|
-
content[:body].should == "redirected body"
|
188
|
-
|
181
|
+
@cobweb.get(@base_url)[:response_time].should be_an_instance_of Float
|
189
182
|
end
|
190
183
|
end
|
191
184
|
|
192
|
-
|
193
|
-
|
194
|
-
before(:each) do
|
195
|
-
@cobweb = Cobweb.new :quiet => true, :cache => 200
|
196
|
-
end
|
197
|
-
|
198
|
-
describe "content object" do
|
199
|
-
it "should return the url" do
|
200
|
-
@cobweb.get(@base_url)[:url].should == @base_url
|
201
|
-
@cobweb.get(@base_url)[:url].should == @base_url
|
202
|
-
end
|
203
|
-
it "should return correct content-type" do
|
204
|
-
@mock_http_response.stub!(:content_type).and_return("image/jpeg")
|
205
|
-
@cobweb.get(@base_url)[:mime_type].should == "image/jpeg"
|
206
|
-
@cobweb.get(@base_url)[:mime_type].should == "image/jpeg"
|
207
|
-
end
|
208
|
-
it "should return correct status-code" do
|
209
|
-
@mock_http_response.stub!(:code).and_return(404)
|
210
|
-
@cobweb.get(@base_url)[:status_code].should == 404
|
211
|
-
@cobweb.get(@base_url)[:status_code].should == 404
|
212
|
-
end
|
213
|
-
it "should return correct status-code" do
|
214
|
-
@mock_http_response.stub!(:code).and_return(404)
|
215
|
-
@cobweb.get(@base_url)[:status_code].should == 404
|
216
|
-
@cobweb.get(@base_url)[:status_code].should == 404
|
217
|
-
end
|
218
|
-
it "should return correct character_set" do
|
219
|
-
@cobweb.get(@base_url)[:character_set].should == "UTF-8"
|
220
|
-
@cobweb.get(@base_url)[:character_set].should == "UTF-8"
|
221
|
-
end
|
222
|
-
it "should return correct content_length" do
|
223
|
-
@cobweb.get(@base_url)[:length].should == 1024
|
224
|
-
@cobweb.get(@base_url)[:length].should == 1024
|
225
|
-
end
|
226
|
-
it "should return correct content_body" do
|
227
|
-
@cobweb.get(@base_url)[:body].should == "asdf"
|
228
|
-
@cobweb.get(@base_url)[:body].should == "asdf"
|
229
|
-
end
|
230
|
-
it "should return correct location" do
|
231
|
-
@cobweb.get(@base_url)[:location].should == nil
|
232
|
-
@cobweb.get(@base_url)[:location].should == nil
|
233
|
-
|
234
|
-
@mock_http_response.stub!(:[]).with("location").and_return("http://google.com/")
|
235
|
-
@cobweb.get(@base_url)[:location].should == "http://google.com/"
|
236
|
-
@cobweb.get(@base_url)[:location].should == "http://google.com/"
|
237
|
-
end
|
238
|
-
it "should return correct headers" do
|
239
|
-
@cobweb.get(@base_url)[:headers].should == @default_headers
|
240
|
-
@cobweb.get(@base_url)[:headers].should == @default_headers
|
241
|
-
end
|
242
|
-
it "should return correct a hash of links" do
|
243
|
-
@cobweb.get(@base_url)[:links].should be_an_instance_of Hash
|
244
|
-
@cobweb.get(@base_url)[:links].should be_an_instance_of Hash
|
245
|
-
end
|
246
|
-
it "should return the response time for the url" do
|
247
|
-
@cobweb.get(@base_url)[:response_time].should be_an_instance_of Float
|
248
|
-
@cobweb.get(@base_url)[:response_time].should be_an_instance_of Float
|
249
|
-
end
|
250
|
-
end
|
251
|
-
|
252
|
-
end
|
253
|
-
end
|
185
|
+
end
|
254
186
|
end
|
255
187
|
end
|
@@ -3,102 +3,139 @@ require File.expand_path(File.dirname(__FILE__) + '/../../lib/content_link_parse
|
|
3
3
|
|
4
4
|
describe ContentLinkParser do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
it "should load the sample document" do
|
13
|
-
@content.should_not be_nil
|
14
|
-
@content.should_not be_empty
|
15
|
-
end
|
16
|
-
|
17
|
-
it "should create a content link parser" do
|
18
|
-
@content_parser.should_not be_nil
|
19
|
-
@content_parser.should be_an_instance_of ContentLinkParser
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "using default tags" do
|
23
|
-
describe "returning general links" do
|
24
|
-
it "should return some links from the sample data" do
|
25
|
-
links = @content_parser.links
|
26
|
-
links.should_not be_nil
|
27
|
-
links.should_not be_empty
|
28
|
-
end
|
29
|
-
it "should return the correct links" do
|
30
|
-
links = @content_parser.links
|
31
|
-
links.length.should == 4
|
32
|
-
end
|
6
|
+
describe "Sample Links Document" do
|
7
|
+
before(:each) do
|
8
|
+
@base_url = "http://www.baseurl.com/"
|
9
|
+
@content = File.read(File.dirname(__FILE__) + "/../samples/sample_html_links.html")
|
10
|
+
@content_parser = ContentLinkParser.new("http://sample-links.com/", @content)
|
33
11
|
end
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
12
|
+
|
13
|
+
it "should load the sample document" do
|
14
|
+
@content.should_not be_nil
|
15
|
+
@content.should_not be_empty
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should create a content link parser" do
|
19
|
+
@content_parser.should_not be_nil
|
20
|
+
@content_parser.should be_an_instance_of ContentLinkParser
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "using default tags" do
|
24
|
+
describe "returning general links" do
|
25
|
+
it "should return some links from the sample data" do
|
26
|
+
links = @content_parser.links
|
27
|
+
links.should_not be_nil
|
28
|
+
links.should_not be_empty
|
29
|
+
end
|
30
|
+
it "should return the correct links" do
|
31
|
+
links = @content_parser.links
|
32
|
+
links.length.should == 4
|
33
|
+
end
|
39
34
|
end
|
40
|
-
|
41
|
-
links
|
42
|
-
|
35
|
+
describe "returning image links" do
|
36
|
+
it "should return some image links from the sample data" do
|
37
|
+
links = @content_parser.images
|
38
|
+
links.should_not be_nil
|
39
|
+
links.should_not be_empty
|
40
|
+
end
|
41
|
+
it "should return the correct links" do
|
42
|
+
links = @content_parser.images
|
43
|
+
links.length.should == 1
|
44
|
+
end
|
43
45
|
end
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
46
|
+
describe "returning related links" do
|
47
|
+
it "should return some related links from the sample data" do
|
48
|
+
links = @content_parser.related
|
49
|
+
links.should_not be_nil
|
50
|
+
links.should_not be_empty
|
51
|
+
end
|
52
|
+
it "should return the correct links" do
|
53
|
+
links = @content_parser.related
|
54
|
+
links.length.should == 2
|
55
|
+
end
|
50
56
|
end
|
51
|
-
|
52
|
-
links
|
53
|
-
|
57
|
+
describe "returning script links" do
|
58
|
+
it "should return some script links from the sample data" do
|
59
|
+
links = @content_parser.scripts
|
60
|
+
links.should_not be_nil
|
61
|
+
links.should_not be_empty
|
62
|
+
end
|
63
|
+
it "should return the correct links" do
|
64
|
+
links = @content_parser.scripts
|
65
|
+
links.length.should == 1
|
66
|
+
end
|
54
67
|
end
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
68
|
+
describe "returning style links" do
|
69
|
+
it "should return some style links from the sample data" do
|
70
|
+
links = @content_parser.styles
|
71
|
+
links.should_not be_nil
|
72
|
+
links.should_not be_empty
|
73
|
+
end
|
74
|
+
it "should return the correct links" do
|
75
|
+
links = @content_parser.styles
|
76
|
+
links.length.should == 3
|
77
|
+
end
|
61
78
|
end
|
62
|
-
|
63
|
-
|
64
|
-
|
79
|
+
describe "returning unknown link type" do
|
80
|
+
it "should return an empty array" do
|
81
|
+
links = @content_parser.asdfasdfsadf
|
82
|
+
links.should_not be_nil
|
83
|
+
links.should be_an_instance_of Array
|
84
|
+
end
|
65
85
|
end
|
66
86
|
end
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
links.length.should ==
|
87
|
+
|
88
|
+
describe "returning all link data" do
|
89
|
+
it "should return a hash with all link data" do
|
90
|
+
link_data = @content_parser.link_data
|
91
|
+
link_data.should_not be_nil
|
92
|
+
link_data.should be_an_instance_of Hash
|
93
|
+
|
94
|
+
link_data.keys.length.should == 5
|
95
|
+
link_data[:links].length.should == 4
|
76
96
|
end
|
77
97
|
end
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
links.
|
82
|
-
links.should
|
98
|
+
|
99
|
+
describe "ignoring default tags" do
|
100
|
+
it "should not return any links" do
|
101
|
+
parser = ContentLinkParser.new("http://sample-links.com", @content, :ignore_default_tags => true)
|
102
|
+
parser.links.should be_empty
|
83
103
|
end
|
84
104
|
end
|
85
105
|
end
|
86
106
|
|
87
|
-
describe "
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
107
|
+
describe "simple style based documents" do
|
108
|
+
def create_content(url)
|
109
|
+
<<-eos
|
110
|
+
<html>
|
111
|
+
<head>
|
112
|
+
<STYLE TYPE="text/css" MEDIA="screen, projection">
|
113
|
+
@import url(#{url});
|
114
|
+
</STYLE>
|
115
|
+
</head>
|
116
|
+
</html>
|
117
|
+
eos
|
95
118
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
it "should
|
100
|
-
|
101
|
-
|
119
|
+
before :each do
|
120
|
+
@base_url = "http://www.baseurl.com/path"
|
121
|
+
end
|
122
|
+
it "should have the right link for a single quoted style" do
|
123
|
+
@content_parser = ContentLinkParser.new("http://sample-links.com/", create_content("'/new'"))
|
124
|
+
styles = @content_parser.styles
|
125
|
+
styles.length.should==1
|
126
|
+
styles[0].should=="/new"
|
127
|
+
end
|
128
|
+
it "should have the right link for a double quoted style" do
|
129
|
+
@content_parser = ContentLinkParser.new("http://sample-links.com/", create_content('"/new"'))
|
130
|
+
styles = @content_parser.styles
|
131
|
+
styles.length.should==1
|
132
|
+
styles[0].should=="/new"
|
133
|
+
end
|
134
|
+
it "should just leave links with differing quotes alone" do
|
135
|
+
@content_parser = ContentLinkParser.new("http://sample-links.com/", create_content('"new\''))
|
136
|
+
styles = @content_parser.styles
|
137
|
+
styles.length.should==1
|
138
|
+
styles[0].should=="\"new'"
|
102
139
|
end
|
103
140
|
end
|
104
141
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -8,6 +8,67 @@ RSpec.configure do |config|
|
|
8
8
|
#redis_mock.stub(:new).and_return(MockRedis.new)
|
9
9
|
|
10
10
|
Redis.new.flushdb
|
11
|
+
|
12
|
+
|
13
|
+
@default_headers = {"Cache-Control" => "private, max-age=0",
|
14
|
+
"Date" => "Wed, 10 Nov 2010 09:06:17 GMT",
|
15
|
+
"Expires" => "-1",
|
16
|
+
"Content-Type" => "text/html; charset=UTF-8",
|
17
|
+
"Content-Encoding" => "",
|
18
|
+
"Transfer-Encoding" => "chunked",
|
19
|
+
"Server" => "gws",
|
20
|
+
"X-XSS-Protection" => "1; mode=block"}
|
21
|
+
|
22
|
+
@mock_http_client = mock(Net::HTTP)
|
23
|
+
@mock_http_request = mock(Net::HTTPRequest)
|
24
|
+
@mock_http_redirect_request = mock(Net::HTTPRequest)
|
25
|
+
@mock_http_redirect_request2 = mock(Net::HTTPRequest)
|
26
|
+
|
27
|
+
@mock_http_response = mock(Net::HTTPResponse)
|
28
|
+
@mock_http_redirect_response = mock(Net::HTTPRedirection)
|
29
|
+
@mock_http_redirect_response2 = mock(Net::HTTPRedirection)
|
30
|
+
@mock_http_get = mock(Net::HTTP::Get)
|
31
|
+
|
32
|
+
Net::HTTP.stub!(:new).and_return(@mock_http_client)
|
33
|
+
Net::HTTP::Get.stub!(:new).and_return(@mock_http_request)
|
34
|
+
Net::HTTP::Get.stub!(:new).with("/redirect.html").and_return(@mock_http_redirect_request)
|
35
|
+
Net::HTTP::Get.stub!(:new).with("/redirect2.html").and_return(@mock_http_redirect_request2)
|
36
|
+
|
37
|
+
@mock_http_client.stub!(:request).with(@mock_http_request).and_return(@mock_http_response)
|
38
|
+
@mock_http_client.stub!(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
|
39
|
+
@mock_http_client.stub!(:request).with(@mock_http_redirect_request2).and_return(@mock_http_redirect_response2)
|
40
|
+
@mock_http_client.stub!(:read_timeout=).and_return(nil)
|
41
|
+
@mock_http_client.stub!(:open_timeout=).and_return(nil)
|
42
|
+
@mock_http_client.stub!(:start).and_return(@mock_http_response)
|
43
|
+
@mock_http_client.stub!(:address).and_return("www.baseurl.com")
|
44
|
+
@mock_http_client.stub!(:port).and_return("80 ")
|
45
|
+
|
46
|
+
@mock_http_response.stub!(:code).and_return(200)
|
47
|
+
@mock_http_response.stub!(:content_type).and_return("text/html")
|
48
|
+
@mock_http_response.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
49
|
+
@mock_http_response.stub!(:[]).with("location").and_return(@default_headers["location"])
|
50
|
+
@mock_http_response.stub!(:[]).with("Content-Encoding").and_return(@default_headers["Content-Encoding"])
|
51
|
+
@mock_http_response.stub!(:content_length).and_return(1024)
|
52
|
+
@mock_http_response.stub!(:body).and_return("asdf")
|
53
|
+
@mock_http_response.stub!(:to_hash).and_return(@default_headers)
|
54
|
+
|
55
|
+
@mock_http_redirect_response.stub!(:code).and_return(301)
|
56
|
+
@mock_http_redirect_response.stub!(:content_type).and_return("text/html")
|
57
|
+
@mock_http_redirect_response.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
58
|
+
@mock_http_redirect_response.stub!(:[]).with("location").and_return("http://redirected-to.com/redirect2.html")
|
59
|
+
@mock_http_redirect_response.stub!(:[]).with("Content-Encoding").and_return(@default_headers["Content-Encoding"])
|
60
|
+
@mock_http_redirect_response.stub!(:content_length).and_return(2048)
|
61
|
+
@mock_http_redirect_response.stub!(:body).and_return("redirected body")
|
62
|
+
@mock_http_redirect_response.stub!(:to_hash).and_return(@default_headers)
|
63
|
+
|
64
|
+
@mock_http_redirect_response2.stub!(:code).and_return(301)
|
65
|
+
@mock_http_redirect_response2.stub!(:content_type).and_return("text/html")
|
66
|
+
@mock_http_redirect_response2.stub!(:[]).with("Content-Type").and_return(@default_headers["Content-Type"])
|
67
|
+
@mock_http_redirect_response2.stub!(:[]).with("location").and_return("http://redirected-to.com/redirected.html")
|
68
|
+
@mock_http_redirect_response2.stub!(:[]).with("Content-Encoding").and_return(@default_headers["Content-Encoding"])
|
69
|
+
@mock_http_redirect_response2.stub!(:content_length).and_return(2048)
|
70
|
+
@mock_http_redirect_response2.stub!(:body).and_return("redirected body")
|
71
|
+
@mock_http_redirect_response2.stub!(:to_hash).and_return(@default_headers)
|
11
72
|
}
|
12
73
|
|
13
74
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cobweb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.33
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-04-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: resque
|
16
|
-
requirement: &
|
16
|
+
requirement: &70153227362400 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70153227362400
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: redis
|
27
|
-
requirement: &
|
27
|
+
requirement: &70153227361980 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70153227361980
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: nokogiri
|
38
|
-
requirement: &
|
38
|
+
requirement: &70153227361560 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70153227361560
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: addressable
|
49
|
-
requirement: &
|
49
|
+
requirement: &70153227361140 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70153227361140
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &70153227360720 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70153227360720
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: awesome_print
|
71
|
-
requirement: &
|
71
|
+
requirement: &70153227360300 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70153227360300
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: sinatra
|
82
|
-
requirement: &
|
82
|
+
requirement: &70153227359880 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *70153227359880
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: thin
|
93
|
-
requirement: &
|
93
|
+
requirement: &70153227359460 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :runtime
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *70153227359460
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: haml
|
104
|
-
requirement: &
|
104
|
+
requirement: &70153227359040 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :runtime
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *70153227359040
|
113
113
|
description: Web Crawler that uses resque background job engine to allow you to cluster
|
114
114
|
your crawl.
|
115
115
|
email: stewart@rockwellcottage.com
|