cobweb 1.0.26 → 1.0.27
- checksums.yaml +4 -4
- data/README.textile +1 -1
- data/lib/cobweb.rb +7 -4
- data/lib/cobweb_crawl_helper.rb +40 -18
- data/lib/cobweb_crawler.rb +5 -1
- data/lib/cobweb_finished_job.rb +2 -2
- data/lib/cobweb_links.rb +3 -3
- data/lib/cobweb_process_job.rb +1 -1
- data/lib/cobweb_version.rb +1 -1
- data/lib/crawl_finished_worker.rb +1 -1
- data/lib/crawl_helper.rb +1 -1
- data/lib/sidekiq/cobweb_helper.rb +2 -2
- data/spec/cobweb/cobweb_crawl_helper_spec.rb +53 -18
- data/spec/cobweb/cobweb_crawler_spec.rb +33 -0
- data/spec/cobweb/cobweb_spec.rb +4 -4
- data/spec/samples/sample_site/index.html +2 -0
- data/spec/spec_helper.rb +8 -8
- metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5cee480edd847679803dcc5e7b47f03523bd5775
+  data.tar.gz: 37ebf10e098d2b46274a80a969962eadea9bd307
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ea3ae531762c16268aac13babd9dbdb482bed8930819de4cdb013f268378eb729483512f181f85ad421a40b19ae2086011d46e6f459286cb314889450be7329c
+  data.tar.gz: ca1bf5d58f6a242af1030fd2d5f83c28444f8eae47e12cb1c6938032687559824924ef313b52e276186c2c480137035258db5154fcf0205445a7b4c811644a14
data/README.textile CHANGED
data/lib/cobweb.rb CHANGED
@@ -250,7 +250,10 @@ class Cobweb
       end
     end
   rescue RedirectError => e
-
+    if @options[:raise_exceptions]
+      puts "Re-Raising error #{e.message} on #{uri.to_s}"
+      raise e
+    end
     puts "ERROR RedirectError: #{e.message}"

     ## generate a blank content
@@ -453,9 +456,9 @@ class Cobweb
     pattern = pattern.gsub(".", "\\.")
     pattern = pattern.gsub("?", "\\?")
     pattern = pattern.gsub("+", "\\\\+")
-    pattern = pattern.gsub("*", ".*?")
-    if !options.has_key?(:treat_https_as_http)
-      pattern = pattern.gsub("http:", "https?:")
+    pattern = pattern.gsub("*", ".*?")
+    if options[:treat_https_as_http] || !options.has_key?(:treat_https_as_http)
+      pattern = pattern.gsub("http:", "https?:")
     end
     pattern
   end
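The two hunks above change runtime behaviour: the `RedirectError` rescue can now re-raise when `:raise_exceptions` is set, and `escape_pattern_for_regex` applies the `http:` → `https?:` rewrite when `:treat_https_as_http` is explicitly `true` as well as when the key is absent. A minimal sketch of both (the URLs are hypothetical; the expected pattern values follow the specs further down):

```ruby
require 'cobweb'

# Pattern escaping: an http pattern now matches both schemes unless disabled.
Cobweb.escape_pattern_for_regex("http://example.com")
# => "https?://example\\.com"
Cobweb.escape_pattern_for_regex("http://example.com", :treat_https_as_http => false)
# => "http://example\\.com"

# With :raise_exceptions, redirect failures propagate to the caller instead
# of only being logged and replaced by blank content.
begin
  Cobweb.new(:raise_exceptions => true, :cache => false).get("http://example.com/loop")
rescue RedirectError => e
  puts "crawl aborted: #{e.message}"
end
```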
data/lib/cobweb_crawl_helper.rb CHANGED
@@ -1,40 +1,54 @@
 # The crawl class gives easy access to information about the crawl, and gives the ability to stop a crawl
 class CobwebCrawlHelper
-
+
   attr_accessor :id
-
+
   BATCH_SIZE = 200
   FINISHED = "Finished"
   STARTING = "Starting"
   CANCELLED = "Cancelled"
-
+
   def initialize(data)
     @data = data
-
+
     # TAKING A LONG TIME TO RUN ON PRODUCTION BOX
     @stats = Stats.new(data)
   end
-
+
   def destroy
     options = @data
     options[:queue_name] = "cobweb_crawl_job" unless options.has_key?(:queue_name)
     if RESQUE_INSTALLED
-      options[:
+      options[:processing_queue] = "CobwebJob" unless options.has_key?(:processing_queue)
+      options[:crawl_finished_queue] = "CobwebFinishedJob" unless options.has_key?(:crawl_finished_queue)
+    end
+    if SIDEKIQ_INSTALLED
+      options[:processing_queue] = "CrawlWorker" unless options.has_key?(:processing_queue)
+      options[:crawl_finished_queue] = "CrawlFinishedWorker" unless options.has_key?(:crawl_finished_queue)
     end
-
+
     # set status as cancelled now so that we don't enqueue any further pages
     self.statistics.end_crawl(@data, true)
-
 
-
-
+
+    if options[:crawl_finished_queue] && options[:queue_system] == :resque && RESQUE_INSTALLED
+
       additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")}
       additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {}
       additional_stats[:source_id] = options[:source_id] unless options[:source_id].nil?
-
-      Resque.enqueue(options[:
+
+      Resque.enqueue(options[:crawl_finished_queue], @stats.get_statistics.merge(additional_stats))
     end
-
+
+    if options[:crawl_finished_queue] && options[:queue_system] == :sidekiq && SIDEKIQ_INSTALLED
+
+      additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")}
+      additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {}
+      additional_stats[:source_id] = options[:source_id] unless options[:source_id].nil?
+
+      Kernel.const_get(options[:crawl_finished_queue]).perform_async(@stats.get_statistics.merge(additional_stats))
+    end
+
     counter = 0
     while(counter < 200) do
       break if self.statistics.get_status == CANCELLED
@@ -55,19 +69,27 @@ class CobwebCrawlHelper
         end
       end
     end
-
+    if options[:queue_system] == :sidekiq && SIDEKIQ_INSTALLED
+      queue_name = Kernel.const_get(options[:processing_queue]).sidekiq_options_hash["queue"]
+      queue = Sidekiq::Queue.new(queue_name)
+      queue.each do |job|
+        job.args # => [1, 2, 3]
+        job.delete if job.args[0]["crawl_id"] == id
+      end
+    end
+
   end
-
+
   def statistics
     @stats
   end
-
+
   def status
     statistics.get_status
   end
-
+
   def id
     @data[:crawl_id]
   end
-
+
 end
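`destroy` now fills in backend-appropriate defaults for `:processing_queue` and `:crawl_finished_queue`, fires the finished job on whichever queue system is active, and (new in this release) deletes the crawl's pending jobs from the Sidekiq processing queue. A sketch of cancelling a crawl under Sidekiq, with a hypothetical crawl id:

```ruby
# Assumes sidekiq is installed and a crawl with this id is in flight.
crawl = CobwebCrawlHelper.new(:crawl_id => "my_crawl_id", :queue_system => :sidekiq)

# Marks the crawl Cancelled, enqueues CrawlFinishedWorker with the final
# statistics, then removes this crawl's jobs from the CrawlWorker queue.
crawl.destroy

crawl.status # => "Cancelled"
```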
data/lib/cobweb_crawler.rb CHANGED
@@ -27,7 +27,8 @@ class CobwebCrawler
     @options[:seed_urls].map{|link| @redis.sadd "queued", link }

     @options[:crawl_linked_external] = false unless @options.has_key? :crawl_linked_external
-
+
+    @options[:treat_https_as_http] = true unless @options.has_key? :treat_https_as_http
     @debug = @options[:debug]

     @stats = Stats.new(@options.merge(:crawl_id => @crawl_id))
@@ -100,16 +101,19 @@

     document_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https]).uniq

+
     # select the link if its internal (eliminate external before expensive lookups in queued and crawled)
     cobweb_links = CobwebLinks.new(@options)

     internal_links = document_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s) && !cobweb_links.matches_external?(link))}

     # if the site has the same content for http and https then normalize to http
+
     if @options[:treat_https_as_http]
       internal_links.map!{|link| link.gsub(/^https/, "http")}
     end

+
     # reject the link if we've crawled it or queued it
     internal_links.reject!{|link| @redis.sismember("crawled", link)}
     internal_links.reject!{|link| @redis.sismember("queued", link)}
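`CobwebCrawler` now defaults `:treat_https_as_http` to `true`, so `https` links discovered during a crawl are queued under their `http` form; pass `false` to keep the schemes distinct. A sketch with hypothetical URLs, using the block form of `crawl` from the README:

```ruby
# Default behaviour: an <a href="https://example.com/secure"> link found on
# a page is normalized and queued as http://example.com/secure.
crawler = CobwebCrawler.new(:cache => false, :crawl_limit => 10)
crawler.crawl("http://example.com/") do |content, statistics|
  puts "crawled #{content[:url]}"
end

# Opt out to treat http:// and https:// as different sites.
CobwebCrawler.new(:cache => false, :treat_https_as_http => false)
```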
data/lib/cobweb_finished_job.rb CHANGED
@@ -1,6 +1,6 @@
 # Dummy resque job that executes at the end of the crawl if none are specified
 class CobwebFinishedJob
-  require "ap"
+  #require "ap"

   @queue = :cobweb_finished_job

@@ -8,7 +8,7 @@ class CobwebFinishedJob
   def self.perform(statistics)
     puts "Dummy Finished Job"

-    ap statistics
+    #ap statistics

   end
 end
data/lib/cobweb_links.rb CHANGED
@@ -12,9 +12,9 @@ class CobwebLinks
     @options[:external_urls] = [] unless @options.has_key? :external_urls
     @options[:debug] = false unless @options.has_key? :debug

-    @internal_patterns = @options[:internal_urls].map{|pattern| Regexp.new("^#{Cobweb.escape_pattern_for_regex(pattern, options)}")}
-    @external_patterns = @options[:external_urls].map{|pattern| Regexp.new("^#{Cobweb.escape_pattern_for_regex(pattern, options)}")}
-
+    @internal_patterns = @options[:internal_urls].map{|pattern| Regexp.new("^#{Cobweb.escape_pattern_for_regex(pattern, @options)}")}
+    @external_patterns = @options[:external_urls].map{|pattern| Regexp.new("^#{Cobweb.escape_pattern_for_regex(pattern, @options)}")}
+
   end

   def allowed?(link)
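The fix above passes the processed `@options` hash, rather than the raw `options` argument, into `Cobweb.escape_pattern_for_regex`, so per-crawl flags such as `:treat_https_as_http` now reach the pattern builder. A sketch of the effect, assuming the escaping rules shown in the cobweb.rb hunk (URLs hypothetical):

```ruby
links = CobwebLinks.new(
  :internal_urls => ["http://example.com/*"],
  :treat_https_as_http => true
)

# The internal pattern compiles to /^https?:\/\/example\.com\/.*?/, so the
# https variant of an internal URL is recognised as internal too.
links.internal?("https://example.com/page") # => true
```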
data/lib/cobweb_process_job.rb CHANGED
data/lib/cobweb_version.rb CHANGED
data/lib/crawl_helper.rb CHANGED
@@ -126,7 +126,7 @@ class CrawlHelper
   def self.finished(content_request)
     # finished
     if @redis.hget("statistics", "current_status")!= "Crawl Finished"
-      ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
+      #ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
       @stats.end_crawl(content_request)

       additional_stats = {:crawl_id => content_request[:crawl_id], :crawled_base_url => @redis.get("crawled_base_url")}
data/lib/sidekiq/cobweb_helper.rb CHANGED
@@ -4,14 +4,14 @@ if Gem::Specification.find_all_by_name("sidekiq", ">=1.0.0").count >= 1
   require 'sidekiq'
 else
   SIDEKIQ_INSTALLED = false
-  puts "sidekiq gem not installed, skipping crawl_worker specs"
+  puts "sidekiq gem not installed, skipping crawl_worker specs" if defined?(ENVIRONMENT) && ENVIRONMENT=="test"
 end
 if Gem::Specification.find_all_by_name("resque", ">=1.0.0").count >= 1
   RESQUE_INSTALLED = true
   require 'resque'
 else
   RESQUE_INSTALLED = false
-  puts "resque gem not installed, skipping crawl_job specs"
+  puts "resque gem not installed, skipping crawl_job specs" if defined?(ENVIRONMENT) && ENVIRONMENT=="test"
 end

 module Sidekiq
data/spec/cobweb/cobweb_crawl_helper_spec.rb CHANGED
@@ -3,19 +3,19 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
 describe CobwebCrawlHelper do
   include HttpStubs
   before(:each) do
-    pending("
+    pending("requires resque or sidekiq") unless RESQUE_INSTALLED || SIDEKIQ_INSTALLED

     setup_stubs
   end
   # this spec tests the crawl object
-
+
   describe "initialize" do
     describe "without data" do
       it "should raise an exception" do
         lambda {CobwebCrawlHelper.new}.should raise_exception
       end
     end
-
+
     describe "with data" do
       before(:each) do
         data = {:crawl_id => "asdf"}
@@ -30,41 +30,76 @@ describe CobwebCrawlHelper do
       it "should return a status" do
         @crawl.should respond_to "status"
       end
-
-      describe "the destroy method" do
+
+      describe "the destroy method " do
         before(:each) do
-          if
-
+          if SIDEKIQ_INSTALLED
+            if Sidekiq::Queue.new("crawl_worker").size > 0
+              raise "cobweb_crawl_job is not empty, do not run specs until it is!"
+            end
+          elsif RESQUE_INSTALLED
+            if Resque.size("cobweb_crawl_job") > 0
+              raise "cobweb_crawl_job is not empty, do not run specs until it is!"
+            end
           end
+
          105.times do |item_count|
            2.times do |crawl_count|
-
-
+              if SIDEKIQ_INSTALLED
+                item_data = {:crawl_id => "crawl_#{crawl_count}_id", :url => "http://crawl#{crawl_count}.com/page#{item_count}.html"}
+                CrawlWorker.perform_async(item_data)
+              elsif RESQUE_INSTALLED
+                item_data = {:crawl_id => "crawl_#{crawl_count}_id", :url => "http://crawl#{crawl_count}.com/page#{item_count}.html"}
+                Resque.enqueue(CrawlJob, item_data)
+              end
            end
          end
        end
        after(:each) do
+          Sidekiq::Queue.new("crawl_worker").clear if SIDEKIQ_INSTALLED
          Resque.remove_queue("cobweb_crawl_job") if RESQUE_INSTALLED
        end
        it "should have a queue length of 210" do
-
+          Sidekiq::Queue.new("crawl_worker").size.should == 210 if SIDEKIQ_INSTALLED
+          Resque.size("cobweb_crawl_job").should == 210 if RESQUE_INSTALLED
        end
        describe "after called" do
          before(:each) do
-
+            if SIDEKIQ_INSTALLED
+              @crawl = CobwebCrawlHelper.new({:crawl_id => "crawl_0_id", :queue_system => :sidekiq}) if SIDEKIQ_INSTALLED
+            elsif RESQUE_INSTALLED
+              @crawl = CobwebCrawlHelper.new({:crawl_id => "crawl_0_id", :queue_system => :resque}) if RESQUE_INSTALLED
+            end
            @crawl.destroy
          end
          it "should delete only the crawl specified" do
-
+            if SIDEKIQ_INSTALLED
+              Sidekiq::Queue.new("crawl_worker").size.should == 105
+            elsif RESQUE_INSTALLED
+              Resque.size("cobweb_crawl_job").should == 105
+            end
+
          end
          it "should not contain any crawl_0_id" do
-
-
+            if SIDEKIQ_INSTALLED
+              Sidekiq::Queue.new("crawl_job").each do |item|
+                item.args[0]["crawl_id"].should_not == "crawl_0_id"
+              end
+            elsif RESQUE_INSTALLED
+              Resque.peek("cobweb_crawl_job", 0, 200).map{|i| i["args"][0]}.each do |item|
+                item["crawl_id"].should_not == "crawl_0_id"
+              end
            end
          end
          it "should only contain crawl_1_id" do
-
-
+            if SIDEKIQ_INSTALLED
+              Sidekiq::Queue.new("crawl_job").each do |item|
+                item.args[0]["crawl_id"].should == "crawl_1_id"
+              end
+            elsif RESQUE_INSTALLED
+              Resque.peek("cobweb_crawl_job", 0, 200).map{|i| i["args"][0]}.each do |item|
+                item["crawl_id"].should == "crawl_1_id"
+              end
            end
          end
          it "should set status to 'Cancelled'" do
@@ -74,6 +109,6 @@ describe CobwebCrawlHelper do
       end
     end
   end
-
-
+
+
 end
data/spec/cobweb/cobweb_crawler_spec.rb CHANGED
@@ -53,6 +53,39 @@ describe CobwebCrawler do

   end

+  context "internal_links" do
+    it "should match internal links without being explicitly set" do
+      crawler = CobwebCrawler.new({:cache => false, :crawl_limit => 1})
+      crawler.crawl(@base_url)
+      queued_links = @redis_mock_object.smembers("queued")
+      queued_links.should_not include("http://themeforest.net/item/cleandream/490140")
+      queued_links.should include("http://localhost:3532/secure")
+    end
+    context "with https" do
+      it "should match https by default" do
+        crawler = CobwebCrawler.new({:cache => false, :crawl_limit => 1})
+        crawler.crawl(@base_url)
+        queued_links = @redis_mock_object.smembers("queued")
+        queued_links.should_not include("https://localhost:3532/secure")
+        queued_links.should include("http://localhost:3532/secure")
+      end
+      it "should not define https as different if treat_https_as_http is true" do
+        crawler = CobwebCrawler.new({:cache => false, :crawl_limit => 1, :treat_https_as_http => true})
+        crawler.crawl(@base_url)
+        queued_links = @redis_mock_object.smembers("queued")
+        queued_links.should_not include("https://localhost:3532/secure")
+        queued_links.should include("http://localhost:3532/secure")
+      end
+      it "should define https as different if treat_https_as_http is false" do
+        crawler = CobwebCrawler.new({:cache => false, :crawl_limit => 1, :treat_https_as_http => false})
+        crawler.crawl(@base_url)
+        queued_links = @redis_mock_object.smembers("queued")
+        queued_links.should_not include("https://localhost:3532/secure")
+        queued_links.should_not include("http://localhost:3532/secure")
+      end
+    end
+  end
+
   context "storing inbound links" do

     before(:each) do
data/spec/cobweb/cobweb_spec.rb CHANGED
@@ -61,19 +61,19 @@ describe Cobweb do

   context "with https ignored" do
     it "should ignore https" do
-      result = Cobweb.escape_pattern_for_regex("
+      result = Cobweb.escape_pattern_for_regex("http://asdf.com")
       result.should eql "https?://asdf\\.com"
     end
     it "should ignore https" do
-      result = Cobweb.escape_pattern_for_regex("
+      result = Cobweb.escape_pattern_for_regex("http://asdf.com", :treat_https_as_http => true)
       result.should eql "https?://asdf\\.com"
     end
   end

   context "without https ignored" do
     it "should ignore https" do
-      result = Cobweb.escape_pattern_for_regex("
-      result.should eql "
+      result = Cobweb.escape_pattern_for_regex("http://asdf.com", :treat_https_as_http => false)
+      result.should eql "http://asdf\\.com"
     end
   end

data/spec/samples/sample_site/index.html CHANGED
@@ -711,6 +711,8 @@
 <a href="gfx/photos/07xl.jpg" class="zoom"><img src="gfx/photos/07.jpg" class="shadow" alt="Photo" /></a>
 <a href="gfx/photos/08xl.jpg" class="zoom"><img src="gfx/photos/08.jpg" class="shadow" alt="Photo" /></a>
 <a href="gfx/photos/09xl.jpg" class="zoom"><img src="gfx/photos/09.jpg" class="shadow" alt="Photo" /></a>
+
+<a href="https://localhost:3532/secure">HTTPS Link</a>

 <a href="#"><img src="gfx/photos/11.jpg" class="shadow" alt="Photo" /></a>
 <a href="#"><img src="gfx/photos/12.jpg" class="shadow" alt="Photo" /></a>
data/spec/spec_helper.rb CHANGED
@@ -4,7 +4,6 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec/samples/sample_serve
 require File.expand_path(File.dirname(__FILE__) + '/../spec/http_stubs')
 require 'mock_redis'

-
 require 'coveralls'
 Coveralls.wear!

@@ -13,11 +12,11 @@ ENVIRONMENT = "test"
 APP_ROOT = File.expand_path(File.dirname(__FILE__) + '/../')

 RSpec.configure do |config|
-
+
   if ENV["TRAVIS_RUBY_VERSION"] || ENV['CI']
     config.filter_run_excluding :local_only => true
   end
-
+
   THIN_INSTALLED = false
   if Gem::Specification.find_all_by_name("thin", ">=1.0.0").count >= 1
     require 'thin'
@@ -30,17 +29,18 @@ RSpec.configure do |config|
     # WAIT FOR START TO COMPLETE
     sleep 1

-
+
   config.before(:all) {
     # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
   }
-
+
   config.before(:each) {

-
-
+    @redis_mock_object = MockRedis.new
+    Redis.stub(:new).and_return(@redis_mock_object)
+    Redis::Namespace.stub(:new).and_return(@redis_mock_object)

-
+    @redis_mock_object.flushdb

   }

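The spec helper now routes every `Redis.new` and `Redis::Namespace.new` in the code under test to one shared `MockRedis` instance and flushes it before each example; that shared object is what lets the new crawler specs assert on queue contents via `@redis_mock_object.smembers("queued")`. A sketch of what the hook achieves, written as a standalone example in the same legacy rspec-mocks `stub` syntax:

```ruby
require 'mock_redis'

it "exposes crawler state through the shared mock" do
  redis_mock = MockRedis.new
  Redis.stub(:new).and_return(redis_mock)

  # Whatever the code under test writes through its own Redis connection
  # is now visible to the spec through the shared mock.
  Redis.new.sadd("queued", "http://example.com/")
  redis_mock.smembers("queued").should == ["http://example.com/"]
end
```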
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cobweb
 version: !ruby/object:Gem::Version
-  version: 1.0.26
+  version: 1.0.27
 platform: ruby
 authors:
 - Stewart McKee
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-
+date: 2015-03-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: redis