cobweb 0.0.60 → 0.0.61

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
 
2
- h1. Cobweb v0.0.60
2
+ h1. Cobweb v0.0.61
3
3
  !https://secure.travis-ci.org/stewartmckee/cobweb.png?branch=master!
4
4
 
5
5
  h2. Intro
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "0.0.60"
6
+ "0.0.61"
7
7
  end
8
8
 
9
9
  end
@@ -35,8 +35,11 @@ class CrawlJob
35
35
  # if there is no limit or we're still under it, let's get the url
36
36
  if within_crawl_limits?(content_request[:crawl_limit])
37
37
  begin
38
+ # move the url from the queued list to the crawled list - for both the original url, and the content url (to handle redirects)
38
39
  @redis.srem "queued", content_request[:url]
39
40
  @redis.sadd "crawled", content_request[:url]
41
+ @redis.srem "queued", content[:url]
42
+ @redis.sadd "crawled", content[:url]
40
43
  # increment the counter if we are not limiting by page only || we are limiting count by page and it is a page
41
44
  if content_request[:crawl_limit_by_page]
42
45
  if content[:mime_type].match("text/html")
@@ -207,7 +207,6 @@ def clear_queues
207
207
  Resque.queues.each do |queue|
208
208
  Resque.remove_queue(queue)
209
209
  end
210
- puts "Cleared"
211
210
 
212
211
  Resque.size("cobweb_process_job").should == 0
213
212
  Resque.size("cobweb_finished_job").should == 0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.60
4
+ version: 0.0.61
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-07-12 00:00:00.000000000 Z
12
+ date: 2012-07-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70111532832640 !ruby/object:Gem::Requirement
16
+ requirement: &70301996170680 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70111532832640
24
+ version_requirements: *70301996170680
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70111532832160 !ruby/object:Gem::Requirement
27
+ requirement: &70301996168960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70111532832160
35
+ version_requirements: *70301996168960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70111532831300 !ruby/object:Gem::Requirement
38
+ requirement: &70301996168220 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70111532831300
46
+ version_requirements: *70301996168220
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: addressable
49
- requirement: &70111532830260 !ruby/object:Gem::Requirement
49
+ requirement: &70301996167040 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70111532830260
57
+ version_requirements: *70301996167040
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &70111532829180 !ruby/object:Gem::Requirement
60
+ requirement: &70301996165600 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70111532829180
68
+ version_requirements: *70301996165600
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: awesome_print
71
- requirement: &70111532844220 !ruby/object:Gem::Requirement
71
+ requirement: &70301996180600 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70111532844220
79
+ version_requirements: *70301996180600
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: sinatra
82
- requirement: &70111532843480 !ruby/object:Gem::Requirement
82
+ requirement: &70301996179980 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70111532843480
90
+ version_requirements: *70301996179980
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: thin
93
- requirement: &70111532842880 !ruby/object:Gem::Requirement
93
+ requirement: &70301996179420 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *70111532842880
101
+ version_requirements: *70301996179420
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &70111532842240 !ruby/object:Gem::Requirement
104
+ requirement: &70301996178900 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *70111532842240
112
+ version_requirements: *70301996178900
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: namespaced_redis
115
- requirement: &70111532841500 !ruby/object:Gem::Requirement
115
+ requirement: &70301996178200 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,7 +120,7 @@ dependencies:
120
120
  version: 1.0.2
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *70111532841500
123
+ version_requirements: *70301996178200
124
124
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
125
125
  crawl extremely large sites which is much more performant than multi-threaded crawlers. It
126
126
  is also a standalone crawler that has a sophisticated statistics monitoring interface