cobweb 1.0.29 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ba10134e0b3da7418f0a1a5772ca196cf5525066
4
- data.tar.gz: ae2f27f0036172b001968e277913d0038549220b
3
+ metadata.gz: 854165929dc7a5e3e16d138515723cfd2d3f95b5
4
+ data.tar.gz: 8444c545e80547e41dcaeae817a81183ed4a8d54
5
5
  SHA512:
6
- metadata.gz: 175e8dedf0592c1cc8e9abb50cc0efa835ffb8e1d3c0df3ecb3c3a9900e24a1a2aebc479c5188ea44d57de210b559e0637efb8beac4f90c49f3f6e54bc0492d7
7
- data.tar.gz: d4fc557a20e3b4d54daaecaa151b5365e4a483067b6e2cc4429f846e47066d1326a868383f2d1a2031159dc21cfbd053854e2c12ea57cd1eeb2de999f8aec2ab
6
+ metadata.gz: 181edda4c8fc822f52729a645046c6e3a8d11ad0ff3d5540cfcc055528563605c82eec5a5615074fe86f97c8655694dc75c29d61b8e324fed02291372e5f107a
7
+ data.tar.gz: 830a4aceeb58d6d5b43d284622a9054aea66a5a46aadabec319bf9eebeb3e20444e476a3a0db42afcc2344a6a5f7dae210aaed024055d6d7d25042f3cbd38f79
@@ -1,7 +1,8 @@
1
- h1. Cobweb v1.0.29
1
+ h1. Cobweb v1.1.0
2
2
 
3
3
  "@cobweb_gem":https://twitter.com/cobweb_gem
4
4
  !https://badge.fury.io/rb/cobweb.png!:http://badge.fury.io/rb/cobweb
5
+ !https://circleci.com/gh/stewartmckee/cobweb.svg?style=shield&circle-token=07357f0bd17ac67e21ea161fb9abdb35ecac4c2e!
5
6
  !https://gemnasium.com/stewartmckee/cobweb.png!
6
7
  !https://coveralls.io/repos/stewartmckee/cobweb/badge.png?branch=master(Coverage Status)!:https://coveralls.io/r/stewartmckee/cobweb
7
8
 
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "1.0.29"
6
+ "1.1.0"
7
7
  end
8
8
 
9
9
  end
@@ -8,12 +8,11 @@ class ContentLinkParser
8
8
  def initialize(url, content, options = {})
9
9
  @options = {}.merge(options)
10
10
  @url = url
11
+ @base_url = ''
11
12
  @doc = Nokogiri::HTML(content)
12
13
 
13
- base_url = @url.to_s
14
14
  if @doc.at("base[href]")
15
- base_url = @doc.at("base[href]").attr("href").to_s
16
- @url = base_url if base_url
15
+ @base_url = @doc.at("base[href]").attr("href").to_s if @doc.at("base[href]").attr("href").to_s.present?
17
16
  end
18
17
 
19
18
  @options[:tags] = {}
@@ -46,7 +45,9 @@ class ContentLinkParser
46
45
  options[:valid_schemes] = [:http, :https] unless options.has_key? :valid_schemes
47
46
  data = link_data
48
47
  links = data.keys.map{|key| data[key]}.flatten.uniq
49
- links = links.map{|link| UriHelper.join_no_fragment(@url, link).to_s }
48
+ links = links.map{|link| UriHelper.join_no_fragment(@url, UriHelper.join_no_fragment(@base_url, link))}
49
+ .reject(&:nil?)
50
+ .map(&:to_s)
50
51
  links = links.reject{|link| link =~ /\/([^\/]+?)\/\1\// }
51
52
  links = links.reject{|link| link =~ /([^\/]+?)\/([^\/]+?)\/.*?\1\/\2/ }
52
53
  links = links.select{|link| options[:valid_schemes].include? link.split(':')[0].to_sym}
@@ -2,6 +2,7 @@
2
2
  if Gem::Specification.find_all_by_name("sidekiq", ">=1.0.0").count >= 1
3
3
  SIDEKIQ_INSTALLED = true
4
4
  require 'sidekiq'
5
+ require 'sidekiq/api'
5
6
  else
6
7
  SIDEKIQ_INSTALLED = false
7
8
  puts "sidekiq gem not installed, skipping crawl_worker specs" if defined?(ENVIRONMENT) && ENVIRONMENT=="test"
@@ -29,4 +30,4 @@ module Sidekiq
29
30
  end
30
31
  end
31
32
  end
32
- end
33
+ end
@@ -111,4 +111,4 @@ describe CobwebCrawlHelper do
111
111
  end
112
112
 
113
113
 
114
- end
114
+ end
@@ -212,7 +212,7 @@ def wait_for_crawl_finished(crawl_id, timeout=20)
212
212
  @counter = 0
213
213
  start_time = Time.now
214
214
  while(running?(crawl_id) && Time.now < start_time + timeout) do
215
- puts Sidekiq::Stats.new.queues
215
+ # puts Sidekiq::Stats.new.queues
216
216
  sleep 1
217
217
  end
218
218
  if Time.now > start_time + timeout
@@ -35,13 +35,13 @@ RSpec.configure do |config|
35
35
  }
36
36
 
37
37
  config.before(:each) {
38
-
38
+
39
39
  @redis_mock_object = MockRedis.new
40
40
  Redis.stub(:new).and_return(@redis_mock_object)
41
41
  Redis::Namespace.stub(:new).and_return(@redis_mock_object)
42
-
42
+
43
43
  @redis_mock_object.flushdb
44
-
44
+
45
45
  }
46
46
 
47
47
  end
metadata CHANGED
@@ -1,127 +1,225 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.29
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stewart McKee
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-05 00:00:00.000000000 Z
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: redis
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
- - - "~>"
31
+ - - ">="
18
32
  - !ruby/object:Gem::Version
19
- version: '3.0'
33
+ version: 3.2.1
20
34
  type: :runtime
21
35
  prerelease: false
22
36
  version_requirements: !ruby/object:Gem::Requirement
23
37
  requirements:
24
- - - "~>"
38
+ - - ">="
25
39
  - !ruby/object:Gem::Version
26
- version: '3.0'
40
+ version: 3.2.1
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: nokogiri
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
- - - "~>"
45
+ - - ">="
32
46
  - !ruby/object:Gem::Version
33
- version: '1.6'
47
+ version: 1.6.6.2
34
48
  type: :runtime
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
- - - "~>"
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
- version: '1.6'
54
+ version: 1.6.6.2
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: addressable
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - "~>"
59
+ - - ">="
46
60
  - !ruby/object:Gem::Version
47
- version: '2.3'
61
+ version: 2.3.8
48
62
  type: :runtime
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - "~>"
66
+ - - ">="
53
67
  - !ruby/object:Gem::Version
54
- version: '2.3'
68
+ version: 2.3.8
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: sinatra
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
- - - "~>"
73
+ - - ">="
60
74
  - !ruby/object:Gem::Version
61
- version: '1.4'
75
+ version: 1.4.6
62
76
  type: :runtime
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
- - - "~>"
80
+ - - ">="
67
81
  - !ruby/object:Gem::Version
68
- version: '1.4'
82
+ version: 1.4.6
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: haml
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - "~>"
87
+ - - ">="
74
88
  - !ruby/object:Gem::Version
75
- version: '4.0'
89
+ version: 4.0.7
76
90
  type: :runtime
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
- - - "~>"
94
+ - - ">="
81
95
  - !ruby/object:Gem::Version
82
- version: '4.0'
96
+ version: 4.0.7
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: redis-namespace
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - "~>"
101
+ - - ">="
88
102
  - !ruby/object:Gem::Version
89
- version: '1.3'
103
+ version: 1.5.2
90
104
  type: :runtime
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - "~>"
108
+ - - ">="
95
109
  - !ruby/object:Gem::Version
96
- version: '1.3'
110
+ version: 1.5.2
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: json
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - "~>"
115
+ - - ">="
102
116
  - !ruby/object:Gem::Version
103
- version: '1.8'
117
+ version: 1.8.3
104
118
  type: :runtime
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - "~>"
122
+ - - ">="
109
123
  - !ruby/object:Gem::Version
110
- version: '1.8'
124
+ version: 1.8.3
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: slop
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
- - - "~>"
129
+ - - ">="
116
130
  - !ruby/object:Gem::Version
117
- version: '3.4'
131
+ version: 4.2.0
118
132
  type: :runtime
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
- - - "~>"
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: 4.2.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: rspec
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec-core
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: mock_redis
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: thin
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: coveralls
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: sidekiq
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
123
221
  - !ruby/object:Gem::Version
124
- version: '3.4'
222
+ version: '0'
125
223
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
126
224
  crawl extremely large sites which is much more performant than multi-threaded crawlers. It
127
225
  is also a standalone crawler that has a sophisticated statistics monitoring interface
@@ -518,7 +616,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
518
616
  version: '0'
519
617
  requirements: []
520
618
  rubyforge_project:
521
- rubygems_version: 2.4.8
619
+ rubygems_version: 2.4.5.1
522
620
  signing_key:
523
621
  specification_version: 4
524
622
  summary: Cobweb is a web crawler that can use resque to cluster crawls to quickly