krawler 1.0.13 → 1.0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/krawler.rb +8 -2
- data/lib/krawler/version.rb +1 -1
- metadata +5 -10
data/lib/krawler.rb
CHANGED
@@ -112,7 +112,7 @@ module Krawler
|
|
112
112
|
@mutex.synchronize do
|
113
113
|
return if !page.respond_to?(:links)
|
114
114
|
|
115
|
-
recache_invalid_results(page)
|
115
|
+
#recache_invalid_results(page)
|
116
116
|
|
117
117
|
page.links.each do |new_link|
|
118
118
|
next if new_link.href.nil?
|
@@ -168,7 +168,13 @@ module Krawler
|
|
168
168
|
query = params_to_hash(uri.query || '')
|
169
169
|
query['cache'] = 'false'
|
170
170
|
uri.query = hash_to_params(query)
|
171
|
-
@
|
171
|
+
if @restrict # don't crawl outside of our restricted base path
|
172
|
+
if @include && uri.path =~ /#{@include}/ # unless we match our inclusion
|
173
|
+
if !@crawled_links.include?(uri.path) && !@links_to_crawl.include?(uri.path) # don't crawl what we've already crawled
|
174
|
+
@links_to_crawl << uri.to_s
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
172
178
|
end
|
173
179
|
end
|
174
180
|
end
|
data/lib/krawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: krawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.13
|
4
|
+
version: 1.0.14
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70187441725840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,12 +21,7 @@ dependencies:
|
|
21
21
|
version: 2.5.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 2.5.1
|
24
|
+
version_requirements: *70187441725840
|
30
25
|
description: Simple little website crawler.
|
31
26
|
email:
|
32
27
|
- mike@urlgonomics.com
|
@@ -66,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
61
|
version: '0'
|
67
62
|
requirements: []
|
68
63
|
rubyforge_project: krawler
|
69
|
-
rubygems_version: 1.8.
|
64
|
+
rubygems_version: 1.8.17
|
70
65
|
signing_key:
|
71
66
|
specification_version: 3
|
72
67
|
summary: ''
|