krawler 1.0.13 → 1.0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/krawler.rb +8 -2
  2. data/lib/krawler/version.rb +1 -1
  3. metadata +5 -10
data/lib/krawler.rb CHANGED
@@ -112,7 +112,7 @@ module Krawler
112
112
  @mutex.synchronize do
113
113
  return if !page.respond_to?(:links)
114
114
 
115
- recache_invalid_results(page)
115
+ #recache_invalid_results(page)
116
116
 
117
117
  page.links.each do |new_link|
118
118
  next if new_link.href.nil?
@@ -168,7 +168,13 @@ module Krawler
168
168
  query = params_to_hash(uri.query || '')
169
169
  query['cache'] = 'false'
170
170
  uri.query = hash_to_params(query)
171
- @links_to_crawl << uri.to_s
171
+ if @restrict # don't crawl outside of our restricted base path
172
+ if @include && uri.path =~ /#{@include}/ # unless we match our inclusion
173
+ if !@crawled_links.include?(uri.path) && !@links_to_crawl.include?(uri.path) # don't crawl what we've alread crawled
174
+ @links_to_crawl << uri.to_s
175
+ end
176
+ end
177
+ end
172
178
  end
173
179
  end
174
180
  end
@@ -1,3 +1,3 @@
1
1
  module Krawler
2
- VERSION = '1.0.13'
2
+ VERSION = '1.0.14'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: krawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-10 00:00:00.000000000 Z
12
+ date: 2013-03-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &70187441725840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,12 +21,7 @@ dependencies:
21
21
  version: 2.5.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: 2.5.1
24
+ version_requirements: *70187441725840
30
25
  description: Simple little website crawler.
31
26
  email:
32
27
  - mike@urlgonomics.com
@@ -66,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
61
  version: '0'
67
62
  requirements: []
68
63
  rubyforge_project: krawler
69
- rubygems_version: 1.8.24
64
+ rubygems_version: 1.8.17
70
65
  signing_key:
71
66
  specification_version: 3
72
67
  summary: ''