krawler 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/krawler.rb +8 -2
  2. data/lib/krawler/version.rb +1 -1
  3. metadata +5 -10
data/lib/krawler.rb CHANGED
@@ -112,7 +112,7 @@ module Krawler
112
112
  @mutex.synchronize do
113
113
  return if !page.respond_to?(:links)
114
114
 
115
- recache_invalid_results(page)
115
+ #recache_invalid_results(page)
116
116
 
117
117
  page.links.each do |new_link|
118
118
  next if new_link.href.nil?
@@ -168,7 +168,13 @@ module Krawler
168
168
  query = params_to_hash(uri.query || '')
169
169
  query['cache'] = 'false'
170
170
  uri.query = hash_to_params(query)
171
- @links_to_crawl << uri.to_s
171
+ if @restrict # don't crawl outside of our restricted base path
172
+ if @include && uri.path =~ /#{@include}/ # unless we match our inclusion
173
+ if !@crawled_links.include?(uri.path) && !@links_to_crawl.include?(uri.path) # don't crawl what we've alread crawled
174
+ @links_to_crawl << uri.to_s
175
+ end
176
+ end
177
+ end
172
178
  end
173
179
  end
174
180
  end
@@ -1,3 +1,3 @@
1
1
  module Krawler
2
- VERSION = '1.0.13'
2
+ VERSION = '1.0.14'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: krawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-10 00:00:00.000000000 Z
12
+ date: 2013-03-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &70187441725840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,12 +21,7 @@ dependencies:
21
21
  version: 2.5.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: 2.5.1
24
+ version_requirements: *70187441725840
30
25
  description: Simple little website crawler.
31
26
  email:
32
27
  - mike@urlgonomics.com
@@ -66,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
61
  version: '0'
67
62
  requirements: []
68
63
  rubyforge_project: krawler
69
- rubygems_version: 1.8.24
64
+ rubygems_version: 1.8.17
70
65
  signing_key:
71
66
  specification_version: 3
72
67
  summary: ''