krawler 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/krawler/version.rb +1 -1
  2. data/lib/krawler.rb +4 -3
  3. metadata +4 -4
data/lib/krawler/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Krawler
2
- VERSION = "1.0.0"
2
+ VERSION = '1.0.1'
3
3
  end
data/lib/krawler.rb CHANGED
@@ -77,17 +77,18 @@ module Krawler
77
77
  @suspect_links << link
78
78
  return
79
79
  ensure
80
- @mutex.synchronize {
80
+ @mutex.synchronize do
81
81
  puts link
82
82
  puts " [#{Time.now - start}s] #{@links_to_crawl.size} links..."
83
- }
83
+ end
84
84
  end
85
85
 
86
86
  @mutex.synchronize do
87
87
  return if !page.respond_to?(:links)
88
88
  page.links.each do |new_link|
89
89
  next if new_link.href.nil?
90
-
90
+ next if new_link.rel.include? 'nofollow'
91
+
91
92
  # quick scrub known issues
92
93
  new_link = new_link.href.gsub(/ /, '%20')
93
94
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: krawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-20 00:00:00.000000000 Z
12
+ date: 2012-06-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70309315236200 !ruby/object:Gem::Requirement
16
+ requirement: &70152297970740 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: 2.5.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70309315236200
24
+ version_requirements: *70152297970740
25
25
  description: Simple little website crawler.
26
26
  email:
27
27
  - mike@urlgonomics.com