iron-crawler 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/iron-crawler.gemspec +2 -2
- data/lib/iron-crawler/crawler.rb +2 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b85415ba9b59160fdbfe5ca87c0f14eca5b1bcdc
|
4
|
+
data.tar.gz: 712549785cf1adc3f0dbe205d69f2abe60905968
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a548d701fe83340f17bba0c406efd26fc76d401293f3a7ac36dbd80a822bae55afd2e9f73251250ab17560b5ccafbcfb44ae9818b6bf67876a1d7f88d39a0fda
|
7
|
+
data.tar.gz: f9a1bf749346692fc5e0df6ce859dc9c7136a06525dd20bbcb165dc074cc00212c7e05b28aab6fd545fc31e7a2cebe43ec7890f04386c685ececda5c0bd5b1cf
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.2
|
1
|
+
1.1.3
|
data/iron-crawler.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: iron-crawler 1.1.2 ruby lib
|
5
|
+
# stub: iron-crawler 1.1.3 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "iron-crawler"
|
9
|
-
s.version = "1.1.2"
|
9
|
+
s.version = "1.1.3"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
data/lib/iron-crawler/crawler.rb
CHANGED
@@ -15,7 +15,6 @@ class Crawler < Mechanize
|
|
15
15
|
#
|
16
16
|
def spiderize(url)
|
17
17
|
page = @mech.get(url)
|
18
|
-
|
19
18
|
stack = page.links
|
20
19
|
stack.push(*src_links(page))
|
21
20
|
|
@@ -24,7 +23,7 @@ class Crawler < Mechanize
|
|
24
23
|
puts "crawling #{link.uri}"
|
25
24
|
begin
|
26
25
|
page = link.click
|
27
|
-
next unless Mechanize::Page
|
26
|
+
next unless Mechanize::Page === page
|
28
27
|
stack.push(*src_links(page))
|
29
28
|
stack.push(*page.links)
|
30
29
|
rescue Mechanize::ResponseCodeError
|
@@ -88,8 +87,7 @@ class Crawler < Mechanize
|
|
88
87
|
# @return [Boolean] true when the link has no URI or the URI is not valid.
|
89
88
|
#
|
90
89
|
def not_valid_uri?(link)
|
91
|
-
|
92
|
-
return true unless link.uri && valid_uri_regex
|
90
|
+
return true unless link.uri && (/^http.+/ =~ link.uri.to_s || /\/.+/ =~ link.uri.to_s)
|
93
91
|
end
|
94
92
|
|
95
93
|
|