crawl 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/crawl.gemspec +2 -2
- data/lib/crawl/engine.rb +1 -1
- data/lib/crawl/version.rb +1 -1
- metadata +19 -29
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 4825e68f15b25c5f26947ea149b6c835ced7bdf8
|
|
4
|
+
data.tar.gz: fdebd71a13a40f51cb20a635c5e02ea6dd9a4f6f
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 25a485be0c41e3f23b60f5974806757c6a6ab6888bbadbf0df317b3c7bce1825616026a5d1cd0c2feaf223a7af844b3630de0d6e547108d87d77c00b89dd27c0
|
|
7
|
+
data.tar.gz: abe1b5261102f0cd498d75caedef412e4bee3a7995401fe3c88ef0619b01c4d18a46904db4d5b778916fbf76f1ae7faf0935758463ab45e389ef98915ff633e7
|
data/crawl.gemspec
CHANGED
|
@@ -16,6 +16,6 @@ Gem::Specification.new do |gem|
|
|
|
16
16
|
gem.version = Crawl::VERSION
|
|
17
17
|
gem.add_dependency('nokogiri')
|
|
18
18
|
gem.add_dependency('rest-client')
|
|
19
|
-
gem.add_dependency('eventmachine', '
|
|
20
|
-
gem.add_dependency('em-http-request')
|
|
19
|
+
gem.add_dependency('eventmachine', '1.0.1')
|
|
20
|
+
gem.add_dependency('em-http-request', '1.0.3')
|
|
21
21
|
end
|
data/lib/crawl/engine.rb
CHANGED
|
@@ -105,7 +105,7 @@ private
|
|
|
105
105
|
raw_links = anchors.map{|anchor| anchor['href']}
|
|
106
106
|
raw_links.compact!
|
|
107
107
|
raw_links.map!{|link| link.sub(options[:domain], '')}
|
|
108
|
-
raw_links.delete_if{|link| link =~ %r{^http(s)?://}}
|
|
108
|
+
raw_links.delete_if{|link| link =~ %r{^http(s)?://} && !link.include?(options[:domain])}
|
|
109
109
|
raw_links.delete_if{|link| IGNORE.any?{|pattern| link =~ pattern}}
|
|
110
110
|
raw_links.map{ |url| Page.new(@register, url, page.url) }
|
|
111
111
|
end
|
data/lib/crawl/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,80 +1,71 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: crawl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.2
|
|
5
|
-
prerelease:
|
|
4
|
+
version: 1.0.3
|
|
6
5
|
platform: ruby
|
|
7
6
|
authors:
|
|
8
7
|
- Tor Erik Linnerud
|
|
9
8
|
autorequire:
|
|
10
9
|
bindir: bin
|
|
11
10
|
cert_chain: []
|
|
12
|
-
date: 2013-
|
|
11
|
+
date: 2013-03-15 00:00:00.000000000 Z
|
|
13
12
|
dependencies:
|
|
14
13
|
- !ruby/object:Gem::Dependency
|
|
15
14
|
name: nokogiri
|
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
|
17
|
-
none: false
|
|
18
16
|
requirements:
|
|
19
|
-
- -
|
|
17
|
+
- - '>='
|
|
20
18
|
- !ruby/object:Gem::Version
|
|
21
19
|
version: '0'
|
|
22
20
|
type: :runtime
|
|
23
21
|
prerelease: false
|
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
none: false
|
|
26
23
|
requirements:
|
|
27
|
-
- -
|
|
24
|
+
- - '>='
|
|
28
25
|
- !ruby/object:Gem::Version
|
|
29
26
|
version: '0'
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
|
31
28
|
name: rest-client
|
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
|
33
|
-
none: false
|
|
34
30
|
requirements:
|
|
35
|
-
- -
|
|
31
|
+
- - '>='
|
|
36
32
|
- !ruby/object:Gem::Version
|
|
37
33
|
version: '0'
|
|
38
34
|
type: :runtime
|
|
39
35
|
prerelease: false
|
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
41
|
-
none: false
|
|
42
37
|
requirements:
|
|
43
|
-
- -
|
|
38
|
+
- - '>='
|
|
44
39
|
- !ruby/object:Gem::Version
|
|
45
40
|
version: '0'
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
|
47
42
|
name: eventmachine
|
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
|
49
|
-
none: false
|
|
50
44
|
requirements:
|
|
51
|
-
- -
|
|
45
|
+
- - '='
|
|
52
46
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: 1.0.
|
|
47
|
+
version: 1.0.1
|
|
54
48
|
type: :runtime
|
|
55
49
|
prerelease: false
|
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
-
none: false
|
|
58
51
|
requirements:
|
|
59
|
-
- -
|
|
52
|
+
- - '='
|
|
60
53
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 1.0.
|
|
54
|
+
version: 1.0.1
|
|
62
55
|
- !ruby/object:Gem::Dependency
|
|
63
56
|
name: em-http-request
|
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
|
65
|
-
none: false
|
|
66
58
|
requirements:
|
|
67
|
-
- -
|
|
59
|
+
- - '='
|
|
68
60
|
- !ruby/object:Gem::Version
|
|
69
|
-
version:
|
|
61
|
+
version: 1.0.3
|
|
70
62
|
type: :runtime
|
|
71
63
|
prerelease: false
|
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
-
none: false
|
|
74
65
|
requirements:
|
|
75
|
-
- -
|
|
66
|
+
- - '='
|
|
76
67
|
- !ruby/object:Gem::Version
|
|
77
|
-
version:
|
|
68
|
+
version: 1.0.3
|
|
78
69
|
description: Crawl all pages on a domain, checking for errors
|
|
79
70
|
email:
|
|
80
71
|
- tor@alphasights.com
|
|
@@ -98,27 +89,26 @@ files:
|
|
|
98
89
|
- lib/crawl/version.rb
|
|
99
90
|
homepage: http://github.com/alphasights/crawl
|
|
100
91
|
licenses: []
|
|
92
|
+
metadata: {}
|
|
101
93
|
post_install_message:
|
|
102
94
|
rdoc_options: []
|
|
103
95
|
require_paths:
|
|
104
96
|
- lib
|
|
105
97
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
106
|
-
none: false
|
|
107
98
|
requirements:
|
|
108
|
-
- -
|
|
99
|
+
- - '>='
|
|
109
100
|
- !ruby/object:Gem::Version
|
|
110
101
|
version: '0'
|
|
111
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
|
-
none: false
|
|
113
103
|
requirements:
|
|
114
|
-
- -
|
|
104
|
+
- - '>='
|
|
115
105
|
- !ruby/object:Gem::Version
|
|
116
106
|
version: '0'
|
|
117
107
|
requirements: []
|
|
118
108
|
rubyforge_project:
|
|
119
|
-
rubygems_version:
|
|
109
|
+
rubygems_version: 2.0.0
|
|
120
110
|
signing_key:
|
|
121
|
-
specification_version:
|
|
111
|
+
specification_version: 4
|
|
122
112
|
summary: Crawl pages witin a domain, reporting any page that returns a bad response
|
|
123
113
|
code
|
|
124
114
|
test_files: []
|