iron-crawler 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8a2dc8e70c7883e03bbc90ec200e0f80ef59d059
4
+ data.tar.gz: a348eb768509de52f3406d1fa23d52ddea636c62
5
+ SHA512:
6
+ metadata.gz: b5f54a41ccbdadccecf7d522dcd81c0a13e424e3f3064f9aa9e45775e1f597b0b073d0ae6212d7cc163db3db4bd238031f13e4a86801d35eb84c6cc113f8440c
7
+ data.tar.gz: 522d5666aa95be4477ffb6d4922606fe6c02fe4f5afec4adacd5338ae36ea641f5bd852c68fe04188a6e590537c10e2b9b16a1ac3ee05390b24a1fde1a1516f3
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
+ source 'http://rubygems.org'
2
+
3
+ gem 'mechanize'
4
+
5
+ group :development do
6
+ gem 'shoulda', '>= 0'
7
+ gem 'rdoc', '~> 3.12'
8
+ gem 'bundler', '~> 1.0'
9
+ gem 'jeweler', '~> 2.0.1'
10
+ gem 'simplecov', '>= 0'
11
+ gem 'rspec', '~> 3.4.0'
12
+ gem "reek", "~> 3.10.0"
13
+ gem "roodi", "~> 5.0.0"
14
+ gem "yard", "~> 0.8.0"
15
+ gem "churn", "~> 1.0.2"
16
+ gem "flay", "~> 2.7.0"
17
+ gem "flog", "~> 4.3.2"
18
+ gem "inch", "~> 0.7.0"
19
+ gem "rubocop", "~> 0.37.0"
20
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,215 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ activesupport (4.2.5.1)
5
+ i18n (~> 0.7)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.3, >= 0.3.4)
9
+ tzinfo (~> 1.1)
10
+ addressable (2.4.0)
11
+ arrayfields (4.9.2)
12
+ ast (2.2.0)
13
+ axiom-types (0.1.1)
14
+ descendants_tracker (~> 0.0.4)
15
+ ice_nine (~> 0.11.0)
16
+ thread_safe (~> 0.3, >= 0.3.1)
17
+ builder (3.2.2)
18
+ chronic (0.10.2)
19
+ churn (1.0.2)
20
+ chronic (>= 0.2.3)
21
+ hirb
22
+ json_pure
23
+ main
24
+ rest-client (>= 1.6.0)
25
+ ruby_parser (~> 3.0)
26
+ sexp_processor (~> 4.1)
27
+ codeclimate-engine-rb (0.3.1)
28
+ virtus (~> 1.0)
29
+ coderay (1.1.0)
30
+ coercible (1.0.0)
31
+ descendants_tracker (~> 0.0.1)
32
+ descendants_tracker (0.0.4)
33
+ thread_safe (~> 0.3, >= 0.3.1)
34
+ diff-lcs (1.2.5)
35
+ docile (1.1.5)
36
+ domain_name (0.5.20160128)
37
+ unf (>= 0.0.5, < 1.0.0)
38
+ equalizer (0.0.11)
39
+ erubis (2.7.0)
40
+ faraday (0.9.2)
41
+ multipart-post (>= 1.2, < 3)
42
+ fattr (2.2.2)
43
+ flay (2.7.0)
44
+ erubis (~> 2.7.0)
45
+ ruby_parser (~> 3.0)
46
+ sexp_processor (~> 4.0)
47
+ flog (4.3.2)
48
+ ruby_parser (~> 3.1, > 3.1.0)
49
+ sexp_processor (~> 4.4)
50
+ git (1.2.9.1)
51
+ github_api (0.13.1)
52
+ addressable (~> 2.4.0)
53
+ descendants_tracker (~> 0.0.4)
54
+ faraday (~> 0.8, < 0.10)
55
+ hashie (>= 3.4)
56
+ multi_json (>= 1.7.5, < 2.0)
57
+ oauth2
58
+ hashie (3.4.3)
59
+ highline (1.7.8)
60
+ hirb (0.7.3)
61
+ http-cookie (1.0.2)
62
+ domain_name (~> 0.5)
63
+ i18n (0.7.0)
64
+ ice_nine (0.11.2)
65
+ inch (0.7.0)
66
+ pry
67
+ sparkr (>= 0.2.0)
68
+ term-ansicolor
69
+ yard (~> 0.8.7.5)
70
+ jeweler (2.0.1)
71
+ builder
72
+ bundler (>= 1.0)
73
+ git (>= 1.2.5)
74
+ github_api
75
+ highline (>= 1.6.15)
76
+ nokogiri (>= 1.5.10)
77
+ rake
78
+ rdoc
79
+ json (1.8.3)
80
+ json_pure (1.8.3)
81
+ jwt (1.5.1)
82
+ main (6.2.0)
83
+ arrayfields (>= 4.7.4)
84
+ chronic (>= 0.6.2)
85
+ fattr (>= 2.2.0)
86
+ map (>= 6.1.0)
87
+ map (6.6.0)
88
+ mechanize (2.7.4)
89
+ domain_name (~> 0.5, >= 0.5.1)
90
+ http-cookie (~> 1.0)
91
+ mime-types (>= 1.17.2, < 3)
92
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
93
+ net-http-persistent (~> 2.5, >= 2.5.2)
94
+ nokogiri (~> 1.6)
95
+ ntlm-http (~> 0.1, >= 0.1.1)
96
+ webrobots (>= 0.0.9, < 0.2)
97
+ method_source (0.8.2)
98
+ mime-types (2.99)
99
+ mini_portile2 (2.0.0)
100
+ minitest (5.8.4)
101
+ multi_json (1.11.2)
102
+ multi_xml (0.5.5)
103
+ multipart-post (2.0.0)
104
+ net-http-digest_auth (1.4)
105
+ net-http-persistent (2.9.4)
106
+ netrc (0.11.0)
107
+ nokogiri (1.6.7.2)
108
+ mini_portile2 (~> 2.0.0.rc2)
109
+ ntlm-http (0.1.1)
110
+ oauth2 (1.1.0)
111
+ faraday (>= 0.8, < 0.10)
112
+ jwt (~> 1.0, < 1.5.2)
113
+ multi_json (~> 1.3)
114
+ multi_xml (~> 0.5)
115
+ rack (>= 1.2, < 3)
116
+ parser (2.3.0.3)
117
+ ast (~> 2.2)
118
+ powerpack (0.1.1)
119
+ private_attr (1.1.0)
120
+ pry (0.10.3)
121
+ coderay (~> 1.1.0)
122
+ method_source (~> 0.8.1)
123
+ slop (~> 3.4)
124
+ rack (1.6.4)
125
+ rainbow (2.1.0)
126
+ rake (10.5.0)
127
+ rdoc (3.12.2)
128
+ json (~> 1.4)
129
+ reek (3.10.0)
130
+ codeclimate-engine-rb (~> 0.3.1)
131
+ parser (~> 2.3)
132
+ private_attr (~> 1.1)
133
+ rainbow (~> 2.0)
134
+ rest-client (1.8.0)
135
+ http-cookie (>= 1.0.2, < 2.0)
136
+ mime-types (>= 1.16, < 3.0)
137
+ netrc (~> 0.7)
138
+ roodi (5.0.0)
139
+ ruby_parser (~> 3.2, >= 3.2.2)
140
+ rspec (3.4.0)
141
+ rspec-core (~> 3.4.0)
142
+ rspec-expectations (~> 3.4.0)
143
+ rspec-mocks (~> 3.4.0)
144
+ rspec-core (3.4.2)
145
+ rspec-support (~> 3.4.0)
146
+ rspec-expectations (3.4.0)
147
+ diff-lcs (>= 1.2.0, < 2.0)
148
+ rspec-support (~> 3.4.0)
149
+ rspec-mocks (3.4.1)
150
+ diff-lcs (>= 1.2.0, < 2.0)
151
+ rspec-support (~> 3.4.0)
152
+ rspec-support (3.4.1)
153
+ rubocop (0.37.0)
154
+ parser (>= 2.3.0.2, < 3.0)
155
+ powerpack (~> 0.1)
156
+ rainbow (>= 1.99.1, < 3.0)
157
+ ruby-progressbar (~> 1.7)
158
+ unicode-display_width (~> 0.3)
159
+ ruby-progressbar (1.7.5)
160
+ ruby_parser (3.7.3)
161
+ sexp_processor (~> 4.1)
162
+ sexp_processor (4.6.1)
163
+ shoulda (3.5.0)
164
+ shoulda-context (~> 1.0, >= 1.0.1)
165
+ shoulda-matchers (>= 1.4.1, < 3.0)
166
+ shoulda-context (1.2.1)
167
+ shoulda-matchers (2.8.0)
168
+ activesupport (>= 3.0.0)
169
+ simplecov (0.11.2)
170
+ docile (~> 1.1.0)
171
+ json (~> 1.8)
172
+ simplecov-html (~> 0.10.0)
173
+ simplecov-html (0.10.0)
174
+ slop (3.6.0)
175
+ sparkr (0.4.1)
176
+ term-ansicolor (1.3.2)
177
+ tins (~> 1.0)
178
+ thread_safe (0.3.5)
179
+ tins (1.8.2)
180
+ tzinfo (1.2.2)
181
+ thread_safe (~> 0.1)
182
+ unf (0.1.4)
183
+ unf_ext
184
+ unf_ext (0.0.7.2)
185
+ unicode-display_width (0.3.1)
186
+ virtus (1.0.5)
187
+ axiom-types (~> 0.1)
188
+ coercible (~> 1.0)
189
+ descendants_tracker (~> 0.0, >= 0.0.3)
190
+ equalizer (~> 0.0, >= 0.0.9)
191
+ webrobots (0.1.2)
192
+ yard (0.8.7.6)
193
+
194
+ PLATFORMS
195
+ ruby
196
+
197
+ DEPENDENCIES
198
+ bundler (~> 1.0)
199
+ churn (~> 1.0.2)
200
+ flay (~> 2.7.0)
201
+ flog (~> 4.3.2)
202
+ inch (~> 0.7.0)
203
+ jeweler (~> 2.0.1)
204
+ mechanize
205
+ rdoc (~> 3.12)
206
+ reek (~> 3.10.0)
207
+ roodi (~> 5.0.0)
208
+ rspec (~> 3.4.0)
209
+ rubocop (~> 0.37.0)
210
+ shoulda
211
+ simplecov
212
+ yard (~> 0.8.0)
213
+
214
+ BUNDLED WITH
215
+ 1.10.6
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2016 Ben Visser
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,23 @@
1
+ # Iron Crawler
2
+
3
+ A generic web crawler.
4
+
5
+ ## Requirements
6
+
7
+ From a starting URL, crawl all links on that URL and print a list of URLs visited.
8
+
9
+ - Follows href attributes contained in tags from the same domain
10
+ - Ignores href attributes contained in tags from other domains (even subdomains)
11
+ - Captures the src attribute of script tags and the href attribute of link tags
12
+
13
+ ## Getting Started
14
+
15
+ It's easy to get started!
16
+
17
+ ```
18
+ iron-crawler <url>
19
+ ```
20
+
21
+ will crawl any site for you.
22
+
23
+
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+
6
+ begin
7
+ Bundler.setup(:default, :development)
8
+ rescue Bundler::BundlerError => e
9
+ $stderr.puts e.message
10
+ $stderr.puts "Run `bundle install` to install missing gems"
11
+ exit e.status_code
12
+ end
13
+
14
+ # import rake tasks
15
+ Dir.glob('tasks/**/*.rake').each(&method(:import))
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/bin/iron-crawler ADDED
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'iron-crawler'
4
+ require 'terminal-announce'
5
+
6
+ if ARGV.empty? || %w(-h --help help).include?(ARGV.first)
7
+ Announce.info 'Syntax: iron-crawler <url>'
8
+ abort
9
+ end
10
+
11
+ url = ARGV.first
12
+
13
+ Announce.info "Crawling #{url}..."
14
+ agent = Crawler.new
15
+ agent.spiderize(url)
16
+
17
+ puts agent.history
@@ -0,0 +1 @@
1
+ require_relative 'iron-crawler/crawler'
@@ -0,0 +1,80 @@
1
+ require 'mechanize'
2
+
3
+ # Enables the spidering of websites by utilizing Mechanize
4
+ class Crawler < Mechanize
5
+
6
+ def initialize
7
+ @mech = Mechanize.new
8
+ end
9
+
10
+
11
+ # Kicks off the spidering of a site.
12
+ # @param url [String] A simple URL string to crawl.
13
+ # @return [nil] Each crawled URL is printed as it is visited.
14
+ #
15
+ def spiderize(url)
16
+ @mech.max_history = nil
17
+ stack = @mech.get(url).links
18
+ while link = stack.pop
19
+ next if reject(link)
20
+ puts "crawling #{link.uri}"
21
+ begin
22
+ page = link.click
23
+ next unless Mechanize::Page === page
24
+ stack.push(*page.links)
25
+ rescue Mechanize::ResponseCodeError
26
+ end
27
+ end
28
+ end
29
+
30
+
31
+ # Whether we should refuse to spider a URL.
32
+ # @param link [Mechanize::Page::Link] A mechanize page link.
33
+ # @return [Boolean] true if we should reject URL.
34
+ #
35
+ def reject(link)
36
+ # TODO: are we accounting for subdomains?
37
+ if not_valid_uri?(link) || not_same_domain?(link) || already_spidered?(link)
38
+ return true
39
+ else
40
+ return false
41
+ end
42
+ end
43
+
44
+
45
+ # Checks whether a link has already been crawled.
46
+ # @param link [Mechanize::Page::Link] A mechanize page link.
47
+ # @return [Boolean] true when already spidered.
48
+ #
49
+ def already_spidered?(link)
50
+ begin
51
+ abs_url = @mech.history.first.uri.to_s.chomp('/') + link.href + '/'
52
+ return true if @mech.visited? link.href
53
+ return true if @mech.visited? abs_url
54
+ rescue Mechanize::UnsupportedSchemeError
55
+ puts "skipping #{link.uri}"
56
+ return true
57
+ end
58
+ end
59
+
60
+
61
+ # Checks whether a URL is able to be crawled.
62
+ # @param link [Mechanize::Page::Link] A mechanize page link.
63
+ # @return [Boolean] true when the URL is not valid.
64
+ #
65
+ def not_valid_uri?(link)
66
+ return true unless link.uri && (/^http.+/ =~ link.uri.to_s || /\/.+/ =~ link.uri.to_s)
67
+ end
68
+
69
+
70
+ # Checks whether a URL is from the same domain.
71
+ # @param link [Mechanize::Page::Link] A mechanize page link.
72
+ # @return [Boolean] true when not the same domain as original URL.
73
+ #
74
+ def not_same_domain?(link)
75
+ host = link.uri.host
76
+ return true unless host.nil? || host == @mech.history.first.uri.host
77
+ end
78
+
79
+ private :not_valid_uri?, :not_same_domain?, :already_spidered?
80
+ end
@@ -0,0 +1,14 @@
1
+ require 'jeweler'
2
+
3
+ Jeweler::Tasks.new do |gem|
4
+ gem.name = "iron-crawler"
5
+ gem.homepage = "http://github.com/noqcks/iron-crawler"
6
+ gem.license = "MIT"
7
+ gem.summary = %Q{A generic web crawler.}
8
+ gem.description = %Q{A generic web crawler that doesn't crawl outside URLs.}
9
+ gem.email = "theodore.r.visser@gmail.com"
10
+ gem.authors = ["Ben Visser"]
11
+ # dependencies defined in Gemfile
12
+ end
13
+
14
+ Jeweler::RubygemsDotOrgTasks.new
data/tasks/qa.rake ADDED
@@ -0,0 +1,47 @@
1
+ require 'bundler/setup'
2
+ require 'rubocop/rake_task'
3
+ require 'reek/rake/task'
4
+ require 'roodi_task'
5
+
6
+ task default: [:audit]
7
+
8
+ task audit: [:style, :complexity, :duplication, :design, :documentation]
9
+
10
+ task style: [:rubocop]
11
+
12
+ desc 'Enforce Style Conformance with RuboCop'
13
+ RuboCop::RakeTask.new(:rubocop) do |task|
14
+ task.patterns = ['lib/**/*.rb']
15
+ task.fail_on_error = false
16
+ end
17
+
18
+ task complexity: [:flog]
19
+
20
+ desc 'Assess Complexity with flog'
21
+ task :flog do
22
+ sh 'flog lib/**/*.rb'
23
+ end
24
+
25
+ task duplication: [:flay]
26
+
27
+ desc 'Detect duplication with flay'
28
+ task :flay do
29
+ sh 'flay lib/**/*.rb'
30
+ end
31
+
32
+ task design: [:reek]
33
+
34
+ desc 'Sniff out code smells with reek'
35
+ Reek::Rake::Task.new(:reek) do |task|
36
+ task.fail_on_error = false
37
+ end
38
+
39
+ desc 'Identity volatile areas with churn'
40
+ task :rework do
41
+ sh 'churn'
42
+ end
43
+
44
+ desc 'Critique documentation with inch'
45
+ task :documentation do
46
+ sh 'inch'
47
+ end
metadata ADDED
@@ -0,0 +1,268 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iron-crawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Ben Visser
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: shoulda
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rdoc
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.12'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.12'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: jeweler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 2.0.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.0.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 3.4.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 3.4.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: reek
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 3.10.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 3.10.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: roodi
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 5.0.0
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 5.0.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: yard
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.8.0
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.8.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: churn
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 1.0.2
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.0.2
167
+ - !ruby/object:Gem::Dependency
168
+ name: flay
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 2.7.0
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 2.7.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: flog
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 4.3.2
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 4.3.2
195
+ - !ruby/object:Gem::Dependency
196
+ name: inch
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 0.7.0
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 0.7.0
209
+ - !ruby/object:Gem::Dependency
210
+ name: rubocop
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - "~>"
214
+ - !ruby/object:Gem::Version
215
+ version: 0.37.0
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - "~>"
221
+ - !ruby/object:Gem::Version
222
+ version: 0.37.0
223
+ description: A generic web crawler that doesn't crawl outside URLs.
224
+ email: theodore.r.visser@gmail.com
225
+ executables:
226
+ - iron-crawler
227
+ extensions: []
228
+ extra_rdoc_files:
229
+ - LICENSE.txt
230
+ - README.md
231
+ files:
232
+ - ".document"
233
+ - Gemfile
234
+ - Gemfile.lock
235
+ - LICENSE.txt
236
+ - README.md
237
+ - Rakefile
238
+ - VERSION
239
+ - bin/iron-crawler
240
+ - lib/iron-crawler.rb
241
+ - lib/iron-crawler/crawler.rb
242
+ - tasks/jeweler.rake
243
+ - tasks/qa.rake
244
+ homepage: http://github.com/noqcks/iron-crawler
245
+ licenses:
246
+ - MIT
247
+ metadata: {}
248
+ post_install_message:
249
+ rdoc_options: []
250
+ require_paths:
251
+ - lib
252
+ required_ruby_version: !ruby/object:Gem::Requirement
253
+ requirements:
254
+ - - ">="
255
+ - !ruby/object:Gem::Version
256
+ version: '0'
257
+ required_rubygems_version: !ruby/object:Gem::Requirement
258
+ requirements:
259
+ - - ">="
260
+ - !ruby/object:Gem::Version
261
+ version: '0'
262
+ requirements: []
263
+ rubyforge_project:
264
+ rubygems_version: 2.4.6
265
+ signing_key:
266
+ specification_version: 4
267
+ summary: A generic web crawler.
268
+ test_files: []