mech_warrior 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 63e3f44169fe19d92e5c237ce58460e35522bc1d
+   data.tar.gz: 95bc44122c6b15c317c512866a1f1c00a769260f
+ SHA512:
+   metadata.gz: eb826d2289cbed61494ef15285ee2ae0e2fad84cc40a6434895f0eedd3430706fe6539e912af94faf98a49307b288146c19be23b8febc7dce15d91d5bba7e473
+   data.tar.gz: 849c53b351db05f04375bf73e10e6c76f284b62d0664cab8697926792377f1468119c5bd0fb7afbbed14c930b4464109c1342758a8341f4586bb4e0f59a3c40f
data/.ruby-version ADDED
@@ -0,0 +1 @@
+ 2.1.1
data/Gemfile ADDED
@@ -0,0 +1,2 @@
+ source 'https://rubygems.org'
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,61 @@
+ PATH
+   remote: .
+   specs:
+     mech_warrior (0.0.1)
+       celluloid (~> 0)
+       mechanize (~> 2.7)
+       xml-sitemap (~> 1.3)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     builder (3.2.2)
+     celluloid (0.15.2)
+       timers (~> 1.1.0)
+     diff-lcs (1.2.5)
+     domain_name (0.5.18)
+       unf (>= 0.0.5, < 1.0.0)
+     fakeweb (1.3.0)
+     http-cookie (1.0.2)
+       domain_name (~> 0.5)
+     mechanize (2.7.3)
+       domain_name (~> 0.5, >= 0.5.1)
+       http-cookie (~> 1.0)
+       mime-types (~> 2.0)
+       net-http-digest_auth (~> 1.1, >= 1.1.1)
+       net-http-persistent (~> 2.5, >= 2.5.2)
+       nokogiri (~> 1.4)
+       ntlm-http (~> 0.1, >= 0.1.1)
+       webrobots (>= 0.0.9, < 0.2)
+     mime-types (2.2)
+     mini_portile (0.5.3)
+     net-http-digest_auth (1.4)
+     net-http-persistent (2.9.4)
+     nokogiri (1.6.1)
+       mini_portile (~> 0.5.0)
+     ntlm-http (0.1.1)
+     rake (0.9.6)
+     rspec (2.14.1)
+       rspec-core (~> 2.14.0)
+       rspec-expectations (~> 2.14.0)
+       rspec-mocks (~> 2.14.0)
+     rspec-core (2.14.8)
+     rspec-expectations (2.14.5)
+       diff-lcs (>= 1.1.3, < 2.0)
+     rspec-mocks (2.14.6)
+     timers (1.1.0)
+     unf (0.1.4)
+       unf_ext
+     unf_ext (0.0.6)
+     webrobots (0.1.1)
+     xml-sitemap (1.3.3)
+       builder (>= 2.0)
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   fakeweb (~> 1.3)
+   mech_warrior!
+   rake (~> 0)
+   rspec (~> 2.14)
data/README.md ADDED
@@ -0,0 +1,58 @@
+ MechWarrior
+ ===========
+
+ MechWarrior is a Mechanize- and Celluloid-powered site crawler that generates a
+ JSON file of all pages, the links on those pages, and the assets those pages rely
+ upon, and can optionally generate an XML sitemap compliant with the Sitemaps 0.9
+ protocol.
+
+
+ Version
+ -------
+
+ 0.0.1
+
+ Tech
+ ----
+
+ MechWarrior relies on several excellent RubyGems:
+
+ * [Mechanize] - a Ruby library that makes automated web interaction easy.
+ * [Celluloid] - an Actor-based concurrent object framework for Ruby.
+ * [XML-Sitemap] - provides easy XML sitemap generation for Ruby/Rails/Merb/Sinatra applications.
+
+
+ Installation
+ ------------
+
+ ```sh
+ gem install mech_warrior-0.0.1.gem
+ ```
+
+ Crawling a site
+ ---------------
+
+ ```sh
+ bin/spider
+ ```
+ Then enter a host name, followed by any additional options you wish to pass in
+ to override the default options in `lib/mech_warrior.rb`.
+
+
+ Todo
+ ----
+ Some of the functionality, including XML sitemap generation, is untested.
+ Support for multiple hosts in a single spider is currently incomplete,
+ despite the 'allowed_domains' array, unless every host other than the
+ default has only absolute links to follow.
+
+ License
+ -------
+
+ MIT
+
+ [mechanize]:https://github.com/sparklemotion/mechanize
+ [celluloid]:http://celluloid.io/
+ [xml-sitemap]:https://github.com/sosedoff/xml-sitemap
+
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env rake
+
+ task :default => 'spec'
+ task :spec do
+   sh "rspec spec/mech_warrior_spec.rb"
+ end
data/bin/spider ADDED
@@ -0,0 +1,12 @@
+ #!/usr/bin/env ruby
+
+ require_relative "../lib/mech_warrior"
+
+ puts "Host:"
+ host = gets.chomp
+ puts
+ puts "Other options:"
+ opts = gets.chomp
+ opts_hash = eval("{" + opts + "}")
+ puts opts_hash.to_json
+ MechWarrior.crawl(opts_hash.merge(default_host: host))
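`bin/spider` above builds its options by wrapping whatever you type at the "Other options:" prompt in braces and `eval`-ing it, so the input is written exactly like the contents of a Ruby hash literal. A minimal illustration of that parsing, with a hypothetical input string and host:

```ruby
typed = "pool_size: 10, generate_sitemap: true"  # what you might enter at "Other options:"
opts_hash = eval("{" + typed + "}")
# => {:pool_size=>10, :generate_sitemap=>true}

# bin/spider then merges in the host you entered and starts the crawl.
# Note that eval runs arbitrary Ruby, so only type options you trust.
MechWarrior.crawl(opts_hash.merge(default_host: 'example.org'))
```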
data/lib/mech_warrior.rb ADDED
@@ -0,0 +1,61 @@
+ require 'json'
+ require 'mechanize'
+ require 'xml-sitemap'
+ require 'logger'
+ require 'celluloid/autostart'
+ require_relative 'mech_warrior/mech_cell'
+ require_relative 'mech_warrior/crawler'
+
+ module MechWarrior
+   SITEMAP_MAX_LINKS = 50000
+   DEFAULTS = {
+     allowed_domains: [],
+     default_protocol: 'http://',
+     default_host: 'www.example.com',
+     # this is less 'default_host' at the moment than 'only', though links to other domains will work as long
+     # as all links on other domains' pages are absolute. To support multiple domains while supporting
+     # relative links, some new state would have to be introduced to track 'current_host'
+     max_depth_divisor: 256, # this results in a max depth of 4096 on my machine, which seems deep enough
+     pool_size: 20,
+     logger_class: Logger,
+     log_file_name: "mech_warrior_errors.txt"
+   }
+
+   def self.crawl(opts={})
+     crawl_results = Crawler.new(opts)
+     crawl_results.agent_pool.future.terminate
+     unless opts[:skip_asset_json]
+       File.open("#{crawl_results.default_host}_crawl_#{Time.now.gmtime}", 'w') do |file|
+         file.write(JSON.pretty_generate(crawl_results.pages))
+       end
+     end
+
+     if sitemap_opts = opts[:generate_sitemap]
+       generate_sitemap(crawl_results.default_host,
+                        crawl_results.pages,
+                        sitemap_opts.respond_to?(:keys) ? sitemap_opts : {}
+       )
+     end
+
+     crawl_results
+   end
+
+
+   # generate_sitemap is untested and NOT production ready, but is functional
+   # and probably a better output format if asset/link data is not needed
+   def self.generate_sitemap(default_host, pages, opts, sitemap_file_num=1)
+     page_keys = pages.respond_to?(:keys) ? pages.keys : pages # recursive calls pass an Array of the remaining keys
+     current_page_keys = page_keys.slice(0...SITEMAP_MAX_LINKS)
+
+     site_map = XmlSitemap::Map.new(default_host) do |map|
+       current_page_keys.each do |page|
+         map.add URI(page).path, opts if URI(page).path.length > 0
+       end
+     end
+     site_map.render_to("./site_map_#{default_host}_#{sitemap_file_num}")
+
+     if page_keys.count > SITEMAP_MAX_LINKS
+       generate_sitemap(default_host, page_keys.slice(SITEMAP_MAX_LINKS..-1), opts, sitemap_file_num + 1)
+     end
+   end
+ end
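As `crawl` above shows, output is controlled by two opts that are not in `DEFAULTS`: `:skip_asset_json` suppresses the JSON dump, and `:generate_sitemap` turns on sitemap generation; when it is a hash rather than `true`, that hash is forwarded to `XmlSitemap`'s `map.add` for every crawled URL. A hedged sketch of both forms (the host and per-URL values are illustrative, and `:priority` is assumed from the xml-sitemap gem's documented per-URL options):

```ruby
# Sitemap only, skipping the "<host>_crawl_<timestamp>" JSON file:
MechWarrior.crawl(default_host: 'example.org',
                  generate_sitemap: true,
                  skip_asset_json: true)

# A hash is passed through to map.add for each crawled path:
MechWarrior.crawl(default_host: 'example.org',
                  generate_sitemap: { priority: 0.5 })
```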
data/lib/mech_warrior/crawler.rb ADDED
@@ -0,0 +1,75 @@
+ module MechWarrior
+   class Crawler
+     attr_reader :agent_pool, :pages, :opts, :default_host, :default_protocol, :logger, :output_file
+
+     def initialize(override_opts={})
+       @opts = DEFAULTS.merge(override_opts)
+       @default_host = opts[:default_host]
+       @default_protocol = opts[:default_protocol]
+       opts[:allowed_domains] << default_host
+       @output_file = opts[:output_file] || File.open(opts[:log_file_name], 'a')
+       @logger = opts[:logger_class].new(output_file)
+       @agent_pool = MechCell.pool(size: opts[:pool_size], args: [logger])
+       @pages = {}
+       start_url = opts[:start_url] || "#{default_protocol}#{default_host}/"
+       pages[normalize_url(start_url)] = {}
+       index_url(start_url) unless opts[:no_index]
+       self
+     ensure
+       output_file.close if output_file.respond_to?(:close)
+     end
+
+     def index_url(href)
+       schemed_url = normalize_url(href)
+       future = page_future(schemed_url)
+       process_page(future, schemed_url)
+     end
+
+     private
+
+     def process_page(page_future, url, depth=0)
+       return if depth > RubyVM::DEFAULT_PARAMS[:thread_vm_stack_size]/opts[:max_depth_divisor]
+       page = page_future.value
+       if page && page.respond_to?(:links)
+         pages[url] = {}
+         pages[url][:links] = page.respond_to?(:links) ? page.links.map(&:href) : []
+         pages[url][:assets] = {
+           images: page.image_urls,
+           scripts: page.search('script'),
+           asset_links: page.search('link'), # css, icons
+           iframes: page.iframes
+         }
+         urls = links_to_follow(page).map {|link| normalize_url(link.href)}
+         futures = urls.map {|url| page_future(url)}
+         pairs = futures.zip(urls)
+         pairs.each {|future, url| process_page(future, url, depth + 1)}
+       end
+     rescue URI::InvalidURIError => e
+       logger << "InvalidURIError processing links on page at URL: #{url} -- #{e}\n"
+     end
+
+     def page_future(url)
+       agent_pool.future.get(url)
+     end
+
+     def get_page(url)
+       agent_pool.get(url)
+     end
+
+     def normalize_url(href)
+       URI(href).scheme ? href : "#{default_protocol}#{default_host}#{href}"
+     end
+
+     def follow_link?(link) # follow only pages not yet indexed, and only relative links or whitelisted link hosts
+       if link.href && URI(link.href)
+         pages[normalize_url(link.href)].nil? && (link.uri.host.nil? || opts[:allowed_domains].include?(link.uri.host))
+       end
+     rescue URI::InvalidURIError => e
+       logger << "InvalidURIError on link with href: #{link.href} -- #{e}\n"
+     end
+
+     def links_to_follow(page)
+       page.links.select { |link| follow_link?(link) }
+     end
+   end
+ end
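The recursion guard at the top of `process_page` derives its depth limit from the Ruby VM's thread stack size rather than a hard-coded constant. A small sketch of the arithmetic, assuming MRI's common 1 MB `thread_vm_stack_size` default (the value the "max depth of 4096" comment in `lib/mech_warrior.rb` implies; other builds may report a different number):

```ruby
stack_bytes = RubyVM::DEFAULT_PARAMS[:thread_vm_stack_size]            # 1_048_576 on a typical MRI build
max_depth   = stack_bytes / MechWarrior::DEFAULTS[:max_depth_divisor]  # 1_048_576 / 256 => 4096

# process_page returns once `depth` exceeds max_depth, so raising
# :max_depth_divisor makes the crawl shallower and reduces the chance of a
# SystemStackError on deeply nested link chains.
puts max_depth
```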
data/lib/mech_warrior/mech_cell.rb ADDED
@@ -0,0 +1,22 @@
+ module MechWarrior
+   class MechCell
+     include Celluloid
+     attr_reader :agent, :logger
+     MECH_ERRORS = [
+       SocketError,
+       Mechanize::ResponseCodeError,
+       Mechanize::ResponseReadError,
+       Mechanize::UnsupportedSchemeError
+     ]
+     def initialize(logger)
+       @agent = Mechanize.new
+       @logger = logger
+     end
+
+     def get(url)
+       agent.get(url)
+     rescue *MECH_ERRORS => e
+       logger << "Caught Exception getting URL: #{url} -- #{e}\n"
+     end
+   end
+ end
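`MechCell` itself is a plain Celluloid actor wrapping one Mechanize agent; the concurrency comes from the pool and futures the `Crawler` builds around it. A minimal standalone sketch of that same pattern, using the Celluloid 0.15-era `pool`/`future` API pinned in the Gemfile.lock (the URLs are placeholders):

```ruby
require 'mech_warrior' # loads mechanize, celluloid/autostart, and MechCell

logger = Logger.new($stderr)
pool   = MechWarrior::MechCell.pool(size: 4, args: [logger])

# Each future.get returns immediately; .value blocks until some actor in the
# pool has finished fetching that URL.
futures = ['http://example.org/', 'http://example.org/about'].map do |url|
  pool.future.get(url)
end
pages = futures.map(&:value) # Mechanize::Page objects; failed fetches were already logged by MechCell#get
```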
data/lib/mech_warrior/version.rb ADDED
@@ -0,0 +1,3 @@
+ module MechWarrior
+   VERSION = "0.0.1"
+ end
data/mech_warrior.gemspec ADDED
@@ -0,0 +1,32 @@
+ lib = File.expand_path('../lib/', __FILE__)
+ $:.unshift lib unless $:.include?(lib)
+ require "mech_warrior/version"
+
+ Gem::Specification.new do |s|
+   s.name = "mech_warrior"
+   s.version = MechWarrior::VERSION
+   s.platform = Gem::Platform::RUBY
+   s.authors = ["Brian Glusman"]
+   s.email = ["brian@glusman.me"]
+   s.summary = "Crawler and asset list/sitemap generator"
+   s.licenses = ["MIT", "BSD"]
+   s.extensions = ["Rakefile"]
+
+   s.description = <<-DESC
+     Spider a web host with many mechanize agents concurrently, and generate an asset JSON
+     and/or an XML sitemap of the result
+   DESC
+
+
+   s.files = `git ls-files`.split("\n")
+   s.test_files = `git ls-files -- {spec}/*`.split("\n")
+   s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_runtime_dependency "mechanize", '~> 2.7'
+   s.add_runtime_dependency "xml-sitemap", '~> 1.3'
+   s.add_runtime_dependency "celluloid", '~> 0'
+   s.add_development_dependency "rake", '~> 0'
+   s.add_development_dependency "rspec", '~> 2.14'
+   s.add_development_dependency "fakeweb", '~> 1.3'
+ end
data/spec/fakeweb_helper.rb ADDED
@@ -0,0 +1,65 @@
+ FakeWeb.allow_net_connect = false
+ module MechWarrior
+   require_relative '../lib/mech_warrior'
+   DEFAULT_HOST = DEFAULTS[:default_host]
+   SPEC_DOMAIN = "http://#{DEFAULT_HOST}/"
+
+   class FakePage
+     attr_accessor :links
+     attr_accessor :hrefs
+     attr_accessor :body
+
+     def initialize(name = '', options = {})
+       @name = name
+       @links = [options[:links]].flatten if options.has_key?(:links)
+       @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
+       @redirect = options[:redirect] if options.has_key?(:redirect)
+       @base = options[:base] if options.has_key?(:base)
+       @content_type = options[:content_type] || "text/html"
+       @body = options[:body]
+
+       create_body unless @body
+       add_to_fakeweb
+     end
+
+     def url
+       SPEC_DOMAIN + @name
+     end
+
+     private
+
+     def create_body
+       if @base
+         @body = "<html><head><base href=\"#{@base}\"></head><body>"
+       else
+         @body = "<html><body>"
+       end
+       @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
+       @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
+       @body += "</body></html>"
+     end
+
+     def add_to_fakeweb
+       options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
+
+       if @redirect
+         options[:status] = [301, "Permanently Moved"]
+
+         # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
+         redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
+         options[:location] = redirect_url
+
+         # register the page this one redirects to
+         FakeWeb.register_uri(:get, redirect_url, {:body => '',
+                                                    :content_type => @content_type,
+                                                    :status => [200, "OK"]})
+       end
+
+
+       FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
+     end
+   end
+ end
+
+ # default root
+ MechWarrior::FakePage.new
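`FakePage` both assembles a tiny HTML body and registers it with FakeWeb, so the specs can exercise the crawler without any network traffic. A short sketch of that round trip, assuming `spec/spec_helper.rb` has been loaded and using hypothetical page names:

```ruby
# Register two in-memory pages; the specs use '0'-style names the same way.
about = MechWarrior::FakePage.new('about', links: ['contact'])
MechWarrior::FakePage.new('contact')

agent = Mechanize.new
page  = agent.get(about.url)   # intercepted by FakeWeb, no real HTTP request
page.links.map(&:href)         # => ["http://www.example.com/contact"]
```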
data/spec/mech_warrior_spec.rb ADDED
@@ -0,0 +1,89 @@
+ require 'spec_helper'
+
+ module MechWarrior
+   describe Crawler do
+
+     before(:each) do
+       FakeWeb.clean_registry
+     end
+
+     describe "crawl" do
+
+       context "crawl all the html pages in a domain by following <a> href's" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', links: ['3'])
+           pages << FakePage.new('2')
+           pages << FakePage.new('3')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+
+         it { should have(4).pages }
+         its(:logger) { should be_empty }
+       end
+
+       context "should not follow links that leave the original domain" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1'], :hrefs => 'http://www.other.com/')
+           pages << FakePage.new('1')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(2).pages }
+         its("pages.keys") { should_not include('http://www.other.com/') }
+         its(:logger) { should be_empty }
+       end
+
+       context "should not index non-html links" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', content_type: 'application/pdf')
+           pages << FakePage.new('2', content_type: 'text/csv')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(1).pages }
+         its(:logger) { should be_empty }
+       end
+
+       context "should ignore invalid URLs" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', links: ['not a valid url'])
+           pages << FakePage.new('2')
+           pages << FakePage.new('not_a_valid_url')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(3).pages }
+         its(:logger) { should_not be_empty }
+       end
+
+     end
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,6 @@
+ require_relative '../lib/mech_warrior'
+
+ require 'fakeweb'
+ require File.dirname(__FILE__) + '/fakeweb_helper'
+
+ SPEC_DOMAIN = 'http://www.example.com/'
metadata ADDED
@@ -0,0 +1,147 @@
+ --- !ruby/object:Gem::Specification
+ name: mech_warrior
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Brian Glusman
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-05-17 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: mechanize
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.7'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.7'
+ - !ruby/object:Gem::Dependency
+   name: xml-sitemap
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: celluloid
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.14'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.14'
+ - !ruby/object:Gem::Dependency
+   name: fakeweb
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ description: |2
+     Spider a web host with many mechanize agents concurrently, and generate an asset JSON
+     and/or an XML sitemap of the result
+ email:
+ - brian@glusman.me
+ executables:
+ - spider
+ extensions:
+ - Rakefile
+ extra_rdoc_files: []
+ files:
+ - ".ruby-version"
+ - Gemfile
+ - Gemfile.lock
+ - README.md
+ - Rakefile
+ - bin/spider
+ - lib/mech_warrior.rb
+ - lib/mech_warrior/crawler.rb
+ - lib/mech_warrior/mech_cell.rb
+ - lib/mech_warrior/version.rb
+ - mech_warrior.gemspec
+ - spec/fakeweb_helper.rb
+ - spec/mech_warrior_spec.rb
+ - spec/spec_helper.rb
+ homepage:
+ licenses:
+ - MIT
+ - BSD
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: Crawler and asset list/sitemap generator
+ test_files: []