mech_warrior 0.0.1

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 63e3f44169fe19d92e5c237ce58460e35522bc1d
+   data.tar.gz: 95bc44122c6b15c317c512866a1f1c00a769260f
+ SHA512:
+   metadata.gz: eb826d2289cbed61494ef15285ee2ae0e2fad84cc40a6434895f0eedd3430706fe6539e912af94faf98a49307b288146c19be23b8febc7dce15d91d5bba7e473
+   data.tar.gz: 849c53b351db05f04375bf73e10e6c76f284b62d0664cab8697926792377f1468119c5bd0fb7afbbed14c930b4464109c1342758a8341f4586bb4e0f59a3c40f
data/.ruby-version ADDED
@@ -0,0 +1 @@
+ 2.1.1
data/Gemfile ADDED
@@ -0,0 +1,2 @@
+ source 'https://rubygems.org'
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,61 @@
+ PATH
+   remote: .
+   specs:
+     mech_warrior (0.0.1)
+       celluloid (~> 0)
+       mechanize (~> 2.7)
+       xml-sitemap (~> 1.3)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     builder (3.2.2)
+     celluloid (0.15.2)
+       timers (~> 1.1.0)
+     diff-lcs (1.2.5)
+     domain_name (0.5.18)
+       unf (>= 0.0.5, < 1.0.0)
+     fakeweb (1.3.0)
+     http-cookie (1.0.2)
+       domain_name (~> 0.5)
+     mechanize (2.7.3)
+       domain_name (~> 0.5, >= 0.5.1)
+       http-cookie (~> 1.0)
+       mime-types (~> 2.0)
+       net-http-digest_auth (~> 1.1, >= 1.1.1)
+       net-http-persistent (~> 2.5, >= 2.5.2)
+       nokogiri (~> 1.4)
+       ntlm-http (~> 0.1, >= 0.1.1)
+       webrobots (>= 0.0.9, < 0.2)
+     mime-types (2.2)
+     mini_portile (0.5.3)
+     net-http-digest_auth (1.4)
+     net-http-persistent (2.9.4)
+     nokogiri (1.6.1)
+       mini_portile (~> 0.5.0)
+     ntlm-http (0.1.1)
+     rake (0.9.6)
+     rspec (2.14.1)
+       rspec-core (~> 2.14.0)
+       rspec-expectations (~> 2.14.0)
+       rspec-mocks (~> 2.14.0)
+     rspec-core (2.14.8)
+     rspec-expectations (2.14.5)
+       diff-lcs (>= 1.1.3, < 2.0)
+     rspec-mocks (2.14.6)
+     timers (1.1.0)
+     unf (0.1.4)
+       unf_ext
+     unf_ext (0.0.6)
+     webrobots (0.1.1)
+     xml-sitemap (1.3.3)
+       builder (>= 2.0)
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   fakeweb (~> 1.3)
+   mech_warrior!
+   rake (~> 0)
+   rspec (~> 2.14)
data/README.md ADDED
@@ -0,0 +1,58 @@
+ MechWarrior
+ =========
+
+ MechWarrior is a Mechanize- and Celluloid-powered site crawler that generates a
+ JSON file of every page crawled, the links on each page, and the assets those
+ pages rely upon, and can optionally generate an XML sitemap compliant with the
+ Sitemaps 0.9 protocol.
+
+
+ Version
+ ----
+
+ 0.0.1
+
+ Tech
+ -----------
+
+ MechWarrior relies on several excellent RubyGems:
+
+ * [Mechanize] - a Ruby library that makes automated web interaction easy.
+ * [Celluloid] - an actor-based concurrent object framework for Ruby.
+ * [XML-Sitemap] - easy XML sitemap generation for Ruby/Rails/Merb/Sinatra applications.
+
+
+ Installation
+ --------------
+
+ ```sh
+ gem install mech_warrior-0.0.1.gem
+ ```
+
+ Crawling a site
+ ---------------
+
+ ```sh
+ bin/spider
+ ```
+ then enter a host name, followed by any additional options you wish to pass in
+ to override the default options in `lib/mech_warrior.rb`.
+
+
+ Todo
+ ----
+ Some of the functionality, including XML sitemap generation, is untested.
+ Support for multiple hosts in a single spider is currently incomplete:
+ despite the 'allowed_domains' array, additional hosts only work if all of
+ their pages contain only absolute links to follow.
+
+ License
+ ----
+
+ MIT
+
+ [mechanize]:https://github.com/sparklemotion/mechanize
+ [celluloid]:http://celluloid.io/
+ [xml-sitemap]:https://github.com/sosedoff/xml-sitemap
+
+
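For readers who prefer to skip the interactive prompt, here is a minimal programmatic sketch of the same flow; the host and pool size below are illustrative, and the option names are simply the keys of `MechWarrior::DEFAULTS` in `lib/mech_warrior.rb`:

```ruby
require 'mech_warrior'

# Roughly equivalent to running bin/spider and answering the two prompts.
# Writes "<host>_crawl_<timestamp>" with the page/link/asset JSON unless
# skip_asset_json: true is passed.
results = MechWarrior.crawl(default_host: 'www.example.com', pool_size: 5)
puts results.pages.keys
```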
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ #!/usr/bin/env rake
+
+ task :default => 'spec'
+ task :spec do
+   sh "rspec spec/mech_warrior_spec.rb"
+ end
data/bin/spider ADDED
@@ -0,0 +1,12 @@
+ #!/usr/bin/env ruby
+
+ require_relative "../lib/mech_warrior"
+
+ puts "Host:"
+ host = gets.chomp
+ puts
+ puts "Other options:"
+ opts = gets.chomp
+ opts_hash = eval("{" + opts + "}")
+ puts opts_hash.to_json
+ MechWarrior.crawl(opts_hash.merge(default_host: host))
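For reference, the "Other options:" line is wrapped in braces and eval'd as a Ruby hash literal before being merged with the host, so the prompt behaves roughly as below (the typed text is only an example):

```ruby
# What a user might type at the "Other options:" prompt.
typed = "pool_size: 5, skip_asset_json: true"

opts_hash = eval("{" + typed + "}")
p opts_hash.merge(default_host: 'www.example.com')
# => {:pool_size=>5, :skip_asset_json=>true, :default_host=>"www.example.com"}
```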
data/lib/mech_warrior.rb ADDED
@@ -0,0 +1,60 @@
+ require 'mechanize'
+ require 'xml-sitemap'
+ require 'logger'
+ require 'celluloid/autostart'
+ require_relative 'mech_warrior/mech_cell'
+ require_relative 'mech_warrior/crawler'
+
+ module MechWarrior
+   SITEMAP_MAX_LINKS = 50000
+   DEFAULTS = {
+     allowed_domains: [],
+     default_protocol: 'http://',
+     default_host: 'www.example.com',
+     # at the moment 'default_host' behaves more like an 'only' host, though links to other domains
+     # will work as long as all links on those domains' pages are absolute. Supporting multiple domains
+     # with relative links would require new state to track a 'current_host'.
+     max_depth_divisor: 256, # this results in a max depth of 4096 on my machine, which seems deep enough
+     pool_size: 20,
+     logger_class: Logger,
+     log_file_name: "mech_warrior_errors.txt"
+   }
+
+   def self.crawl(opts={})
+     crawl_results = Crawler.new(opts)
+     crawl_results.agent_pool.future.terminate
+     unless opts[:skip_asset_json]
+       File.open("#{crawl_results.default_host}_crawl_#{Time.now.gmtime}", 'w') do |file|
+         file.write(JSON.pretty_generate(crawl_results.pages))
+       end
+     end
+
+     if sitemap_opts = opts[:generate_sitemap]
+       generate_sitemap(crawl_results.default_host,
+                        crawl_results.pages,
+                        sitemap_opts.respond_to?(:keys) ? sitemap_opts : {}
+       )
+     end
+
+     crawl_results
+   end
+
+
+   # generate_sitemap is untested and NOT production ready, but is functional
+   # and probably a better output format if asset/link data is not needed
+   def self.generate_sitemap(default_host, pages, opts, sitemap_file_num=1)
+     page_keys = pages.keys
+     current_page_keys = page_keys.slice(0...SITEMAP_MAX_LINKS)
+
+     site_map = XmlSitemap::Map.new(default_host) do |map|
+       current_page_keys.each do |page|
+         map.add URI(page).path, opts if URI(page).path.length > 0
+       end
+     end
+     site_map.render_to("./site_map_#{default_host}_#{sitemap_file_num}")
+
+     if page_keys.count > SITEMAP_MAX_LINKS
+       generate_sitemap(default_host, page_keys.slice(SITEMAP_MAX_LINKS..-1), opts, sitemap_file_num + 1)
+     end
+   end
+ end
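As the inline comment on `max_depth_divisor` suggests, the crawler's recursion ceiling is derived from the interpreter's thread VM stack size, so the effective depth varies by build; a quick sketch of the arithmetic:

```ruby
# With the common 1 MiB thread VM stack (1_048_576 bytes) and the default
# max_depth_divisor of 256, process_page stops recursing past depth 4096.
puts RubyVM::DEFAULT_PARAMS[:thread_vm_stack_size] / 256
```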
data/lib/mech_warrior/crawler.rb ADDED
@@ -0,0 +1,75 @@
+ module MechWarrior
+   class Crawler
+     attr_reader :agent_pool, :pages, :opts, :default_host, :default_protocol, :logger, :output_file
+
+     def initialize(override_opts={})
+       @opts = DEFAULTS.merge(override_opts)
+       @default_host = opts[:default_host]
+       @default_protocol = opts[:default_protocol]
+       opts[:allowed_domains] << default_host
+       @output_file = opts[:output_file] || File.open(opts[:log_file_name], 'a')
+       @logger = opts[:logger_class].new(output_file)
+       @agent_pool = MechCell.pool(size: opts[:pool_size], args: [logger])
+       @pages = {}
+       start_url = opts[:start_url] || "#{default_protocol}#{default_host}/"
+       pages[normalize_url(start_url)] = {}
+       index_url(start_url) unless opts[:no_index]
+       self
+     ensure
+       output_file.close if output_file.respond_to?(:close)
+     end
+
+     def index_url(href)
+       schemed_url = normalize_url(href)
+       future = page_future(schemed_url)
+       process_page(future, schemed_url)
+     end
+
+     private
+
+     def process_page(page_future, url, depth=0)
+       return if depth > RubyVM::DEFAULT_PARAMS[:thread_vm_stack_size]/opts[:max_depth_divisor]
+       page = page_future.value
+       if page && page.respond_to?(:links)
+         pages[url] = {}
+         pages[url][:links] = page.respond_to?(:links) ? page.links.map(&:href) : []
+         pages[url][:assets] = {
+           images: page.image_urls,
+           scripts: page.search('script'),
+           asset_links: page.search('link'), # css, icons
+           iframes: page.iframes
+         }
+         urls = links_to_follow(page).map {|link| normalize_url(link.href)}
+         futures = urls.map {|url| page_future(url)}
+         pairs = futures.zip(urls)
+         pairs.each {|future, url| process_page(future, url, depth + 1)}
+       end
+     rescue URI::InvalidURIError => e
+       logger << "InvalidURIError processing links on page at URL: #{url} -- #{e}\n"
+     end
+
+     def page_future(url)
+       agent_pool.future.get(url)
+     end
+
+     def get_page(url)
+       agent_pool.get(url)
+     end
+
+     def normalize_url(href)
+       URI(href).scheme ? href : "#{default_protocol}#{default_host}#{href}"
+     end
+
+     def follow_link?(link) # follow only pages not yet indexed, and only relative links or whitelisted link hosts
+       if link.href && URI(link.href)
+         pages[normalize_url(link.href)].nil? && (link.uri.host.nil? || opts[:allowed_domains].include?(link.uri.host))
+       end
+     rescue URI::InvalidURIError => e
+       logger << "InvalidURIError on link with href: #{link.href} -- #{e}\n"
+     end
+
+     def links_to_follow(page)
+       page.links.select { |link| follow_link?(link) }
+     end
+   end
+ end
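The whitelist logic in `normalize_url` and `follow_link?` is the heart of the crawler; here is a standalone sketch of the same checks, using the default host and an already-indexed root page for illustration:

```ruby
require 'uri'

default_protocol = 'http://'
default_host     = 'www.example.com'
allowed_domains  = [default_host]
pages            = { 'http://www.example.com/' => {} } # the root is already indexed

# Relative hrefs get the default protocol and host prepended, as in Crawler#normalize_url.
normalize = ->(href) { URI(href).scheme ? href : "#{default_protocol}#{default_host}#{href}" }

# Follow only pages not yet indexed, and only relative links or whitelisted hosts,
# as in Crawler#follow_link?.
follow = lambda do |href|
  uri = URI(href)
  pages[normalize.call(href)].nil? && (uri.host.nil? || allowed_domains.include?(uri.host))
end

puts normalize.call('/about')               # => "http://www.example.com/about"
puts follow.call('/about')                  # => true  (relative and not yet indexed)
puts follow.call('http://www.example.com/') # => false (already indexed)
puts follow.call('http://www.other.com/')   # => false (host not in allowed_domains)
```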
data/lib/mech_warrior/mech_cell.rb ADDED
@@ -0,0 +1,22 @@
+ module MechWarrior
+   class MechCell
+     include Celluloid
+     attr_reader :agent, :logger
+     MECH_ERRORS = [
+       SocketError,
+       Mechanize::ResponseCodeError,
+       Mechanize::ResponseReadError,
+       Mechanize::UnsupportedSchemeError
+     ]
+     def initialize(logger)
+       @agent = Mechanize.new
+       @logger = logger
+     end
+
+     def get(url)
+       agent.get(url)
+     rescue *MECH_ERRORS => e
+       logger << "Caught Exception getting URL: #{url} -- #{e}\n"
+     end
+   end
+ end
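MechCell is only used inside a Celluloid pool driven by futures (as the Crawler does); a toy illustration of that pattern with a stand-in worker, assuming celluloid ~> 0.15 is installed:

```ruby
require 'celluloid/autostart'

# A stand-in for MechCell: same pool/future usage, but no Mechanize or network.
class ToyCell
  include Celluloid

  def get(url)
    "fetched #{url}" # a real MechCell calls Mechanize#get and rescues MECH_ERRORS here
  end
end

pool    = ToyCell.pool(size: 4)                             # like MechCell.pool(size: opts[:pool_size])
futures = %w[/a /b /c].map { |path| pool.future.get(path) } # like Crawler#page_future
futures.each { |f| puts f.value }                           # each future blocks until its worker finishes
```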
data/lib/mech_warrior/version.rb ADDED
@@ -0,0 +1,3 @@
+ module MechWarrior
+   VERSION = "0.0.1"
+ end
data/mech_warrior.gemspec ADDED
@@ -0,0 +1,32 @@
+ lib = File.expand_path('../lib/', __FILE__)
+ $:.unshift lib unless $:.include?(lib)
+ require "mech_warrior/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "mech_warrior"
+   s.version     = MechWarrior::VERSION
+   s.platform    = Gem::Platform::RUBY
+   s.authors     = ["Brian Glusman"]
+   s.email       = ["brian@glusman.me"]
+   s.summary     = "Crawler and asset list/sitemap generator"
+   s.licenses    = ["MIT", "BSD"]
+   s.extensions  = ["Rakefile"]
+
+   s.description = <<-DESC
+     Spider a web host with many mechanize agents concurrently, and generate an asset JSON
+     and/or an XML sitemap of the result
+   DESC
+
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {spec}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_runtime_dependency "mechanize", '~> 2.7'
+   s.add_runtime_dependency "xml-sitemap", '~> 1.3'
+   s.add_runtime_dependency "celluloid", '~> 0'
+   s.add_development_dependency "rake", '~> 0'
+   s.add_development_dependency "rspec", '~> 2.14'
+   s.add_development_dependency "fakeweb", '~> 1.3'
+ end
data/spec/fakeweb_helper.rb ADDED
@@ -0,0 +1,65 @@
+ FakeWeb.allow_net_connect = false
+ module MechWarrior
+   require_relative '../lib/mech_warrior'
+   DEFAULT_HOST = DEFAULTS[:default_host]
+   SPEC_DOMAIN = "http://#{DEFAULT_HOST}/"
+
+   class FakePage
+     attr_accessor :links
+     attr_accessor :hrefs
+     attr_accessor :body
+
+     def initialize(name = '', options = {})
+       @name = name
+       @links = [options[:links]].flatten if options.has_key?(:links)
+       @hrefs = [options[:hrefs]].flatten if options.has_key?(:hrefs)
+       @redirect = options[:redirect] if options.has_key?(:redirect)
+       @base = options[:base] if options.has_key?(:base)
+       @content_type = options[:content_type] || "text/html"
+       @body = options[:body]
+
+       create_body unless @body
+       add_to_fakeweb
+     end
+
+     def url
+       SPEC_DOMAIN + @name
+     end
+
+     private
+
+     def create_body
+       if @base
+         @body = "<html><head><base href=\"#{@base}\"></head><body>"
+       else
+         @body = "<html><body>"
+       end
+       @links.each{|l| @body += "<a href=\"#{SPEC_DOMAIN}#{l}\"></a>"} if @links
+       @hrefs.each{|h| @body += "<a href=\"#{h}\"></a>"} if @hrefs
+       @body += "</body></html>"
+     end
+
+     def add_to_fakeweb
+       options = {:body => @body, :content_type => @content_type, :status => [200, "OK"]}
+
+       if @redirect
+         options[:status] = [301, "Permanently Moved"]
+
+         # only prepend SPEC_DOMAIN if a relative url (without an http scheme) was specified
+         redirect_url = (@redirect =~ /http/) ? @redirect : SPEC_DOMAIN + @redirect
+         options[:location] = redirect_url
+
+         # register the page this one redirects to
+         FakeWeb.register_uri(:get, redirect_url, {:body => '',
+                                                   :content_type => @content_type,
+                                                   :status => [200, "OK"]})
+       end
+
+
+       FakeWeb.register_uri(:get, SPEC_DOMAIN + @name, options)
+     end
+   end
+ end
+
+ # default root
+ MechWarrior::FakePage.new
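FakePage ultimately boils down to one FakeWeb registration per URL; stripped of the helper class, the setup it performs looks roughly like this (URLs and body are illustrative):

```ruby
require 'fakeweb'
require 'mechanize'

FakeWeb.allow_net_connect = false

# Register a canned response, as FakePage#add_to_fakeweb does for each page.
FakeWeb.register_uri(:get, 'http://www.example.com/0',
                     body: '<html><body><a href="http://www.example.com/1"></a></body></html>',
                     content_type: 'text/html',
                     status: [200, 'OK'])

page = Mechanize.new.get('http://www.example.com/0')
puts page.links.map(&:href) # => ["http://www.example.com/1"]
```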
data/spec/mech_warrior_spec.rb ADDED
@@ -0,0 +1,89 @@
+ require 'spec_helper'
+
+ module MechWarrior
+   describe Crawler do
+
+     before(:each) do
+       FakeWeb.clean_registry
+     end
+
+     describe "crawl" do
+
+       context "crawl all the html pages in a domain by following <a> href's" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', links: ['3'])
+           pages << FakePage.new('2')
+           pages << FakePage.new('3')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+
+         it { should have(4).pages }
+         its(:logger) { should be_empty }
+       end
+
+       context "should not follow links that leave the original domain" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1'], :hrefs => 'http://www.other.com/')
+           pages << FakePage.new('1')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(2).pages }
+         its("pages.keys") { should_not include('http://www.other.com/') }
+         its(:logger) { should be_empty }
+       end
+
+       context "should not index non-html links" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', content_type: 'application/pdf')
+           pages << FakePage.new('2', content_type: 'text/csv')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(1).pages }
+         its(:logger) { should be_empty }
+       end
+
+       context "should ignore invalid URLs" do
+         let(:pages) do
+           pages = []
+           pages << FakePage.new('0', links: ['1', '2'])
+           pages << FakePage.new('1', links: ['not a valid url'])
+           pages << FakePage.new('2')
+           pages << FakePage.new('not_a_valid_url')
+           pages
+         end
+
+         subject { Crawler.new(default_host: MechWarrior::DEFAULTS[:default_host],
+                               start_url: pages[0].url,
+                               logger_class: String,
+                               output_file: "")
+         }
+         it { should have(3).pages }
+         its(:logger) { should_not be_empty }
+       end
+
+     end
+   end
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,6 @@
+ require_relative '../lib/mech_warrior'
+
+ require 'fakeweb'
+ require File.dirname(__FILE__) + '/fakeweb_helper'
+
+ SPEC_DOMAIN = 'http://www.example.com/'
metadata ADDED
@@ -0,0 +1,147 @@
+ --- !ruby/object:Gem::Specification
+ name: mech_warrior
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+ platform: ruby
+ authors:
+ - Brian Glusman
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-05-17 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: mechanize
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.7'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.7'
+ - !ruby/object:Gem::Dependency
+   name: xml-sitemap
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: celluloid
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: rspec
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.14'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '2.14'
+ - !ruby/object:Gem::Dependency
+   name: fakeweb
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ description: |2
+   Spider a web host with many mechanize agents concurrently, and generate an asset JSON
+   and/or an XML sitemap of the result
+ email:
+ - brian@glusman.me
+ executables:
+ - spider
+ extensions:
+ - Rakefile
+ extra_rdoc_files: []
+ files:
+ - ".ruby-version"
+ - Gemfile
+ - Gemfile.lock
+ - README.md
+ - Rakefile
+ - bin/spider
+ - lib/mech_warrior.rb
+ - lib/mech_warrior/crawler.rb
+ - lib/mech_warrior/mech_cell.rb
+ - lib/mech_warrior/version.rb
+ - mech_warrior.gemspec
+ - spec/fakeweb_helper.rb
+ - spec/mech_warrior_spec.rb
+ - spec/spec_helper.rb
+ homepage:
+ licenses:
+ - MIT
+ - BSD
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: Crawler and asset list/sitemap generator
+ test_files: []