rubyretriever 1.3.0 → 1.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 05f8e6c0169af87c8284c8b6e98d5f25488b0980
-  data.tar.gz: a45a361b215b5ae7832e762b08bbdb989d0847a1
+  metadata.gz: 9a282bd399fdff64f26493e2b76a44df2ece00e4
+  data.tar.gz: 57b52b0b8b56116ae924fe7a03c912526c3342b2
 SHA512:
-  metadata.gz: 8cee32f96e0ea0fe003a109016c6b17f9ddde9a73d72dbfd0a95c63e413b87b41c2ccf5bbc86b9886f78f59f053fbcd27aad0cbbbededbe8b002f0f7d986c528
-  data.tar.gz: fd0762069a69f7383a59b4058b46bde46793defec437eb1525e69f98f60e4429c1dc676d2d8ae3fa828c8d82d2018a9e9685bb15537424cb3e15fe0d5c472ade
+  metadata.gz: 92b91cc247b847a845b48d33e7fdbbc828e2e3372663d06abcdbd0dc5a524357319dfddbc28f879419670aea0f0a5ab88934ae515b4d31087fe67d534bf95e26
+  data.tar.gz: 3bdb6d28f487209b36426e6b00a7c929d049e539d3cda583cbd13e73902d3230f0a3a2d081af4ab5939110b1e2685b4202e67a1b11bb256d4c924e46cdeefb57
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
-2014 (c) Joseph Michael Norton - 'Joe Norton' - SoftwareByJoe.com
+2016 (c) Joseph Michael Norton - @JoeNorton - http://Norton.io
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -84,7 +84,7 @@ module Retriever
      puts
    end
 
-    # returns true is resp is ok to continue
+    # returns true is resp is ok to continue
    def good_response?(resp, url)
      return false unless resp
      hdr = resp.response_header
@@ -128,7 +128,7 @@ module Retriever
      @sitemap = options['sitemap']
      @seo = options['seo']
      @autodown = options['autodown']
-      @file_re = Regexp.new(/.#{@fileharvest}\z/).freeze if @fileharvest
+      @file_re = Regexp.new(/.#{@fileharvest}/).freeze if @fileharvest
    end
 
    def setup_bloom_filter
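
The `@file_re` change above drops the `\z` end-of-string anchor, so harvested file links that carry a query string still match the filetype pattern (the new link_spec example further down exercises exactly this case). A minimal sketch of the difference, using an illustrative 'mp3' filetype outside the gem:

```ruby
# Illustration only: same regex construction as in the diff above.
fileharvest = 'mp3'
anchored = /.#{fileharvest}\z/ # old: the extension had to end the string
loose    = /.#{fileharvest}/   # new: the extension may be followed by a query string

url = 'http://mises.org/system/tdf/example.mp3?file=1&type=audio'
puts !!(anchored =~ url) # => false
puts !!(loose =~ url)    # => true
```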
@@ -1,5 +1,5 @@
 module Retriever
-  # recieves target url and RR options
+  # receives target url and RR options
   # returns an array of all unique files (based on given filetype)
   # found on the target site
   class FetchFiles < Fetch
@@ -1,7 +1,7 @@
 module Retriever
   #
   class FetchSEO < Fetch
-    # recieves target url and RR options
+    # receives target url and RR options
     # returns an array of onpage SEO related fields
     # on all unique pages found on the site
     def initialize(url, options)
@@ -1,7 +1,7 @@
 module Retriever
   #
   class FetchSitemap < Fetch
-    # recieves target URL and RR options
+    # receives target URL and RR options
     # returns an array of all unique pages found on the site
     def initialize(url, options)
       super
@@ -7,16 +7,18 @@ module Retriever
    DOUBLE_SLASH_RE = Regexp.new(%r(^/{2}[^/])).freeze
    WWW_DOT_RE = Regexp.new(/^www\./i).freeze
 
-    def initialize(target_scheme, target_host, this_link)
+    def initialize(target_scheme, target_host, this_link, current_url)
      begin
-        @link_uri = Addressable::URI.parse(Addressable::URI.encode(this_link)).normalize
-      rescue Addressable::URI::InvalidURIError => e
-        dummy_link = Retriever::Link.new(target_scheme, target_host, target_host)
-        @link_uri = Addressable::URI.parse(dummy_link.path)
+        # this_link = Addressable::URI.encode(this_link) # not necessary; and breaking links
+        @link_uri = Addressable::URI.parse(this_link)
+      rescue Addressable::URI::InvalidURIError
+        dummy = Retriever::Link.new(target_scheme, target_host, target_host, target_host)
+        @link_uri = Addressable::URI.parse(dummy.path)
      end
      @scheme = target_scheme
      @host = target_host
      @this_link = @link_uri.to_s
+      @current_page_url = current_url
    end
 
    def path
@@ -30,11 +32,16 @@ module Retriever
      return "#{@scheme}:#{this_link}" if DOUBLE_SLASH_RE =~ this_link
 
      # link uses relative path with no slashes at all
-      return "#{@scheme}://#{host}/#{this_link}" if link_uri.relative?
+      if link_uri.relative?
+        if @current_page_url[-1, 1] == "/"
+          return "#{@current_page_url}#{this_link}"
+        end
+        return "#{@current_page_url}/#{this_link}"
+      end
    end
 
    private
 
-    attr_reader :this_link, :host, :link_uri
+    attr_reader :this_link, :host, :link_uri, :current_page_url
  end
end
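
The `Link` constructor now also receives the URL of the page the link was found on, and `path` resolves relative hrefs against that page instead of the site root. A minimal sketch of the new resolution rule in isolation (the `resolve_relative` helper is illustrative, not part of the gem's API):

```ruby
# Mirrors the branch added above: append directly when the current page URL
# already ends in a slash, otherwise join with a single slash.
def resolve_relative(current_page_url, href)
  return "#{current_page_url}#{href}" if current_page_url.end_with?('/')
  "#{current_page_url}/#{href}"
end

puts resolve_relative('http://www.cnet.com/reviews/', 'cpage_18')
# => http://www.cnet.com/reviews/cpage_18
```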
@@ -1,3 +1,4 @@
+require 'nokogiri'
 require 'addressable/uri'
 #
 using SourceString
@@ -40,7 +41,7 @@ module Retriever
      @links = nil
    end
 
-    # recieves page source as string
+    # receives page source as string
    # returns array of unique href links
    def links
      return @links if @links
@@ -49,12 +50,14 @@ module Retriever
        # filter some malformed URLS that come in
        # meant to be a loose filter to catch all reasonable HREF attributes.
        link = match[0]
-        Link.new(@t.scheme, @t.host, link).path
+        Link.new(@t.scheme, @t.host, link, @url).path
      end.compact.uniq
    end
 
    def parse_internal
-      links.select { |x| @t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host }
+      links.select do |x|
+        @t.host == Addressable::URI.parse(Addressable::URI.encode(x)).host
+      end
    end
 
    def parse_internal_visitable
@@ -65,6 +68,11 @@ module Retriever
      arr.select { |x| @t.file_re =~ x }
    end
 
+    def parse_by_css(selector)
+      nokogiri_doc = Nokogiri::HTML(@source)
+      nokogiri_doc.css(selector).text
+    end
+
    def title
      TITLE_RE =~ @source ? @source.match(TITLE_RE)[1].decode_html : ''
    end
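
The new `parse_by_css` method wraps Nokogiri's CSS selection over the page source. A short usage sketch, following the way the specs further down construct a `Retriever::Page` (the HTML string and selector here are illustrative):

```ruby
require 'retriever'

target = Retriever::Target.new('http://www.cnet.com/')
html   = "<a href='/gadgets/' id='gadgets-link'>gadgets </a>"
page   = Retriever::Page.new('http://www.cnet.com/', html, target)

page.parse_by_css('#gadgets-link') # => "gadgets "
```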
@@ -1,7 +1,7 @@
 module Retriever
   #
   class PageIterator < Fetch
-    # recieves target url and RR options, and a block
+    # receives target url and RR options, and a block
     # runs the block on all pages during crawl, pushing
     # the returned value of the block onto a result stack
     # the complete data returned from the crawl is accessible thru self.result
@@ -1,4 +1,4 @@
 #
 module Retriever
-  VERSION = '1.3.0'
+  VERSION = '1.4.0'
 end
data/readme.md CHANGED
@@ -4,33 +4,53 @@
 
 By Joe Norton
 
-RubyRetriever is a Web Crawler, Site Mapper, File Harvester & Autodownloader.
+RubyRetriever is a Web Crawler, Scraper & File Harvester. Available as a command-line executable and as a crawling framework.
 
-RubyRetriever (RR) uses asynchronous HTTP requests via [Eventmachine](https://github.com/eventmachine/eventmachine) & [Synchrony](https://github.com/igrigorik/em-synchrony) to crawl webpages *very quickly*. RR also uses a Ruby implementation of the [bloomfilter](https://github.com/igrigorik/bloomfilter-rb) in order to keep track of pages it has already crawled.
+RubyRetriever (RR) uses asynchronous HTTP requests via [Eventmachine](https://github.com/eventmachine/eventmachine) & [Synchrony](https://github.com/igrigorik/em-synchrony) to crawl webpages *very quickly*. RR also uses a Ruby implementation of the [bloomfilter](https://github.com/igrigorik/bloomfilter-rb) in order to keep track of pages it has already crawled in a memory efficient manner.
+
+**v1.3.1 Update (3/24/2016)** - Several bug fixes.
+
+**v1.3.0 Update (6/22/2014)** - The major change in this update is the new PageIterator class which adds functionality for library/script usage. Now you can run custom blocks against each page during the crawl. This update also includes more tests, and other code improvements to improve modularity and testability.
 
 **v1.0 Update (6/07/2014)** - Includes major code changes and a lot of bug fixes. It's now much better in dealing with redirects, issues with the host changing, etc. Also added the SEO mode, which grabs a number of key SEO components from every page on a site. Lastly, this update was so extensive that I could not ensure backward compatibility; thus, this was update 1.0!
 
 Mission
 -------
-RubyRetriever aims to be the best command-line crawling and scraping package written in Ruby.
+RubyRetriever aims to be the best command-line crawling and scraping package written in Ruby and a replacement for paid software such as Screaming Frog SEO Spider.
+
+
+Roadmap?
+Not sure. Feel free to offer your thoughts.
+
+Some Potential Ideas:
+* 'freeroam mode' - to go on cruising the net endlessly in fileharvest mode
+* 'dead-link finder' mode - collects links returning 404, or other error msgs
+* 'validate robots.txt' mode - outputs the bot-exposed sitemap of your site
+* more sophisticated SEO analysis? replace screaming frog? this would include checks for canonical URL, maybe some keyword density checks, content length checks, etc.
 
 Features
 --------
 * Asynchronous HTTP Requests thru EM & Synchrony
-* Bloom filter for tracking visited pages
-* 3 CLI modes
-* Sitemap
-* File Harvest
-* SEO
+* Bloom filter for tracking visited pages
+* Supports HTTPS
+* Follows 301 redirects (if to same host)
+* 3 CLI modes
+* Sitemap - Find all links on a website, output a valid XML sitemap, or just a CSV
+* File Harvest - find all files linked to on a website, option to autodownload
+* SEO - collect important SEO info from every page, output to a CSV (or STDOUT)
+* Run a Custom Block on a Per-Page basis (PageIterator)
 
 Use cases
 ---------
-RubyRetriever can do multiple things for you. As an Executable
+**As an Executable**
 With a single command at the terminal, RR can:
  1. Crawl your website and output a *valid XML sitemap* based on what it found.
  2. Crawl a target website and *download all files of a given filetype*.
  3. Crawl a target website, *collect important SEO information* such as page titles, meta descriptions and h1 tags, and write it to CSV.
 
+**Used in Custom scripts**
+As of version 1.3.0, with the PageIterator class you can pass a custom block that will get run against each page during a crawl, and collect the results in an array. This means you can define for yourself whatever it is you want to collect from each page during the crawl.
+
 Help & Forks Welcome!
 
 Getting started
@@ -94,7 +114,7 @@ and OPTIONS is the applicable:
 -h, --help *Display this screen*
 
 
-Using as a Library (starting as of version 1.3.0 -- yet to be released)
+Using as a Library (starting as of version 1.3.0)
 ------------------
 
 If you want to collect something, other than that which the executable allows, on a 'per page' basis then you want to use the PageIterator class. Then you can run whatever block you want against each individual page's source code located during the crawl.
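
The readme's library-usage section above describes the PageIterator workflow: pass a block that runs against each crawled page, then read the collected values back through `result` (the accessor named in the PageIterator comment earlier in this diff). A hedged sketch of that usage; the `'maxpages'` option key is an assumption for illustration only:

```ruby
require 'retriever'

# Collect the URL and <title> of every page found during the crawl.
rr = Retriever::PageIterator.new('http://www.cnet.com/', 'maxpages' => 100) do |page|
  [page.url, page.title] # the block's return value is pushed onto the result stack
end

rr.result.each { |url, title| puts "#{title} (#{url})" }
```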
data/spec/link_spec.rb CHANGED
@@ -3,7 +3,9 @@ require 'retriever'
 describe 'Link' do
 
   t = Retriever::Target.new('http://www.cnet.com/reviews/')
-  let(:links) { Retriever::Page.new('http://www.cnet.com/reviews/', @source, t).links }
+  let(:links) do
+    Retriever::Page.new('http://www.cnet.com/reviews/', @source, t).links
+  end
 
   it 'collects links in anchor tags' do
     @source = (<<SOURCE).strip
@@ -46,7 +48,7 @@ SOURCE
     expect(links).to include('http://www.cnet.com/download.exe')
   end
 
-  it "doesn't care about any extra attributes on the anchor tag" do
+  it "doesn\'t care about any extra attributes on the anchor tag" do
     @source = (<<SOURCE).strip
       <a href='http://www.cnet.com/products/gadgets/'>gadgets </a>
       <a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'>
@@ -63,6 +65,13 @@ SOURCE
 SOURCE
 
     expect(links).to include('http://www.cnet.com/test.html',
-                             'http://www.cnet.com/cpage_18')
+                             'http://www.cnet.com/reviews/cpage_18')
+  end
+  it 'collects files even when query strings exist' do
+    @source = (<<SOURCE).strip
+      <a href='http://mises.org/system/tdf/Robert%20Nozick%20and%20Murray%20Rothbard%20David%20Gordon.mp3?file=1&amp;type=audio' type='audio/mpeg; length=22217599' title='Robert Nozick and Murray Rothbard David Gordon.mp3'>Download audio file</a></span></div>
+SOURCE
+
+    expect(links).to include('http://mises.org/system/tdf/Robert%20Nozick%20and%20Murray%20Rothbard%20David%20Gordon.mp3?file=1&amp;type=audio')
   end
 end
data/spec/page_spec.rb CHANGED
@@ -4,102 +4,89 @@ require 'retriever/fetch'
 t = Retriever::Target.new('http://www.cnet.com/reviews/', /\.exe\z/)
 
 describe 'Page' do
+  let(:common_source) do
+    <<-SOURCE
+    <title>test</title>
+    <a href='www.cnet.com/download.exe'>download</a>
+    <a href='/test.html'>test</a>
+    <a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'>
+    </a>
+    <a href='http://www.cnet.com/products/gadgets/' id='gadgets-link'>gadgets </a>
+    <a href='http://www.yahoo.com/test/'>yahoo</a>"
+    <meta name='description' content="test2 ">
+    <h1>test 3</h1>
+    <h2> test 4 </h2>
+    SOURCE
+  end
+
   describe '#url' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'returns current page URL' do
-      @source = (<<SOURCE).strip
-      <a href='http://www.cnet.com/'>download</a>
-SOURCE
       expect(page.url).to eq('http://www.cnet.com/')
     end
   end
 
   describe '#links' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'collects all unique href links on the page' do
-      @source = (<<SOURCE).strip
-      <a href='www.cnet.com/download.exe'>download</a>
-      <a href='/test.html'>test</a>
-      <a href='http://www.cnet.com/products/gadgets/' data-vanity-rewritten='true'>
-      </a>
-      <a href='http://www.cnet.com/products/gadgets/'>gadgets </a>
-      <a href='http://www.yahoo.com/test/'>yahoo</a>
-SOURCE
-
       expect(page.links.size).to eq(4)
     end
   end
 
   describe '#parse_internal' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     let(:links) { page.parse_internal }
     it 'filters links by host' do
-      @source = (<<SOURCE).strip
-      <a href='http://www.cnet.com/'>download</a>
-      <a href='http://www.yahoo.com/test/'>yahoo</a>
-SOURCE
-
-      expect(links.size).to eq(1)
+      expect(links.size).to eq(3)
     end
   end
 
   describe '#parse_internal_visitable' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:source) { "<link rel='stylesheet' id='gforms_reset_css-css' href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />" }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', source, t) }
     let(:links) { page.parse_internal_visitable }
     it "filters out 'unvisitable' URLS like JS, Stylesheets, Images" do
-      @source = (<<SOURCE).strip
-      <link rel='stylesheet' id='gforms_reset_css-css' href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />
-SOURCE
      expect(links.size).to eq(0)
    end
  end
 
   describe '#parse_files' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     let(:files) { page.parse_files(page.parse_internal) }
     it 'filters links by filetype' do
-      @source = (<<SOURCE).strip
-      <a href='www.cnet.com/download.exe'>download</a>
-      http://www.google.com
-      <a href='/test.html'>test</a>
-SOURCE
      expect(files.size).to eq(1)
    end
  end
 
+  describe '#parse_by_css' do
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
+
+    it 'returns the text from the received css selector' do
+      expect(page.parse_by_css('#gadgets-link')).to eq('gadgets ')
+    end
+  end
+
   describe '#title' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'returns page title' do
-      @source = (<<SOURCE).strip
-      <title>test</title>
-SOURCE
      expect(page.title).to eq('test')
    end
  end
   describe '#desc' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'returns meta description' do
-      @source = (<<SOURCE).strip
-      <meta name='description' content="test2 ">
-SOURCE
      expect(page.desc).to eq('test2 ')
    end
  end
   describe '#h1' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'returns h1 text' do
-      @source = (<<SOURCE).strip
-      <h1>test 3</h1>
-SOURCE
      expect(page.h1).to eq('test 3')
    end
  end
   describe '#h2' do
-    let(:page) { Retriever::Page.new('http://www.cnet.com/', @source, t) }
+    let(:page) { Retriever::Page.new('http://www.cnet.com/', common_source, t) }
     it 'returns h2 text' do
-      @source = (<<SOURCE).strip
-      <h2> test 4 </h2>
-SOURCE
      expect(page.h2).to eq(' test 4 ')
    end
  end
@@ -11,7 +11,7 @@ describe 'Fetch' do
   end
 
   let(:nil_response) do
-    r.good_response?(nil,'http://www.yahoo.com')
+    r.good_response?(nil, 'http://www.yahoo.com')
   end
 
   let(:unsuccessful_resp) do
@@ -20,14 +20,14 @@ describe 'Fetch' do
     resp.stub(:successful?).and_return(false)
     resp.stub(:server_error?).and_return(false)
     resp.stub(:client_error?).and_return(false)
-    r.good_response?(resp,'http://www.yahoo.com')
+    r.good_response?(resp, 'http://www.yahoo.com')
   end
 
   let(:redir_resp) do
     resp.stub(:response_header).and_return(resp)
     resp.stub(:redirection?).and_return(true)
     resp.stub(:location).and_return('http://www.google.com')
-    r.good_response?(resp,'http://www.yahoo.com')
+    r.good_response?(resp, 'http://www.yahoo.com')
   end
 
   let(:bad_content_type_resp) do
@@ -35,7 +35,7 @@ describe 'Fetch' do
     resp.stub(:redirection?).and_return(false)
     resp.stub(:successful?).and_return(true)
     resp['CONTENT_TYPE'] = 'image/jpeg'
-    r.good_response?(resp,'http://www.yahoo.com')
+    r.good_response?(resp, 'http://www.yahoo.com')
   end
 
   let(:success_resp) do
@@ -43,7 +43,7 @@ describe 'Fetch' do
     resp.stub(:redirection?).and_return(false)
     resp.stub(:successful?).and_return(true)
     resp['CONTENT_TYPE'] = 'text/html'
-    r.good_response?(resp,'http://www.yahoo.com')
+    r.good_response?(resp, 'http://www.yahoo.com')
   end
 
   it 'returns false if the response is empty' do
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rubyretriever
 version: !ruby/object:Gem::Version
-  version: 1.3.0
+  version: 1.4.0
 platform: ruby
 authors:
 - Joe Norton
@@ -94,6 +94,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
        version: '2.14'
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Asynchronous web crawler, scraper and file harvester
 email:
 - joe@softwarebyjoe.com