rubyretriever 0.0.8 → 0.0.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e4e4773d62ec74a55bb3d9afc78622229e7db559
4
- data.tar.gz: a2186b11bb3eabdec8c312e0b3365961c97805de
3
+ metadata.gz: 849720cdbcbddf95e458dcf8a9928ad0430e1c6d
4
+ data.tar.gz: 2bd39dfcbd58a0b1b2fa66496be9a77c97f72623
5
5
  SHA512:
6
- metadata.gz: ae392a910a3c7a6f2b3f9097d82978ba35ba413da46f4e5ce30b9e728bac2fb40f61c3a110ea59b9f59d75d79b0fcf85a8a783597aeaccd31c833b2c3753bd4d
7
- data.tar.gz: 72fe87613059ccae6022c65ad5db70445cc4470028d081e918ffb9dfc122adb5dd8c9967abe2622a8daaeea59d878340183597a73768f32dd42679b7b159d64d
6
+ metadata.gz: a76d5ea2c7087a2c63f84f2d31cde832bcf2d2f72b0e827f8cae581380f1fd3b3ad7e40306753a63ad06ebd5e1154eb2153e9a853ad39aca6ce800428733f4c8
7
+ data.tar.gz: aba0032ed5d5cb9701e7103f0928a0009443d2323df28f683990b7a75da962f2b0c617a7667fff4a19e9a28983297b60113799a49977bf49998fba5b9af4f98b
@@ -0,0 +1,25 @@
1
+ module Retriever
2
+ class FetchSitemap < Fetch
3
+ attr_reader :sitemap
4
+ def initialize(url,options)
5
+ super
6
+ @sitemap = [@target]
7
+ @linkStack = self.parseInternalLinks(self.fetchLinks(fetchPage(@target)))
8
+ self.lg("#{@linkStack.size-1} new links found")
9
+ errlog("Bad URL -- #{@target}") if !@linkStack
10
+
11
+ @linkStack.delete(@target) if @linkStack.include?(@target)
12
+ @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
13
+ @sitemap.concat(@linkStack)
14
+
15
+ self.async_crawl_and_collect()
16
+
17
+ @sitemap.sort_by! {|x| x.length} if @sitemap.size>1
18
+ @sitemap.uniq!
19
+ @sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)
20
+
21
+ self.dump(self.sitemap)
22
+ self.write(self.sitemap) if @output
23
+ end
24
+ end
25
+ end
@@ -1,3 +1,3 @@
1
1
  module Retriever
2
- VERSION = '0.0.8'
2
+ VERSION = '0.0.9'
3
3
  end
data/readme.md CHANGED
@@ -1,9 +1,9 @@
1
- RubyRetriever [![Gem Version](https://badge.fury.io/rb/RubyRetriever.svg)](http://badge.fury.io/rb/RubyRetriever)
1
+ RubyRetriever [![Gem Version](https://badge.fury.io/rb/rubyretriever.svg)](http://badge.fury.io/rb/rubyretriever)
2
2
  ==============
3
3
 
4
- Now an official RubyGem! --make sure to use camel-casing--
4
+ Now an official RubyGem!
5
5
  ```sh
6
- gem install RubyRetriever
6
+ gem install rubyretriever
7
7
  ```
8
8
 
9
9
  Update (5/25):
@@ -25,7 +25,7 @@ RubyRetriever does NOT respect robots.txt, and RubyRetriever currently - by defa
25
25
  HOW IT WORKS
26
26
  -----------
27
27
  ```sh
28
- gem install RubyRetriever
28
+ gem install rubyretriever
29
29
  rr [MODE] [OPTIONS] Target_URL
30
30
  ```
31
31
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyretriever
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Norton
@@ -118,6 +118,7 @@ extra_rdoc_files: []
118
118
  files:
119
119
  - LICENSE
120
120
  - bin/rr
121
+ - lib/fetchsitemap.rb
121
122
  - lib/retriever.rb
122
123
  - lib/retriever/fetch.rb
123
124
  - lib/retriever/fetchfiles.rb