rubyretriever 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e4e4773d62ec74a55bb3d9afc78622229e7db559
4
- data.tar.gz: a2186b11bb3eabdec8c312e0b3365961c97805de
3
+ metadata.gz: 849720cdbcbddf95e458dcf8a9928ad0430e1c6d
4
+ data.tar.gz: 2bd39dfcbd58a0b1b2fa66496be9a77c97f72623
5
5
  SHA512:
6
- metadata.gz: ae392a910a3c7a6f2b3f9097d82978ba35ba413da46f4e5ce30b9e728bac2fb40f61c3a110ea59b9f59d75d79b0fcf85a8a783597aeaccd31c833b2c3753bd4d
7
- data.tar.gz: 72fe87613059ccae6022c65ad5db70445cc4470028d081e918ffb9dfc122adb5dd8c9967abe2622a8daaeea59d878340183597a73768f32dd42679b7b159d64d
6
+ metadata.gz: a76d5ea2c7087a2c63f84f2d31cde832bcf2d2f72b0e827f8cae581380f1fd3b3ad7e40306753a63ad06ebd5e1154eb2153e9a853ad39aca6ce800428733f4c8
7
+ data.tar.gz: aba0032ed5d5cb9701e7103f0928a0009443d2323df28f683990b7a75da962f2b0c617a7667fff4a19e9a28983297b60113799a49977bf49998fba5b9af4f98b
@@ -0,0 +1,25 @@
1
+ module Retriever
2
+ class FetchSitemap < Fetch
3
+ attr_reader :sitemap
4
+ def initialize(url,options)
5
+ super
6
+ @sitemap = [@target]
7
+ @linkStack = self.parseInternalLinks(self.fetchLinks(fetchPage(@target)))
8
+ self.lg("#{@linkStack.size-1} new links found")
9
+ errlog("Bad URL -- #{@target}") if !@linkStack
10
+
11
+ @linkStack.delete(@target) if @linkStack.include?(@target)
12
+ @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
13
+ @sitemap.concat(@linkStack)
14
+
15
+ self.async_crawl_and_collect()
16
+
17
+ @sitemap.sort_by! {|x| x.length} if @sitemap.size>1
18
+ @sitemap.uniq!
19
+ @sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)
20
+
21
+ self.dump(self.sitemap)
22
+ self.write(self.sitemap) if @output
23
+ end
24
+ end
25
+ end
@@ -1,3 +1,3 @@
1
1
  module Retriever
2
- VERSION = '0.0.8'
2
+ VERSION = '0.0.9'
3
3
  end
data/readme.md CHANGED
@@ -1,9 +1,9 @@
1
- RubyRetriever [![Gem Version](https://badge.fury.io/rb/RubyRetriever.svg)](http://badge.fury.io/rb/RubyRetriever)
1
+ RubyRetriever [![Gem Version](https://badge.fury.io/rb/rubyretriever.svg)](http://badge.fury.io/rb/rubyretriever)
2
2
  ==============
3
3
 
4
- Now an official RubyGem! --make sure to use camel-casing--
4
+ Now an official RubyGem!
5
5
  ```sh
6
- gem install RubyRetriever
6
+ gem install rubyretriever
7
7
  ```
8
8
 
9
9
  Update (5/25):
@@ -25,7 +25,7 @@ RubyRetriever does NOT respect robots.txt, and RubyRetriever currently - by defa
25
25
  HOW IT WORKS
26
26
  -----------
27
27
  ```sh
28
- gem install RubyRetriever
28
+ gem install rubyretriever
29
29
  rr [MODE] [OPTIONS] Target_URL
30
30
  ```
31
31
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyretriever
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Norton
@@ -118,6 +118,7 @@ extra_rdoc_files: []
118
118
  files:
119
119
  - LICENSE
120
120
  - bin/rr
121
+ - lib/fetchsitemap.rb
121
122
  - lib/retriever.rb
122
123
  - lib/retriever/fetch.rb
123
124
  - lib/retriever/fetchfiles.rb