rubyretriever 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 849720cdbcbddf95e458dcf8a9928ad0430e1c6d
4
- data.tar.gz: 2bd39dfcbd58a0b1b2fa66496be9a77c97f72623
3
+ metadata.gz: 4b46761b89c8f66fb12417b8b08b9b99834eaa6a
4
+ data.tar.gz: 04588d0306cf3c80577e287c371ac799fcdb69b1
5
5
  SHA512:
6
- metadata.gz: a76d5ea2c7087a2c63f84f2d31cde832bcf2d2f72b0e827f8cae581380f1fd3b3ad7e40306753a63ad06ebd5e1154eb2153e9a853ad39aca6ce800428733f4c8
7
- data.tar.gz: aba0032ed5d5cb9701e7103f0928a0009443d2323df28f683990b7a75da962f2b0c617a7667fff4a19e9a28983297b60113799a49977bf49998fba5b9af4f98b
6
+ metadata.gz: 0fe73886f396c9282fc831f1ed2a2bebb97f17decf79b06fa58b1105d48e11dabefb43ee021a27c772bd6c70303cb64d0100cb57644d451a9242991642489665
7
+ data.tar.gz: c63e83ba6b79d3874943a15b17324ef4dd5fa85c9502fe5d24f26f98fab265f928105808994b980b1ce7dd94e8439481004644cfbcbfadf7e3b01facb483a1cf
@@ -19,7 +19,7 @@ module Retriever
19
19
  @sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)
20
20
 
21
21
  self.dump(self.sitemap)
22
- self.write(@output,self.sitemap) if @output
22
+ self.write(self.sitemap) if @output
23
23
  end
24
24
  end
25
25
  end
@@ -1,3 +1,3 @@
1
1
  module Retriever
2
- VERSION = '0.0.9'
2
+ VERSION = '0.0.10'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyretriever
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joe Norton
@@ -118,7 +118,6 @@ extra_rdoc_files: []
118
118
  files:
119
119
  - LICENSE
120
120
  - bin/rr
121
- - lib/fetchsitemap.rb
122
121
  - lib/retriever.rb
123
122
  - lib/retriever/fetch.rb
124
123
  - lib/retriever/fetchfiles.rb
data/lib/fetchsitemap.rb DELETED
@@ -1,25 +0,0 @@
1
- module Retriever
2
- class FetchSitemap < Fetch
3
- attr_reader :sitemap
4
- def initialize(url,options)
5
- super
6
- @sitemap = [@target]
7
- @linkStack = self.parseInternalLinks(self.fetchLinks(fetchPage(@target)))
8
- self.lg("#{@linkStack.size-1} new links found")
9
- errlog("Bad URL -- #{@target}") if !@linkStack
10
-
11
- @linkStack.delete(@target) if @linkStack.include?(@target)
12
- @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
13
- @sitemap.concat(@linkStack)
14
-
15
- self.async_crawl_and_collect()
16
-
17
- @sitemap.sort_by! {|x| x.length} if @sitemap.size>1
18
- @sitemap.uniq!
19
- @sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)
20
-
21
- self.dump(self.sitemap)
22
- self.write(self.sitemap) if @output
23
- end
24
- end
25
- end