rubyretriever 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/retriever/fetchsitemap.rb +1 -1
- data/lib/retriever/version.rb +1 -1
- metadata +1 -2
- data/lib/fetchsitemap.rb +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b46761b89c8f66fb12417b8b08b9b99834eaa6a
|
4
|
+
data.tar.gz: 04588d0306cf3c80577e287c371ac799fcdb69b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fe73886f396c9282fc831f1ed2a2bebb97f17decf79b06fa58b1105d48e11dabefb43ee021a27c772bd6c70303cb64d0100cb57644d451a9242991642489665
|
7
|
+
data.tar.gz: c63e83ba6b79d3874943a15b17324ef4dd5fa85c9502fe5d24f26f98fab265f928105808994b980b1ce7dd94e8439481004644cfbcbfadf7e3b01facb483a1cf
|
data/lib/retriever/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubyretriever
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joe Norton
|
@@ -118,7 +118,6 @@ extra_rdoc_files: []
|
|
118
118
|
files:
|
119
119
|
- LICENSE
|
120
120
|
- bin/rr
|
121
|
-
- lib/fetchsitemap.rb
|
122
121
|
- lib/retriever.rb
|
123
122
|
- lib/retriever/fetch.rb
|
124
123
|
- lib/retriever/fetchfiles.rb
|
data/lib/fetchsitemap.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
module Retriever
|
2
|
-
class FetchSitemap < Fetch
|
3
|
-
attr_reader :sitemap
|
4
|
-
def initialize(url,options)
|
5
|
-
super
|
6
|
-
@sitemap = [@target]
|
7
|
-
@linkStack = self.parseInternalLinks(self.fetchLinks(fetchPage(@target)))
|
8
|
-
self.lg("#{@linkStack.size-1} new links found")
|
9
|
-
errlog("Bad URL -- #{@target}") if !@linkStack
|
10
|
-
|
11
|
-
@linkStack.delete(@target) if @linkStack.include?(@target)
|
12
|
-
@linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
|
13
|
-
@sitemap.concat(@linkStack)
|
14
|
-
|
15
|
-
self.async_crawl_and_collect()
|
16
|
-
|
17
|
-
@sitemap.sort_by! {|x| x.length} if @sitemap.size>1
|
18
|
-
@sitemap.uniq!
|
19
|
-
@sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)
|
20
|
-
|
21
|
-
self.dump(self.sitemap)
|
22
|
-
self.write(self.sitemap) if @output
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|