organic-sitemap 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/organic-sitemap/crawler_manager.rb +15 -2
- data/lib/organic-sitemap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa747ab73beae12ff458805daddb62de82a43440
|
4
|
+
data.tar.gz: 12a0e654afe1610fc3bf9ee976ae4a157db36874
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de620e662c93b3f13901fe443cb2dd619381c951146f679449e0bc2d6484de7d17c5cfb59877b6a63463dfed3aa1ff7377ee68957a63f93748ffea537a588819
|
7
|
+
data.tar.gz: 513bd94dfdf1f313145fe33f7d3ab588a381c9e11402100c4a7ded6fcff2866ea83fd98cb966e427e1bd59a05397b84c5304c1153b7a2eb79bf6b37e4b5bb7e4
|
data/lib/organic-sitemap/crawler_manager.rb
CHANGED
@@ -1,18 +1,30 @@
|
|
1
1
|
module OrganicSitemap
|
2
2
|
class CrawlerManager
|
3
3
|
def self.warmup(urls, opts = {})
|
4
|
-
[*urls]
|
4
|
+
data = {urls: [*urls],
|
5
|
+
valids: 0,
|
6
|
+
invalids: {}}
|
7
|
+
|
8
|
+
data[:urls].each do |uri|
|
5
9
|
p "OrganicSitemap::CrawlerManager GET: #{uri}"
|
6
10
|
response = HTTParty.get(url_for(uri), opts)
|
7
11
|
|
8
12
|
url_processed = OrganicSitemap::UrlProcessor.new(response.code, response.headers, response.request)
|
9
13
|
|
10
14
|
unless url_processed.url_from_cache_valid?
|
11
|
-
p "OrganicSitemap::CrawlerManager REMOVING URL: #{uri}"
|
15
|
+
p "OrganicSitemap::CrawlerManager REMOVING URL: #{uri} response code: #{response.code}"
|
12
16
|
OrganicSitemap::RedisManager.remove_key(key: uri)
|
17
|
+
if data[:invalids][response.code]
|
18
|
+
data[:invalids][response.code] << uri
|
19
|
+
else
|
20
|
+
data[:invalids][response.code] = [uri]
|
21
|
+
end
|
22
|
+
else
|
23
|
+
data[:valids] += 1
|
13
24
|
end
|
14
25
|
sleep crawler_delay if crawler_delay
|
15
26
|
end
|
27
|
+
data
|
16
28
|
end
|
17
29
|
|
18
30
|
private
|
@@ -20,6 +32,7 @@ module OrganicSitemap
|
|
20
32
|
def self.url_for(uri)
|
21
33
|
"#{OrganicSitemap.configuration.crawler_domain}#{uri}"
|
22
34
|
end
|
35
|
+
|
23
36
|
def self.crawler_delay
|
24
37
|
OrganicSitemap.configuration.crawler_delay
|
25
38
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: organic-sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kaskito
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|