organic-sitemap 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/README.md +5 -2
- data/lib/organic-sitemap/configuration.rb +1 -1
- data/lib/organic-sitemap/crawler_manager.rb +5 -3
- data/lib/organic-sitemap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 758be2109c62c6cb65424a33b2cccfb42c0500b1
|
4
|
+
data.tar.gz: fb77334c80b26ce31a745e698dcc8752947314dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b0f70bd7651db8adf134049a88b49f9ae20035b4bd55c44417a558cba13a5584b7c05cac91230eb0f28062f470ed7c71561fd4b67ecd8054555cac940081b93
|
7
|
+
data.tar.gz: 18b8bdad32a4b68c34c0344c143b32531ee677c9380c69670e4bf7c54e4c4f7913440b077059c824623b846c6daf1431a6387e6e4b1b118954e701fb464850c9
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,16 @@
|
|
1
|
+
## 0.2.0
|
2
|
+
|
3
|
+
### Changed
|
4
|
+
|
5
|
+
- Let crawler_manager accept HTTParty options
|
6
|
+
|
1
7
|
## 0.1.3
|
2
8
|
|
3
9
|
### Changed
|
4
10
|
|
5
|
-
- Add
|
11
|
+
- Add cache_manager for get expiry cache url
|
6
12
|
- Fix error with rspec gem versions
|
7
|
-
- Add
|
13
|
+
- Add crawler_manager to warmup urls
|
8
14
|
|
9
15
|
## 0.1.2
|
10
16
|
|
data/README.md
CHANGED
@@ -90,12 +90,15 @@ OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern
|
|
90
90
|
|
91
91
|
|
92
92
|
```
|
93
|
-
The with **CrawlerManager.warmup(urls)** we visit all this urls
|
93
|
+
Then with **CrawlerManager.warmup(urls, opts={})** we visit all these urls. We can set a delay between page loads by setting a delay in the configuration file. When we visit a url, *RedisManager* updates the score for this url, and it will not be visited again until its cache time expires
|
94
94
|
|
95
95
|
Example:
|
96
96
|
```
|
97
|
-
# For a
|
97
|
+
# For a 3.hours page cache, get page with user-agent='Ruby'
|
98
98
|
CrawlerManager.warmup(CacheManager.uncached_urls(expiration_time: 3.hours))
|
99
|
+
|
100
|
+
# Get '/test' with user-agent='Crawler-bot'
|
101
|
+
CrawlerManager.warmup('/test', {headers: {"User-Agent" => 'Crawler-bot'}})
|
99
102
|
```
|
100
103
|
|
101
104
|
## Rails config generator
|
@@ -1,15 +1,17 @@
|
|
1
1
|
module OrganicSitemap
|
2
2
|
class CrawlerManager
|
3
|
-
def self.warmup(urls)
|
3
|
+
def self.warmup(urls, opts = {})
|
4
4
|
[*urls].each do |uri|
|
5
|
-
|
5
|
+
p "OrganicSitemap::CrawlerManager GET: #{uri}"
|
6
|
+
response = HTTParty.get(url_for(uri), opts)
|
6
7
|
|
7
8
|
url_processed = OrganicSitemap::UrlProcessor.new(response.code, response.headers, response.request)
|
8
9
|
|
9
10
|
unless url_processed.url_from_cache_valid?
|
11
|
+
p "OrganicSitemap::CrawlerManager REMOVING URL: #{uri} invalid url"
|
10
12
|
OrganicSitemap::RedisManager.remove_key(key: uri)
|
11
13
|
end
|
12
|
-
sleep crawler_delay
|
14
|
+
sleep crawler_delay if crawler_delay
|
13
15
|
end
|
14
16
|
end
|
15
17
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: organic-sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kaskito
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|