organic-sitemap 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/README.md +5 -2
- data/lib/organic-sitemap/configuration.rb +1 -1
- data/lib/organic-sitemap/crawler_manager.rb +5 -3
- data/lib/organic-sitemap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 758be2109c62c6cb65424a33b2cccfb42c0500b1
|
4
|
+
data.tar.gz: fb77334c80b26ce31a745e698dcc8752947314dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b0f70bd7651db8adf134049a88b49f9ae20035b4bd55c44417a558cba13a5584b7c05cac91230eb0f28062f470ed7c71561fd4b67ecd8054555cac940081b93
|
7
|
+
data.tar.gz: 18b8bdad32a4b68c34c0344c143b32531ee677c9380c69670e4bf7c54e4c4f7913440b077059c824623b846c6daf1431a6387e6e4b1b118954e701fb464850c9
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,16 @@
|
|
1
|
+
## 0.2.0
|
2
|
+
|
3
|
+
### Changed
|
4
|
+
|
5
|
+
- Let crawler_manager accept Httparty options
|
6
|
+
|
1
7
|
## 0.1.3
|
2
8
|
|
3
9
|
### Changed
|
4
10
|
|
5
|
-
- Add
|
11
|
+
- Add cache_manager to get urls whose cache has expired
|
6
12
|
- Fix error with rspec gem versions
|
7
|
-
- Add
|
13
|
+
- Add crawler_manager to warmup urls
|
8
14
|
|
9
15
|
## 0.1.2
|
10
16
|
|
data/README.md
CHANGED
@@ -90,12 +90,15 @@ OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern
|
|
90
90
|
|
91
91
|
|
92
92
|
```
|
93
|
-
The with **CrawlerManager.warmup(urls)** we visit all this urls
|
93
|
+
Then with **CrawlerManager.warmup(urls, opts={})** we visit all these urls. We can set a delay between each page load by setting a delay in the configuration file. When we visit a url, *RedisManager* updates the score for this url, and it will not be visited again until its cache time expires
|
94
94
|
|
95
95
|
Example:
|
96
96
|
```
|
97
|
-
# For a
|
97
|
+
# For a 3.hours page cache, get page with user-agent='Ruby'
|
98
98
|
CrawlerManager.warmup(CacheManager.uncached_urls(expiration_time: 3.hours))
|
99
|
+
|
100
|
+
# Get '/test' with user-agent='Crawler-bot'
|
101
|
+
CrawlerManager.warmup('/test', {headers: {"User-Agent" => 'Crawler-bot'}})
|
99
102
|
```
|
100
103
|
|
101
104
|
## Rails config generator
|
data/lib/organic-sitemap/crawler_manager.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
module OrganicSitemap
|
2
2
|
class CrawlerManager
|
3
|
-
def self.warmup(urls)
|
3
|
+
def self.warmup(urls, opts = {})
|
4
4
|
[*urls].each do |uri|
|
5
|
-
|
5
|
+
p "OrganicSitemap::CrawlerManager GET: #{uri}"
|
6
|
+
response = HTTParty.get(url_for(uri), opts)
|
6
7
|
|
7
8
|
url_processed = OrganicSitemap::UrlProcessor.new(response.code, response.headers, response.request)
|
8
9
|
|
9
10
|
unless url_processed.url_from_cache_valid?
|
11
|
+
p "OrganicSitemap::CrawlerManager REMOVING URL: #{uri} invalid url"
|
10
12
|
OrganicSitemap::RedisManager.remove_key(key: uri)
|
11
13
|
end
|
12
|
-
sleep crawler_delay
|
14
|
+
sleep crawler_delay if crawler_delay
|
13
15
|
end
|
14
16
|
end
|
15
17
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: organic-sitemap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kaskito
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|