organic-sitemap 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +12 -8
- data/lib/generators/organic_sitemap/templates/organic_sitemap.rb +7 -1
- data/lib/organic-sitemap/configuration.rb +4 -1
- data/lib/organic-sitemap/middleware/url_capture.rb +2 -0
- data/lib/organic-sitemap/url_processor.rb +12 -0
- data/lib/organic-sitemap/version.rb +1 -1
- data/organic_sitemap.gemspec +2 -2
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d3e7fcd97003744b1142e0ffcb04cc1512f15128
|
|
4
|
+
data.tar.gz: e3c6405460286fd4e43d4f16132b938f195a3eb5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bbd93f06a83762feb6e1289866c608afa43daf0658b538238c22bab555f1b020292fb1b1319a90c4e2df2b69998900164167e17519a00518fcaa68ccacc51a2f
|
|
7
|
+
data.tar.gz: 1225d1f5cdf9b3d1b246f18dd7a8704c1b1f7ed1becfd56ea2932d7e2072b4cd411ed3aa7f50797ec83e79f75297f0de94337385ef68c384ec97ad9520a7d5b5
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -13,13 +13,11 @@ OrganicSitemap is a gem that gives you a structure to manage your sitemap with h
|
|
|
13
13
|
Uses a **Redis** connection to save sitemaps urls
|
|
14
14
|
|
|
15
15
|
## Installation
|
|
16
|
-
|
|
16
|
+
Put this line in your Gemfile:
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
```gem 'organic-sitemap'```
|
|
19
19
|
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
Run ```bundle install.```
|
|
20
|
+
Run ```bundle install```
|
|
23
21
|
|
|
24
22
|
## Configuration
|
|
25
23
|
|
|
@@ -54,6 +52,12 @@ OrganicSitemap.configure do |config|
|
|
|
54
52
|
# By default crawler_delay is 5sec. This is the delay between fetching each url
|
|
55
53
|
# To change it (seconds of delay):
|
|
56
54
|
# config.crawler_delay = x
|
|
55
|
+
|
|
56
|
+
# By default, nothing is done with urls that do not return 200.
|
|
57
|
+
# If you want to automatically remove 301 urls from Redis
|
|
58
|
+
# config.clean_redirects = true
|
|
59
|
+
# If you want to automatically remove 404 urls from Redis
|
|
60
|
+
# config.clean_not_found = true
|
|
57
61
|
end
|
|
58
62
|
```
|
|
59
63
|
|
|
@@ -77,7 +81,7 @@ To configure it:
|
|
|
77
81
|
|
|
78
82
|
With **CacheManager.uncached_urls(expiration_time: CacheExpirationTime, url_pattern: PATTERN)** we get all urls that have not been hit within this time (all expired urls)
|
|
79
83
|
|
|
80
|
-
|
|
84
|
+
### Examples
|
|
81
85
|
```
|
|
82
86
|
# Return urls not visited between 1.week.ago (set in config.expiry_time) and 3.hours.ago
|
|
83
87
|
OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours)
|
|
@@ -88,11 +92,11 @@ OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern
|
|
|
88
92
|
# Return urls not visited between 1.week.ago (set in config.expiry_time) and 3.hours.ago that match the ^\/test\/ regexp
|
|
89
93
|
OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern: /^\/test\//)
|
|
90
94
|
|
|
91
|
-
|
|
92
95
|
```
|
|
96
|
+
|
|
93
97
|
Then with **CrawlerManager.warmup(urls, opts={})** we visit all these urls. We can set a delay between each page load by setting a delay in the configuration file. When we visit a url, *RedisManager* updates the score for this url, and it will not be visited again until its cache time expires
|
|
94
98
|
|
|
95
|
-
|
|
99
|
+
### Examples
|
|
96
100
|
```
|
|
97
101
|
# For a 3.hours page cache, get page with user-agent='Ruby'
|
|
98
102
|
CrawlerManager.warmup(CacheManager.uncached_urls(expiration_time: 3.hours))
|
|
@@ -25,5 +25,11 @@ OrganicSitemap.configure do |config|
|
|
|
25
25
|
|
|
26
26
|
# By default crawler_delay is 5sec. This is the delay between fetching each url
|
|
27
27
|
# To change it (seconds of delay):
|
|
28
|
-
# config.crawler_delay = x
|
|
28
|
+
# config.crawler_delay = x
|
|
29
|
+
|
|
30
|
+
# By default, nothing is done with urls that do not return 200.
|
|
31
|
+
# If you want to automatically remove 301 urls from Redis
|
|
32
|
+
# config.clean_redirects = true
|
|
33
|
+
# If you want to automatically remove 404 urls from Redis
|
|
34
|
+
# config.clean_not_found = true
|
|
29
35
|
end
|
|
@@ -10,7 +10,8 @@ module OrganicSitemap
|
|
|
10
10
|
|
|
11
11
|
class Configuration
|
|
12
12
|
attr_accessor :storage, :storage_key, :domains, :allowed_params,
|
|
13
|
-
:skipped_urls, :redis_connection, :expiry_time
|
|
13
|
+
:skipped_urls, :redis_connection, :expiry_time,
|
|
14
|
+
:clean_redirects, :clean_not_found
|
|
14
15
|
attr_accessor :crawler_domain, :crawler_delay
|
|
15
16
|
|
|
16
17
|
def initialize
|
|
@@ -19,6 +20,8 @@ module OrganicSitemap
|
|
|
19
20
|
@allowed_params = []
|
|
20
21
|
@skipped_urls = []
|
|
21
22
|
@redis_connection = Redis.new(url: 'redis://127.0.0.1:6379')
|
|
23
|
+
@clean_redirects = false
|
|
24
|
+
@clean_not_found = false
|
|
22
25
|
@expiry_time = 7
|
|
23
26
|
@crawler_delay = nil
|
|
24
27
|
end
|
|
@@ -10,6 +10,8 @@ module OrganicSitemap
|
|
|
10
10
|
processor = OrganicSitemap::UrlProcessor.new(status, headers, Rack::Request.new(env))
|
|
11
11
|
if processor.sitemap_url?
|
|
12
12
|
OrganicSitemap::RedisManager.add(processor.sanitize_path_info)
|
|
13
|
+
elsif processor.cleanable_url?
|
|
14
|
+
OrganicSitemap::RedisManager.remove_key(key: processor.sanitize_path_info)
|
|
13
15
|
end
|
|
14
16
|
[status, headers, response]
|
|
15
17
|
end
|
|
@@ -28,6 +28,18 @@ module OrganicSitemap
|
|
|
28
28
|
success_response? && html_page? && is_expected_domain?
|
|
29
29
|
end
|
|
30
30
|
|
|
31
|
+
def cleanable_url?
|
|
32
|
+
redirect_response? || not_found_response?
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def redirect_response?
|
|
36
|
+
OrganicSitemap.configuration.clean_redirects && status == 301
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def not_found_response?
|
|
40
|
+
OrganicSitemap.configuration.clean_not_found && status == 404
|
|
41
|
+
end
|
|
42
|
+
|
|
31
43
|
private
|
|
32
44
|
|
|
33
45
|
def success_response?
|
data/organic_sitemap.gemspec
CHANGED
|
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
|
9
9
|
spec.authors = ["Kaskito"]
|
|
10
10
|
spec.email = ["abelardogilm@gmail.com"]
|
|
11
11
|
|
|
12
|
-
spec.summary = %q{
|
|
13
|
-
spec.description = %q{
|
|
12
|
+
spec.summary = %q{Lets Users and Bots to create our Sitemap in a organic way.}
|
|
13
|
+
spec.description = %q{Lets Users and Bots to create our Sitemap in a organic way.}
|
|
14
14
|
spec.homepage = "https://github.com/abelardogilm/organic-sitemap/"
|
|
15
15
|
spec.license = "MIT"
|
|
16
16
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: organic-sitemap
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kaskito
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rack
|
|
@@ -156,7 +156,7 @@ dependencies:
|
|
|
156
156
|
- - ">="
|
|
157
157
|
- !ruby/object:Gem::Version
|
|
158
158
|
version: '0'
|
|
159
|
-
description:
|
|
159
|
+
description: Lets Users and Bots to create our Sitemap in a organic way.
|
|
160
160
|
email:
|
|
161
161
|
- abelardogilm@gmail.com
|
|
162
162
|
executables: []
|
|
@@ -210,5 +210,5 @@ rubyforge_project:
|
|
|
210
210
|
rubygems_version: 2.4.3
|
|
211
211
|
signing_key:
|
|
212
212
|
specification_version: 4
|
|
213
|
-
summary:
|
|
213
|
+
summary: Lets Users and Bots to create our Sitemap in a organic way.
|
|
214
214
|
test_files: []
|