organic-sitemap 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa747ab73beae12ff458805daddb62de82a43440
4
- data.tar.gz: 12a0e654afe1610fc3bf9ee976ae4a157db36874
3
+ metadata.gz: d3e7fcd97003744b1142e0ffcb04cc1512f15128
4
+ data.tar.gz: e3c6405460286fd4e43d4f16132b938f195a3eb5
5
5
  SHA512:
6
- metadata.gz: de620e662c93b3f13901fe443cb2dd619381c951146f679449e0bc2d6484de7d17c5cfb59877b6a63463dfed3aa1ff7377ee68957a63f93748ffea537a588819
7
- data.tar.gz: 513bd94dfdf1f313145fe33f7d3ab588a381c9e11402100c4a7ded6fcff2866ea83fd98cb966e427e1bd59a05397b84c5304c1153b7a2eb79bf6b37e4b5bb7e4
6
+ metadata.gz: bbd93f06a83762feb6e1289866c608afa43daf0658b538238c22bab555f1b020292fb1b1319a90c4e2df2b69998900164167e17519a00518fcaa68ccacc51a2f
7
+ data.tar.gz: 1225d1f5cdf9b3d1b246f18dd7a8704c1b1f7ed1becfd56ea2932d7e2072b4cd411ed3aa7f50797ec83e79f75297f0de94337385ef68c384ec97ad9520a7d5b5
@@ -1,3 +1,9 @@
1
+ ## 0.3.0
2
+
3
+ ### Changed
4
+
5
+ - Add possibility to remove urls from collection if they return a 404 or 301 response code
6
+
1
7
  ## 0.2.1
2
8
 
3
9
  ### Changed
data/README.md CHANGED
@@ -13,13 +13,11 @@ OrganicSitemap is a gem that gives you a structure to manage your sitemap with h
13
13
  Uses a **Redis** connection to save sitemaps urls
14
14
 
15
15
  ## Installation
16
- OrganicSearch is not available as a gem yet, you can load it using my github account.
16
+ Put this line in your Gemfile:
17
17
 
18
- Add in your Gemfile:
18
+ ```gem 'organic-sitemap'```
19
19
 
20
- ```gem 'organic-sitemap', :git => 'git://github.com/abelardogilm/organic-sitemap.git'```
21
-
22
- Run ```bundle install.```
20
+ Run ```bundle install```
23
21
 
24
22
  ## Configuration
25
23
 
@@ -54,6 +52,12 @@ OrganicSitemap.configure do |config|
54
52
  # By default crawler_delay is 5sec. This is the time between get each url
55
53
  # To change it (seconds of delay):
56
54
  # config.crawler_delay = x
55
+
56
+ # By default, do nothing with urls that do not return 200.
57
+ # If you want to automatically remove 301 urls from Redis
58
+ # config.clean_redirects = true
59
+ # If you want to automatically remove 404 urls from Redis
60
+ # config.clean_not_found = true
57
61
  end
58
62
  ```
59
63
 
@@ -77,7 +81,7 @@ To configure it:
77
81
 
78
82
  With **CacheManager.uncached_urls(expiration_time: CacheExpirationTime, url_pattern: PATTERN)** we get all url not hitted on this time (all expired urls)
79
83
 
80
- Example:
84
+ *** Examples
81
85
  ```
82
86
  # Return urls not visited between 1.week.ago(setted on config.expiry_time) and 3.hours.ago
83
87
  OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours)
@@ -88,11 +92,11 @@ OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern
88
92
  # Return urls not visited between 1.week.ago(setted on config.expiry_time) and 3.hours.ago and match ^\/test\/ regexp
89
93
  OrganicSitemap::CacheManager.uncached_urls(expiration_time: 3.hours, url_pattern: /^\/test\//)
90
94
 
91
-
92
95
  ```
96
+
93
97
  The with **CrawlerManager.warmup(urls, opts={})** we visit all this urls. We can set a delay between each page load setting a delay on configuration file. When we visit a url, *RedisManager* update score for this url and will be no more visited until not expire cache time
94
98
 
95
- Example:
99
+ *** Examples
96
100
  ```
97
101
  # For a 3.hours page cache, get page with user-agent='Ruby'
98
102
  CrawlerManager.warmup(CacheManager.uncached_urls(expiration_time: 3.hours))
@@ -25,5 +25,11 @@ OrganicSitemap.configure do |config|
25
25
 
26
26
  # By default crawler_delay is 5sec. This is the time between get each url
27
27
  # To change it (seconds of delay):
28
- # config.crawler_delay = x
28
+ # config.crawler_delay = x
29
+
30
+ # By default, do nothing with urls that do not return 200.
31
+ # If you want to automatically remove 301 urls from Redis
32
+ # config.clean_redirects = true
33
+ # If you want to automatically remove 404 urls from Redis
34
+ # config.clean_not_found = true
29
35
  end
@@ -10,7 +10,8 @@ module OrganicSitemap
10
10
 
11
11
  class Configuration
12
12
  attr_accessor :storage, :storage_key, :domains, :allowed_params,
13
- :skipped_urls, :redis_connection, :expiry_time
13
+ :skipped_urls, :redis_connection, :expiry_time,
14
+ :clean_redirects, :clean_not_found
14
15
  attr_accessor :crawler_domain, :crawler_delay
15
16
 
16
17
  def initialize
@@ -19,6 +20,8 @@ module OrganicSitemap
19
20
  @allowed_params = []
20
21
  @skipped_urls = []
21
22
  @redis_connection = Redis.new(url: 'redis://127.0.0.1:6379')
23
+ @clean_redirects = false
24
+ @clean_not_found = false
22
25
  @expiry_time = 7
23
26
  @crawler_delay = nil
24
27
  end
@@ -10,6 +10,8 @@ module OrganicSitemap
10
10
  processor = OrganicSitemap::UrlProcessor.new(status, headers, Rack::Request.new(env))
11
11
  if processor.sitemap_url?
12
12
  OrganicSitemap::RedisManager.add(processor.sanitize_path_info)
13
+ elsif processor.cleanable_url?
14
+ OrganicSitemap::RedisManager.remove_key(key: processor.sanitize_path_info)
13
15
  end
14
16
  [status, headers, response]
15
17
  end
@@ -28,6 +28,18 @@ module OrganicSitemap
28
28
  success_response? && html_page? && is_expected_domain?
29
29
  end
30
30
 
31
+ def cleanable_url?
32
+ redirect_response? || not_found_response?
33
+ end
34
+
35
+ def redirect_response?
36
+ OrganicSitemap.configuration.clean_redirects && status == 301
37
+ end
38
+
39
+ def not_found_response?
40
+ OrganicSitemap.configuration.clean_not_found && status == 404
41
+ end
42
+
31
43
  private
32
44
 
33
45
  def success_response?
@@ -1,3 +1,3 @@
1
1
  module OrganicSitemap
2
- VERSION = "0.2.1" # Improve crawler_manager warmup response
2
+ VERSION = "0.3.0" # Add remove 301 and 404 functionality
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Kaskito"]
10
10
  spec.email = ["abelardogilm@gmail.com"]
11
11
 
12
- spec.summary = %q{Create your Sitemap in a organic way.}
13
- spec.description = %q{Create your Sitemap in a organic way.}
12
+ spec.summary = %q{Lets Users and Bots to create our Sitemap in a organic way.}
13
+ spec.description = %q{Lets Users and Bots to create our Sitemap in a organic way.}
14
14
  spec.homepage = "https://github.com/abelardogilm/organic-sitemap/"
15
15
  spec.license = "MIT"
16
16
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: organic-sitemap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kaskito
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-27 00:00:00.000000000 Z
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rack
@@ -156,7 +156,7 @@ dependencies:
156
156
  - - ">="
157
157
  - !ruby/object:Gem::Version
158
158
  version: '0'
159
- description: Create your Sitemap in a organic way.
159
+ description: Lets Users and Bots to create our Sitemap in a organic way.
160
160
  email:
161
161
  - abelardogilm@gmail.com
162
162
  executables: []
@@ -210,5 +210,5 @@ rubyforge_project:
210
210
  rubygems_version: 2.4.3
211
211
  signing_key:
212
212
  specification_version: 4
213
- summary: Create your Sitemap in a organic way.
213
+ summary: Lets Users and Bots to create our Sitemap in a organic way.
214
214
  test_files: []