seo_cache 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30638911700f93015a1ebe83773cc0e88c5cbf8682a3951481f9bc5fa733e00d
4
- data.tar.gz: c2d4cf35ef1389992fb92465b31ccb83b174bad535438e839dabaa81dbcc3ca1
3
+ metadata.gz: e101c3d2b9260ac681e1d6ffc399c88e98c5f1edc69f3148790e6b63abda2ad0
4
+ data.tar.gz: de86f2455bb55ecbbf70365c068f6ba3b4f865cb59dfb352975a6f14d4599681
5
5
  SHA512:
6
- metadata.gz: 919ca5401c7f11763324cc851867944e920569c41533092a0767841bcf94a189539772373be819d4cf14400634a30c05f0fa8edfd26ab63365ecd9da610446bf
7
- data.tar.gz: 5209689caeeb22a54c3ff4a3dfe5d8eae843f8d02d5322e5e6e94400978cf564a910d7d7cfdc7dfe4f7b712824e5d4ea925bd3ac0b781bbdc32c0f95769c3e74
6
+ metadata.gz: 2539f235f5e806422cbd969723a3a83cb671b270869ec02f73efffa2d15f96bf401155bcc48ea462ad1ee7fac2b994692b94103decd33ec61f5ee88672c5ad16
7
+ data.tar.gz: d871e975c11792c10bde694427e184e19d1b7effa0ff5eb90bd88daa05ed1b42a66deccbe111e2435fb0972df24556a88c462956eb47f59d4c1875fe94e66be2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.9.0
2
+
3
+ - Update gems
4
+ - Don't cache response if status is not 200
5
+ - Add 'seo_mode' variable to env to detect if SEO mode is currently active
6
+ - Add status to source code to cache only pages with 200 HTTP status code
7
+ - Check existence of destination directory before creating it
8
+ - Remove persistent connection (already built-in in Selenium)
9
+
1
10
  ## 0.8.0
2
11
 
3
12
  - Change bundler version
data/README.md CHANGED
@@ -112,6 +112,12 @@ Parameter to add manually to the URl to force page caching, if you want to cache
112
112
  SeoCache.force_cache_url_param = '_seo_cache_'
113
113
  ```
114
114
 
115
+ Cache only pages with these HTTP status codes (by default, not-found and error pages are not cached):
116
+
117
+ ```ruby
118
+ SeoCache.cache_only_status = [<your_list>]
119
+ ```
120
+
115
121
  URL extension to ignore when caching (already defined):
116
122
 
117
123
  ```ruby
@@ -134,6 +140,15 @@ Be aware, JS will be render twice: once by server rendering and once by client.
134
140
 
135
141
  Disk cache is recommended by default. Nginx will fetch the file directly from disk. The TTFB (time to first byte) will be under 200ms :). You can use the memory cache if you have a lot of RAM.
136
142
 
143
+ ## Controllers
144
+
145
+ You can check whether SEO mode is active in your controllers with the following variable:
146
+
147
+ ```ruby
148
+ request.env['seo_mode']
149
+ ```
150
+
151
+
137
152
  ## Consult cache pages
138
153
 
139
154
  To see the cached page in a browser, open a browser and set the user agent to:
@@ -6,9 +6,9 @@ require 'seo_cache/page_render'
6
6
  module SeoCache
7
7
  class Middleware
8
8
  def initialize(app, options = {})
9
- @options = options
10
- @extensions_to_ignore = SeoCache.extensions_to_ignore
11
- @crawler_user_agents = SeoCache.crawler_user_agents
9
+ @options = options
10
+ @extensions_to_ignore = SeoCache.extensions_to_ignore
11
+ @crawler_user_agents = SeoCache.crawler_user_agents
12
12
 
13
13
  @app = app
14
14
 
@@ -16,7 +16,7 @@ module SeoCache
16
16
  end
17
17
 
18
18
  def call(env)
19
- if should_show_prerendered_page(env)
19
+ if prerender_page?(env)
20
20
  cached_response = before_render(env)
21
21
 
22
22
  return cached_response.finish if cached_response.present?
@@ -24,40 +24,61 @@ module SeoCache
24
24
  SeoCache.log('missed cache : ' + Rack::Request.new(env).path) if SeoCache.log_missed_cache
25
25
 
26
26
  if SeoCache.prerender_service_url.present?
27
- prerendered_response = get_prerendered_page_response(env)
28
- if prerendered_response
29
- response = build_rack_response_from_prerender(prerendered_response.body)
30
- after_render(env, prerendered_response)
27
+ prerender_response = prerender_service(env)
28
+ if prerender_response
29
+ response = build_response_from_prerender(prerender_response.body)
30
+ after_render(env, prerender_response)
31
31
  return response.finish
32
32
  end
33
33
  else
34
34
  Thread.new do
35
- prerendered_data = page_render(env)
36
- after_render(env, prerendered_data) if prerendered_data
35
+ prerender_data = page_render(env)
36
+ # Extract status from render page
37
+ status = prerender_data.scan(/<!--status:(\d+)-->/).last&.first
38
+ after_render(env, prerender_data, status || 200)
37
39
  end
38
40
  end
41
+ elsif prerender_params?(env)
42
+ env['seo_mode'] = true
43
+ # Add status to render page because Selenium doesn't return http headers or status...
44
+ status, headers, response = @app.call(env)
45
+ status_code = "<!--status:#{status}-->"
46
+ # Cannot add at the top of file, Chrome removes leading comments...
47
+ body_code = response.body.sub('<head>', "<head>#{status_code}")
48
+ return [status, headers, [body_code]]
39
49
  end
40
50
 
41
- @app.call(env)
51
+ return @app.call(env)
42
52
  end
43
53
 
44
- def should_show_prerendered_page(env)
45
- user_agent = env['HTTP_USER_AGENT']
46
- buffer_agent = env['HTTP_X_BUFFERBOT']
47
- is_requesting_prerendered_page = false
54
+ def prerender_params?(env)
55
+ return false if env['REQUEST_METHOD'] != 'GET'
56
+
57
+ request = Rack::Request.new(env)
58
+ query_params = Rack::Utils.parse_query(request.query_string)
59
+
60
+ return false if @extensions_to_ignore.any? { |extension| request.fullpath.include? extension }
61
+
62
+ return true if query_params.has_key?(SeoCache.prerender_url_param) || query_params.has_key?(SeoCache.force_cache_url_param)
63
+ end
64
+
65
+ def prerender_page?(env)
66
+ user_agent = env['HTTP_USER_AGENT']
67
+ buffer_agent = env['HTTP_X_BUFFERBOT']
68
+ is_requesting_prerender_page = false
48
69
 
49
70
  return false unless user_agent
50
71
 
51
72
  return false if env['REQUEST_METHOD'] != 'GET'
52
73
 
53
- request = Rack::Request.new(env)
74
+ request = Rack::Request.new(env)
54
75
  query_params = Rack::Utils.parse_query(request.query_string)
55
76
 
56
77
  # If it is the generated page...don't prerender
57
78
  return false if query_params.has_key?(SeoCache.prerender_url_param)
58
79
 
59
80
  # if it is a bot and host doesn't contain these domains...don't prerender
60
- return false if SeoCache.whitelist_hosts.present? && SeoCache.whitelist_hosts.any? { |host| !request.host.include?(host) }
81
+ return false if SeoCache.whitelist_hosts.present? && SeoCache.whitelist_hosts.none? { |host| request.host.include?(host) }
61
82
 
62
83
  # if it is a bot and urls contain these params...don't prerender
63
84
  return false if SeoCache.blacklist_params.present? && SeoCache.blacklist_params.any? { |param| query_params.has_key?(param) }
@@ -79,20 +100,20 @@ module SeoCache
79
100
  end
80
101
  return false if blacklisted_url
81
102
 
82
- is_requesting_prerendered_page = true if Rack::Utils.parse_query(request.query_string).has_key?('_escaped_fragment_') || Rack::Utils.parse_query(request.query_string).has_key?(SeoCache.force_cache_url_param)
103
+ is_requesting_prerender_page = true if Rack::Utils.parse_query(request.query_string).has_key?('_escaped_fragment_') || Rack::Utils.parse_query(request.query_string).has_key?(SeoCache.force_cache_url_param)
83
104
 
84
105
  # if it is a bot...show prerendered page
85
- is_requesting_prerendered_page = true if @crawler_user_agents.any? { |crawler_user_agent| user_agent.downcase.include?(crawler_user_agent.downcase) }
106
+ is_requesting_prerender_page = true if @crawler_user_agents.any? { |crawler_user_agent| user_agent.downcase.include?(crawler_user_agent.downcase) }
86
107
 
87
108
  # if it is BufferBot...show prerendered page
88
- is_requesting_prerendered_page = true if buffer_agent
109
+ is_requesting_prerender_page = true if buffer_agent
89
110
 
90
111
  SeoCache.log('force cache : ' + request.path) if Rack::Utils.parse_query(request.query_string).has_key?(SeoCache.force_cache_url_param) && SeoCache.log_missed_cache
91
112
 
92
- return is_requesting_prerendered_page
113
+ return is_requesting_prerender_page
93
114
  end
94
115
 
95
- def get_prerendered_page_response(env)
116
+ def prerender_service(env)
96
117
  url = URI.parse(build_api_url(env))
97
118
  headers = {
98
119
  'User-Agent' => env['HTTP_USER_AGENT'],
@@ -107,7 +128,8 @@ module SeoCache
107
128
  response['Content-Length'] = response.body.length
108
129
  response.delete('Content-Encoding')
109
130
  end
110
- response
131
+
132
+ return response
111
133
  rescue StandardError => error
112
134
  SeoCache.log_error(error.message)
113
135
  end
@@ -136,8 +158,8 @@ module SeoCache
136
158
  "#{prerender_url}#{forward_slash}#{url}"
137
159
  end
138
160
 
139
- def build_rack_response_from_prerender(prerendered_response)
140
- response = Rack::Response.new(prerendered_response.body, prerendered_response.code, prerendered_response.header)
161
+ def build_response_from_prerender(prerender_response)
162
+ response = Rack::Response.new(prerender_response.body, prerender_response.code, prerender_response.header)
141
163
 
142
164
  # @options[:build_rack_response_from_prerender]&.call(response, prerendered_response)
143
165
 
@@ -152,17 +174,14 @@ module SeoCache
152
174
 
153
175
  return nil unless cached_render
154
176
 
155
- if cached_render&.is_a?(String)
177
+ if cached_render.is_a?(String)
156
178
  Rack::Response.new(cached_render, 200, 'Content-Type' => 'text/html; charset=utf-8')
157
- elsif cached_render&.is_a?(Rack::Response)
179
+ elsif cached_render.is_a?(Rack::Response)
158
180
  cached_render
159
181
  end
160
182
  end
161
183
 
162
184
  def page_render(env)
163
- # return nil unless @options[:page_render]
164
- # @options[:page_render].call(url)
165
-
166
185
  # Add key parameter to url
167
186
  request = Rack::Request.new(env)
168
187
  url = if request.query_string.present? || request.url.end_with?('?')
@@ -172,12 +191,11 @@ module SeoCache
172
191
  end
173
192
  url += "#{SeoCache.prerender_url_param}=true"
174
193
 
175
- PageRender.new.get(url)
194
+ return PageRender.new.get(url)
176
195
  end
177
196
 
178
- def after_render(env, response)
179
- # return true unless @options[:after_render]
180
- # @options[:after_render].call(env, response)
197
+ def after_render(env, response, status = 200)
198
+ return unless response && SeoCache.cache_only_status.include?(status.to_i)
181
199
 
182
200
  @page_caching.cache(response, Rack::Request.new(env).path)
183
201
  end
@@ -74,7 +74,7 @@ module SeoCache
74
74
  end
75
75
 
76
76
  def write_to_disk(content, path, gzip)
77
- FileUtils.makedirs(File.dirname(path))
77
+ FileUtils.makedirs(File.dirname(path)) unless File.directory?(File.dirname(path))
78
78
  File.open(path, 'wb+') { |f| f.write(content) }
79
79
 
80
80
  Zlib::GzipWriter.open(path + '.gz', gzip) { |f| f.write(content) } if gzip
@@ -12,16 +12,8 @@ module SeoCache
12
12
  return @driver.page_source
13
13
  rescue StandardError => error
14
14
  SeoCache.log_error(error.message)
15
- ensure
16
- @driver&.quit
17
- end
18
-
19
- def persistent_get(url)
20
- @driver.get(url)
21
-
22
- return @driver.page_source
23
- rescue StandardError => error
24
- SeoCache.log_error(error.message)
15
+ # ensure
16
+ # @driver&.quit
25
17
  end
26
18
 
27
19
  def close_connection
@@ -37,7 +29,6 @@ module SeoCache
37
29
 
38
30
  Selenium::WebDriver::Chrome.path = SeoCache.chrome_path if SeoCache.chrome_path
39
31
 
40
- client = ::Selenium::WebDriver::Remote::Http::Persistent.new
41
32
  browser_options = ::Selenium::WebDriver::Chrome::Options.new
42
33
  browser_options.args << '--headless'
43
34
  browser_options.args << '--disable-gpu'
@@ -45,7 +36,7 @@ module SeoCache
45
36
  browser_options.args << '--disable-web-security'
46
37
  browser_options.args << '--window-size=1920x1080'
47
38
  # browser_options.args << '--remote-debugging-port=3020'
48
- @driver = ::Selenium::WebDriver.for(:chrome, options: browser_options, http_client: client)
39
+ @driver = ::Selenium::WebDriver.for(:chrome, options: browser_options)
49
40
  end
50
41
  end
51
42
  end
@@ -18,7 +18,7 @@ module SeoCache
18
18
  @paths.each do |path|
19
19
  next if @page_caching.cache_exists?(path) && !@force_cache
20
20
 
21
- page_source = @page_render.persistent_get(@host + path)
21
+ page_source = @page_render.get(@host + path)
22
22
  @page_caching.cache(page_source, path)
23
23
  end
24
24
 
@@ -1,3 +1,3 @@
1
1
  module SeoCache
2
- VERSION = '0.8.0'.freeze
2
+ VERSION = '0.9.0'.freeze
3
3
  end
data/lib/seo_cache.rb CHANGED
@@ -5,7 +5,6 @@ require 'net/http'
5
5
  require 'redis'
6
6
  require 'redis-namespace'
7
7
  require 'selenium/webdriver'
8
- require 'selenium/webdriver/remote/http/persistent'
9
8
  require 'webdrivers'
10
9
 
11
10
  require 'seo_cache/logger'
@@ -53,14 +52,18 @@ module SeoCache
53
52
  mattr_accessor :prerender_service_url
54
53
  self.prerender_service_url = nil
55
54
 
55
+ # Selenium doesn't support HTTP header, so URL parameter is mandatory
56
56
  mattr_accessor :prerender_url_param
57
57
  self.prerender_url_param = '_prerender_'
58
58
 
59
59
  mattr_accessor :force_cache_url_param
60
60
  self.force_cache_url_param = '_seo_cache_'
61
61
 
62
+ mattr_accessor :cache_only_status
63
+ self.cache_only_status = [200]
64
+
62
65
  mattr_accessor :extensions_to_ignore
63
- self.extensions_to_ignore = %w[.js .css .xml .less .png .jpg .jpeg .gif .pdf .doc .txt .ico .rss .zip .mp3 .rar .exe .wmv .doc .avi .ppt .mpg .mpeg .tif .wav .mov .psd .ai .xls .mp4 .m4a .swf .dat .dmg .iso .flv .m4v .torrent]
66
+ self.extensions_to_ignore = %w[.js .css .xml .less .png .jpg .jpeg .gif .pdf .doc .txt .ico .rss .zip .mp3 .rar .exe .wmv .doc .avi .ppt .mpg .mpeg .tif .wav .mov .psd .ai .xls .mp4 .m4a .swf .dat .dmg .iso .flv .m4v .torrent .woff2 .woff .gz .ttf .svg]
64
67
 
65
68
  mattr_accessor :crawler_user_agents
66
69
  self.crawler_user_agents = [
data/seo_cache.gemspec CHANGED
@@ -21,12 +21,11 @@ Gem::Specification.new do |spec|
21
21
  spec.require_paths = ['lib']
22
22
 
23
23
  spec.add_dependency 'activesupport', '~> 5'
24
- spec.add_dependency 'net-http-persistent', '~> 3'
25
24
  spec.add_dependency 'rack', '~> 2'
26
25
  spec.add_dependency 'railties', '~> 5'
27
26
  spec.add_dependency 'redis', '~> 4'
28
27
  spec.add_dependency 'redis-namespace', '~> 1'
29
- spec.add_dependency 'selenium-webdriver', '3.141.0' # 3.142 is bugged
28
+ spec.add_dependency 'selenium-webdriver', '~> 3'
30
29
  spec.add_dependency 'webdrivers', '~> 4'
31
30
 
32
31
  spec.add_development_dependency 'bundler', '~> 1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seo_cache
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - FloXcoder
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-10 00:00:00.000000000 Z
11
+ date: 2019-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '5'
27
- - !ruby/object:Gem::Dependency
28
- name: net-http-persistent
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '3'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '3'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: rack
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -98,16 +84,16 @@ dependencies:
98
84
  name: selenium-webdriver
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - '='
87
+ - - "~>"
102
88
  - !ruby/object:Gem::Version
103
- version: 3.141.0
89
+ version: '3'
104
90
  type: :runtime
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - '='
94
+ - - "~>"
109
95
  - !ruby/object:Gem::Version
110
- version: 3.141.0
96
+ version: '3'
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: webdrivers
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -240,8 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
240
226
  - !ruby/object:Gem::Version
241
227
  version: '0'
242
228
  requirements: []
243
- rubyforge_project:
244
- rubygems_version: 2.7.9
229
+ rubygems_version: 3.0.4
245
230
  signing_key:
246
231
  specification_version: 4
247
232
  summary: Cache dedicated for SEO with Javascript rendering