seo_cache 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +15 -0
- data/lib/seo_cache/middleware.rb +52 -34
- data/lib/seo_cache/page_caching.rb +1 -1
- data/lib/seo_cache/page_render.rb +3 -12
- data/lib/seo_cache/populate_cache.rb +1 -1
- data/lib/seo_cache/version.rb +1 -1
- data/lib/seo_cache.rb +5 -2
- data/seo_cache.gemspec +1 -2
- metadata +7 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e101c3d2b9260ac681e1d6ffc399c88e98c5f1edc69f3148790e6b63abda2ad0
|
4
|
+
data.tar.gz: de86f2455bb55ecbbf70365c068f6ba3b4f865cb59dfb352975a6f14d4599681
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2539f235f5e806422cbd969723a3a83cb671b270869ec02f73efffa2d15f96bf401155bcc48ea462ad1ee7fac2b994692b94103decd33ec61f5ee88672c5ad16
|
7
|
+
data.tar.gz: d871e975c11792c10bde694427e184e19d1b7effa0ff5eb90bd88daa05ed1b42a66deccbe111e2435fb0972df24556a88c462956eb47f59d4c1875fe94e66be2
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.9.0
|
2
|
+
|
3
|
+
- Update gems
|
4
|
+
- Don't cache response if status is not 200
|
5
|
+
- Add 'seo_mode' variable to env to detect if seo mode is currently active
|
6
|
+
- Add status to source code to cache only pages with 200 HTTP status code
|
7
|
+
- Check existence of destination directory before creating it
|
8
|
+
- Remove persistent connection (already built-in in Selenium)
|
9
|
+
|
1
10
|
## 0.8.0
|
2
11
|
|
3
12
|
- Change bundler version
|
data/README.md
CHANGED
@@ -112,6 +112,12 @@ Parameter to add manually to the URL to force page caching, if you want to cache
|
|
112
112
|
SeoCache.force_cache_url_param = '_seo_cache_'
|
113
113
|
```
|
114
114
|
|
115
|
+
Cache only pages with these HTTP status codes (by default, not-found and error pages are not cached):
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
SeoCache.cache_only_status = [<your_list>]
|
119
|
+
```
|
120
|
+
|
115
121
|
URL extension to ignore when caching (already defined):
|
116
122
|
|
117
123
|
```ruby
|
@@ -134,6 +140,15 @@ Be aware, JS will be rendered twice: once by server rendering and once by client.
|
|
134
140
|
|
135
141
|
Disk cache is recommended by default. Nginx will fetch the file directly from disk. The TTFB (time to first byte) will be under 200ms :). You can use memory cache if you have a lot of RAM.
|
136
142
|
|
143
|
+
## Controllers
|
144
|
+
|
145
|
+
You can check if seo mode is active in your controllers, with the following variable:
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
request.env['seo_mode']
|
149
|
+
```
|
150
|
+
|
151
|
+
|
137
152
|
## Consult cache pages
|
138
153
|
|
139
154
|
To see the cached page in a browser, open a browser and set the user agent to:
|
data/lib/seo_cache/middleware.rb
CHANGED
@@ -6,9 +6,9 @@ require 'seo_cache/page_render'
|
|
6
6
|
module SeoCache
|
7
7
|
class Middleware
|
8
8
|
def initialize(app, options = {})
|
9
|
-
@options
|
10
|
-
@extensions_to_ignore
|
11
|
-
@crawler_user_agents
|
9
|
+
@options = options
|
10
|
+
@extensions_to_ignore = SeoCache.extensions_to_ignore
|
11
|
+
@crawler_user_agents = SeoCache.crawler_user_agents
|
12
12
|
|
13
13
|
@app = app
|
14
14
|
|
@@ -16,7 +16,7 @@ module SeoCache
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def call(env)
|
19
|
-
if
|
19
|
+
if prerender_page?(env)
|
20
20
|
cached_response = before_render(env)
|
21
21
|
|
22
22
|
return cached_response.finish if cached_response.present?
|
@@ -24,40 +24,61 @@ module SeoCache
|
|
24
24
|
SeoCache.log('missed cache : ' + Rack::Request.new(env).path) if SeoCache.log_missed_cache
|
25
25
|
|
26
26
|
if SeoCache.prerender_service_url.present?
|
27
|
-
|
28
|
-
if
|
29
|
-
response =
|
30
|
-
after_render(env,
|
27
|
+
prerender_response = prerender_service(env)
|
28
|
+
if prerender_response
|
29
|
+
response = build_response_from_prerender(prerender_response.body)
|
30
|
+
after_render(env, prerender_response)
|
31
31
|
return response.finish
|
32
32
|
end
|
33
33
|
else
|
34
34
|
Thread.new do
|
35
|
-
|
36
|
-
|
35
|
+
prerender_data = page_render(env)
|
36
|
+
# Extract status from render page
|
37
|
+
status = prerender_data.scan(/<!--status:(\d+)-->/).last&.first
|
38
|
+
after_render(env, prerender_data, status || 200)
|
37
39
|
end
|
38
40
|
end
|
41
|
+
elsif prerender_params?(env)
|
42
|
+
env['seo_mode'] = true
|
43
|
+
# Add status to render page because Selenium doesn't return http headers or status...
|
44
|
+
status, headers, response = @app.call(env)
|
45
|
+
status_code = "<!--status:#{status}-->"
|
46
|
+
# Cannot add at the top of file, Chrome removes leading comments...
|
47
|
+
body_code = response.body.sub('<head>', "<head>#{status_code}")
|
48
|
+
return [status, headers, [body_code]]
|
39
49
|
end
|
40
50
|
|
41
|
-
@app.call(env)
|
51
|
+
return @app.call(env)
|
42
52
|
end
|
43
53
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
54
|
+
def prerender_params?(env)
|
55
|
+
return false if env['REQUEST_METHOD'] != 'GET'
|
56
|
+
|
57
|
+
request = Rack::Request.new(env)
|
58
|
+
query_params = Rack::Utils.parse_query(request.query_string)
|
59
|
+
|
60
|
+
return false if @extensions_to_ignore.any? { |extension| request.fullpath.include? extension }
|
61
|
+
|
62
|
+
return true if query_params.has_key?(SeoCache.prerender_url_param) || query_params.has_key?(SeoCache.force_cache_url_param)
|
63
|
+
end
|
64
|
+
|
65
|
+
def prerender_page?(env)
|
66
|
+
user_agent = env['HTTP_USER_AGENT']
|
67
|
+
buffer_agent = env['HTTP_X_BUFFERBOT']
|
68
|
+
is_requesting_prerender_page = false
|
48
69
|
|
49
70
|
return false unless user_agent
|
50
71
|
|
51
72
|
return false if env['REQUEST_METHOD'] != 'GET'
|
52
73
|
|
53
|
-
request
|
74
|
+
request = Rack::Request.new(env)
|
54
75
|
query_params = Rack::Utils.parse_query(request.query_string)
|
55
76
|
|
56
77
|
# If it is the generated page...don't prerender
|
57
78
|
return false if query_params.has_key?(SeoCache.prerender_url_param)
|
58
79
|
|
59
80
|
# if it is a bot and host doesn't contain these domains...don't prerender
|
60
|
-
return false if SeoCache.whitelist_hosts.present? && SeoCache.whitelist_hosts.
|
81
|
+
return false if SeoCache.whitelist_hosts.present? && SeoCache.whitelist_hosts.none? { |host| request.host.include?(host) }
|
61
82
|
|
62
83
|
# if it is a bot and urls contain these params...don't prerender
|
63
84
|
return false if SeoCache.blacklist_params.present? && SeoCache.blacklist_params.any? { |param| query_params.has_key?(param) }
|
@@ -79,20 +100,20 @@ module SeoCache
|
|
79
100
|
end
|
80
101
|
return false if blacklisted_url
|
81
102
|
|
82
|
-
|
103
|
+
is_requesting_prerender_page = true if Rack::Utils.parse_query(request.query_string).has_key?('_escaped_fragment_') || Rack::Utils.parse_query(request.query_string).has_key?(SeoCache.force_cache_url_param)
|
83
104
|
|
84
105
|
# if it is a bot...show prerendered page
|
85
|
-
|
106
|
+
is_requesting_prerender_page = true if @crawler_user_agents.any? { |crawler_user_agent| user_agent.downcase.include?(crawler_user_agent.downcase) }
|
86
107
|
|
87
108
|
# if it is BufferBot...show prerendered page
|
88
|
-
|
109
|
+
is_requesting_prerender_page = true if buffer_agent
|
89
110
|
|
90
111
|
SeoCache.log('force cache : ' + request.path) if Rack::Utils.parse_query(request.query_string).has_key?(SeoCache.force_cache_url_param) && SeoCache.log_missed_cache
|
91
112
|
|
92
|
-
return
|
113
|
+
return is_requesting_prerender_page
|
93
114
|
end
|
94
115
|
|
95
|
-
def
|
116
|
+
def prerender_service(env)
|
96
117
|
url = URI.parse(build_api_url(env))
|
97
118
|
headers = {
|
98
119
|
'User-Agent' => env['HTTP_USER_AGENT'],
|
@@ -107,7 +128,8 @@ module SeoCache
|
|
107
128
|
response['Content-Length'] = response.body.length
|
108
129
|
response.delete('Content-Encoding')
|
109
130
|
end
|
110
|
-
|
131
|
+
|
132
|
+
return response
|
111
133
|
rescue StandardError => error
|
112
134
|
SeoCache.log_error(error.message)
|
113
135
|
end
|
@@ -136,8 +158,8 @@ module SeoCache
|
|
136
158
|
"#{prerender_url}#{forward_slash}#{url}"
|
137
159
|
end
|
138
160
|
|
139
|
-
def
|
140
|
-
response = Rack::Response.new(
|
161
|
+
def build_response_from_prerender(prerender_response)
|
162
|
+
response = Rack::Response.new(prerender_response.body, prerender_response.code, prerender_response.header)
|
141
163
|
|
142
164
|
# @options[:build_rack_response_from_prerender]&.call(response, prerendered_response)
|
143
165
|
|
@@ -152,17 +174,14 @@ module SeoCache
|
|
152
174
|
|
153
175
|
return nil unless cached_render
|
154
176
|
|
155
|
-
if cached_render
|
177
|
+
if cached_render.is_a?(String)
|
156
178
|
Rack::Response.new(cached_render, 200, 'Content-Type' => 'text/html; charset=utf-8')
|
157
|
-
elsif cached_render
|
179
|
+
elsif cached_render.is_a?(Rack::Response)
|
158
180
|
cached_render
|
159
181
|
end
|
160
182
|
end
|
161
183
|
|
162
184
|
def page_render(env)
|
163
|
-
# return nil unless @options[:page_render]
|
164
|
-
# @options[:page_render].call(url)
|
165
|
-
|
166
185
|
# Add key parameter to url
|
167
186
|
request = Rack::Request.new(env)
|
168
187
|
url = if request.query_string.present? || request.url.end_with?('?')
|
@@ -172,12 +191,11 @@ module SeoCache
|
|
172
191
|
end
|
173
192
|
url += "#{SeoCache.prerender_url_param}=true"
|
174
193
|
|
175
|
-
PageRender.new.get(url)
|
194
|
+
return PageRender.new.get(url)
|
176
195
|
end
|
177
196
|
|
178
|
-
def after_render(env, response)
|
179
|
-
|
180
|
-
# @options[:after_render].call(env, response)
|
197
|
+
def after_render(env, response, status = 200)
|
198
|
+
return unless response && SeoCache.cache_only_status.include?(status.to_i)
|
181
199
|
|
182
200
|
@page_caching.cache(response, Rack::Request.new(env).path)
|
183
201
|
end
|
@@ -74,7 +74,7 @@ module SeoCache
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def write_to_disk(content, path, gzip)
|
77
|
-
FileUtils.makedirs(File.dirname(path))
|
77
|
+
FileUtils.makedirs(File.dirname(path)) unless File.directory?(File.dirname(path))
|
78
78
|
File.open(path, 'wb+') { |f| f.write(content) }
|
79
79
|
|
80
80
|
Zlib::GzipWriter.open(path + '.gz', gzip) { |f| f.write(content) } if gzip
|
@@ -12,16 +12,8 @@ module SeoCache
|
|
12
12
|
return @driver.page_source
|
13
13
|
rescue StandardError => error
|
14
14
|
SeoCache.log_error(error.message)
|
15
|
-
ensure
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
def persistent_get(url)
|
20
|
-
@driver.get(url)
|
21
|
-
|
22
|
-
return @driver.page_source
|
23
|
-
rescue StandardError => error
|
24
|
-
SeoCache.log_error(error.message)
|
15
|
+
# ensure
|
16
|
+
# @driver&.quit
|
25
17
|
end
|
26
18
|
|
27
19
|
def close_connection
|
@@ -37,7 +29,6 @@ module SeoCache
|
|
37
29
|
|
38
30
|
Selenium::WebDriver::Chrome.path = SeoCache.chrome_path if SeoCache.chrome_path
|
39
31
|
|
40
|
-
client = ::Selenium::WebDriver::Remote::Http::Persistent.new
|
41
32
|
browser_options = ::Selenium::WebDriver::Chrome::Options.new
|
42
33
|
browser_options.args << '--headless'
|
43
34
|
browser_options.args << '--disable-gpu'
|
@@ -45,7 +36,7 @@ module SeoCache
|
|
45
36
|
browser_options.args << '--disable-web-security'
|
46
37
|
browser_options.args << '--window-size=1920x1080'
|
47
38
|
# browser_options.args << '--remote-debugging-port=3020'
|
48
|
-
@driver = ::Selenium::WebDriver.for(:chrome, options: browser_options
|
39
|
+
@driver = ::Selenium::WebDriver.for(:chrome, options: browser_options)
|
49
40
|
end
|
50
41
|
end
|
51
42
|
end
|
@@ -18,7 +18,7 @@ module SeoCache
|
|
18
18
|
@paths.each do |path|
|
19
19
|
next if @page_caching.cache_exists?(path) && !@force_cache
|
20
20
|
|
21
|
-
page_source = @page_render.
|
21
|
+
page_source = @page_render.get(@host + path)
|
22
22
|
@page_caching.cache(page_source, path)
|
23
23
|
end
|
24
24
|
|
data/lib/seo_cache/version.rb
CHANGED
data/lib/seo_cache.rb
CHANGED
@@ -5,7 +5,6 @@ require 'net/http'
|
|
5
5
|
require 'redis'
|
6
6
|
require 'redis-namespace'
|
7
7
|
require 'selenium/webdriver'
|
8
|
-
require 'selenium/webdriver/remote/http/persistent'
|
9
8
|
require 'webdrivers'
|
10
9
|
|
11
10
|
require 'seo_cache/logger'
|
@@ -53,14 +52,18 @@ module SeoCache
|
|
53
52
|
mattr_accessor :prerender_service_url
|
54
53
|
self.prerender_service_url = nil
|
55
54
|
|
55
|
+
# Selenium doesn't support HTTP headers, so the URL parameter is mandatory
|
56
56
|
mattr_accessor :prerender_url_param
|
57
57
|
self.prerender_url_param = '_prerender_'
|
58
58
|
|
59
59
|
mattr_accessor :force_cache_url_param
|
60
60
|
self.force_cache_url_param = '_seo_cache_'
|
61
61
|
|
62
|
+
mattr_accessor :cache_only_status
|
63
|
+
self.cache_only_status = [200]
|
64
|
+
|
62
65
|
mattr_accessor :extensions_to_ignore
|
63
|
-
self.extensions_to_ignore = %w[.js .css .xml .less .png .jpg .jpeg .gif .pdf .doc .txt .ico .rss .zip .mp3 .rar .exe .wmv .doc .avi .ppt .mpg .mpeg .tif .wav .mov .psd .ai .xls .mp4 .m4a .swf .dat .dmg .iso .flv .m4v .torrent]
|
66
|
+
self.extensions_to_ignore = %w[.js .css .xml .less .png .jpg .jpeg .gif .pdf .doc .txt .ico .rss .zip .mp3 .rar .exe .wmv .doc .avi .ppt .mpg .mpeg .tif .wav .mov .psd .ai .xls .mp4 .m4a .swf .dat .dmg .iso .flv .m4v .torrent .woff2 .woff .gz .ttf .svg]
|
64
67
|
|
65
68
|
mattr_accessor :crawler_user_agents
|
66
69
|
self.crawler_user_agents = [
|
data/seo_cache.gemspec
CHANGED
@@ -21,12 +21,11 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.require_paths = ['lib']
|
22
22
|
|
23
23
|
spec.add_dependency 'activesupport', '~> 5'
|
24
|
-
spec.add_dependency 'net-http-persistent', '~> 3'
|
25
24
|
spec.add_dependency 'rack', '~> 2'
|
26
25
|
spec.add_dependency 'railties', '~> 5'
|
27
26
|
spec.add_dependency 'redis', '~> 4'
|
28
27
|
spec.add_dependency 'redis-namespace', '~> 1'
|
29
|
-
spec.add_dependency 'selenium-webdriver', '3
|
28
|
+
spec.add_dependency 'selenium-webdriver', '~> 3'
|
30
29
|
spec.add_dependency 'webdrivers', '~> 4'
|
31
30
|
|
32
31
|
spec.add_development_dependency 'bundler', '~> 1'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: seo_cache
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- FloXcoder
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: net-http-persistent
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '3'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '3'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: rack
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -98,16 +84,16 @@ dependencies:
|
|
98
84
|
name: selenium-webdriver
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
|
-
- -
|
87
|
+
- - "~>"
|
102
88
|
- !ruby/object:Gem::Version
|
103
|
-
version: 3
|
89
|
+
version: '3'
|
104
90
|
type: :runtime
|
105
91
|
prerelease: false
|
106
92
|
version_requirements: !ruby/object:Gem::Requirement
|
107
93
|
requirements:
|
108
|
-
- -
|
94
|
+
- - "~>"
|
109
95
|
- !ruby/object:Gem::Version
|
110
|
-
version: 3
|
96
|
+
version: '3'
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
98
|
name: webdrivers
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -240,8 +226,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
240
226
|
- !ruby/object:Gem::Version
|
241
227
|
version: '0'
|
242
228
|
requirements: []
|
243
|
-
|
244
|
-
rubygems_version: 2.7.9
|
229
|
+
rubygems_version: 3.0.4
|
245
230
|
signing_key:
|
246
231
|
specification_version: 4
|
247
232
|
summary: Cache dedicated for SEO with Javascript rendering
|