seo_cache 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3e55653809271350248536acbfc6aaf970f27e5141487cd389bc5426d52fe6d0
4
+ data.tar.gz: 12f93fb02f2dbfde2296d7bf83a3cb98f13b97b16060a7e0a4a08ddd19ed2dde
5
+ SHA512:
6
+ metadata.gz: dedd26e0dcdefdd193e634bbf88a513472951eea1b19cfb6a3f45450793ab14403ef87d7097191ebd83d3e53b2df05c07368b999695da071ce8772da8069b0fa
7
+ data.tar.gz: ed52a24c1b3efb7be25b97247ca0ebc22843d24e0f4507012c8db789c26422040e7e5fed0ef6e6d893567944774a95757268d83fcc0ef57cc76e2f8d7a781b90
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ /.idea/
2
+ /.bundle/
3
+ /.config/
4
+ /.yardoc
5
+ /_yardoc/
6
+ /coverage/
7
+ /doc/
8
+ /pkg/
9
+ /spec/reports/
10
+ /tmp/
11
+ .ruby-version
12
+ .ruby-gemset
13
+ Gemfile.lock
14
+ tmp
15
+
16
+ # rspec failure tracking
17
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,127 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.5
3
+
4
+ Documentation:
5
+ Enabled: false
6
+
7
+ LineLength:
8
+ Enabled: false
9
+
10
+ Bundler/OrderedGems:
11
+ Enabled: false
12
+
13
+ Layout/AlignHash:
14
+ Enabled: false
15
+
16
+ Layout/AlignParameters:
17
+ Enabled: false
18
+
19
+ Layout/CaseIndentation:
20
+ Enabled: false
21
+
22
+ Layout/EmptyLinesAroundClassBody:
23
+ Enabled: false
24
+
25
+ Layout/ExtraSpacing:
26
+ Enabled: false
27
+ AllowForAlignment: true
28
+
29
+ Layout/FirstParameterIndentation:
30
+ Enabled: false
31
+
32
+ Layout/IndentHash:
33
+ Enabled: false
34
+
35
+ Layout/LeadingCommentSpace:
36
+ Enabled: false
37
+
38
+ Layout/MultilineBlockLayout:
39
+ Enabled: false
40
+
41
+ Layout/MultilineMethodCallBraceLayout:
42
+ Enabled: false
43
+
44
+ Layout/MultilineMethodCallIndentation:
45
+ Enabled: false
46
+
47
+ Layout/MultilineOperationIndentation:
48
+ Enabled: false
49
+
50
+ Layout/SpaceAroundOperators:
51
+ Enabled: false
52
+
53
+ Layout/SpaceInLambdaLiteral:
54
+ Enabled: false
55
+
56
+ Layout/TrailingWhitespace:
57
+ Enabled: false
58
+
59
+ Lint/AmbiguousBlockAssociation:
60
+ Enabled: false
61
+
62
+ Metrics/AbcSize:
63
+ # The ABC size is a calculated magnitude, so this number can be a Fixnum or a Float.
64
+ Max: 240
65
+
66
+ Metrics/BlockLength:
67
+ Max: 90
68
+
69
+ Metrics/BlockNesting:
70
+ Max: 4
71
+
72
+ Metrics/ClassLength:
73
+ CountComments: false # count full line comments?
74
+ Max: 600
75
+
76
+ Metrics/CyclomaticComplexity:
77
+ Enabled: false
78
+
79
+ Metrics/MethodLength:
80
+ CountComments: false # count full line comments?
81
+ Max: 120
82
+
83
+ Metrics/ParameterLists:
84
+ Max: 7
85
+
86
+ Metrics/PerceivedComplexity:
87
+ Enabled: false
88
+
89
+ Rails/OutputSafety:
90
+ Enabled: false
91
+
92
+ Style/AsciiComments:
93
+ Enabled: false
94
+
95
+ Style/ColonMethodCall:
96
+ Enabled: false
97
+
98
+ Style/CommandLiteral:
99
+ Enabled: false
100
+
101
+ Style/FrozenStringLiteralComment:
102
+ Enabled: false
103
+
104
+ Style/Lambda:
105
+ Enabled: false
106
+
107
+ Style/MultilineIfModifier:
108
+ Enabled: false
109
+
110
+ Style/NumericPredicate:
111
+ Enabled: false
112
+
113
+ Style/RedundantReturn:
114
+ Enabled: false
115
+ AllowMultipleReturnValues: true
116
+
117
+ Style/RedundantSelf:
118
+ Enabled: false
119
+
120
+ Style/RegexpLiteral:
121
+ Enabled: false
122
+
123
+ Style/RescueModifier:
124
+ Enabled: false
125
+
126
+ Style/SymbolArray:
127
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.1
5
+ before_install: gem install bundler -v 1.16.1
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0
2
+
3
+ - First major release
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at flo@l-x.fr. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [http://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: http://contributor-covenant.org
74
+ [version]: http://contributor-covenant.org/version/1/4/
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,42 @@
1
+ # Contributing
2
+
3
+ First, thanks for wanting to contribute. You’re awesome! :heart:
4
+
5
+ ## Help
6
+
7
+ We’re not able to provide support through GitHub Issues. If you’re looking for help with your code, try posting on [Stack Overflow](https://stackoverflow.com/).
8
+
9
+ All features should be documented. If you don’t see a feature in the docs, assume it doesn’t exist.
10
+
11
+ ## Bugs
12
+
13
+ Think you’ve discovered a bug?
14
+
15
+ 1. Search existing issues to see if it’s been reported.
16
+ 2. Try the `master` branch to make sure it hasn’t been fixed.
17
+
18
+ ```rb
19
+ gem "seo_cache", github: "floXcoder/seo_cache"
20
+ ```
21
+
22
+ If the above steps don’t help, create an issue. Include:
23
+
24
+ - Detailed steps to reproduce
25
+ - Complete backtraces for exceptions
26
+
27
+ ## New Features
28
+
29
+ If you’d like to discuss a new feature, create an issue and start the title with `[Idea]`.
30
+
31
+ ## Pull Requests
32
+
33
+ Fork the project and create a pull request. A few tips:
34
+
35
+ - Keep changes to a minimum. If you have multiple features or fixes, submit multiple pull requests.
36
+ - Follow the existing style. The code should read like it’s written by a single person.
37
+
38
+ Feel free to open an issue to get feedback on your idea before spending too much time on it.
39
+
40
+ ---
41
+
42
+ This contributing guide is released under [CC0](https://creativecommons.org/publicdomain/zero/1.0/) (public domain). Use it for your own project without attribution.
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in seo_cache.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Flo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,101 @@
1
+ # SeoCache
2
+
3
+ Cache dedicated for SEO with Javascript rendering :fire:
4
+
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'seo_cache'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install seo_cache
21
+
22
+ Install chrome driver on your device
23
+
24
+ ## How it works
25
+
26
+ Specific cache for bots to optimize time to first byte and render Javascript on server side.
27
+
28
+ Options:
29
+
30
+ Choose a cache mode (`disk` or `memory`):
31
+
32
+ SeoCache.cache_mode = 'memory'
33
+
34
+ If caching on disk, specify the cache path (e.g. `Rails.root.join('public', 'seo_cache')`):
35
+
36
+ SeoCache.disk_cache_path = nil
37
+
38
+ URLs to blacklist:
39
+
40
+ SeoCache.blacklist_urls = []
41
+
42
+ URLs to whitelist:
43
+
44
+ SeoCache.whitelist_urls = []
45
+
46
+ Query params in URL to blacklist:
47
+
48
+ SeoCache.blacklist_params = []
49
+
50
+ ## Automatic caching
51
+
52
+ To automate caching, create a cron rake task which calls:
53
+
54
+ ```ruby
55
+ SeoCache::PopulateCache.new('https://<your-domain-name>', paths_to_cache).perform
56
+ ```
57
+
58
+ ## Server
59
+
60
+ If you use disk caching, add to your Nginx configuration:
61
+
62
+ ```
63
+ location / {
64
+ # cached pages
65
+ set $cache_extension '';
66
+ if ($request_method = GET) {
67
+ set $cache_extension '.html';
68
+ }
69
+
70
+ # Index HTML Files
71
+ if (-f $document_root/seo_cache/$uri/index$cache_extension) {
72
+ rewrite (.*) /seo_cache/$1/index.html break;
73
+ }
74
+
75
+ # Other HTML Files
76
+ if (-f $document_root/seo_cache/$uri$cache_extension) {
77
+ rewrite (.*) /seo_cache/$1.html break;
78
+ }
79
+
80
+ # All
81
+ if (-f $document_root/seo_cache/$uri) {
82
+ rewrite (.*) /seo_cache/$1 break;
83
+ }
84
+ }
85
+ ```
86
+
87
+ ## Inspiration
88
+
89
+ Inspired by [prerender gem](https://github.com/prerender/prerender_rails).
90
+
91
+ ## Contributing
92
+
93
+ Bug reports and pull requests are welcome on GitHub at https://github.com/floXcoder/seo_cache. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
94
+
95
+ ## License
96
+
97
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
98
+
99
+ ## Code of Conduct
100
+
101
+ Everyone interacting in the SeoCache project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/floXcoder/seo_cache/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "seo_cache"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'seo_cache/page_caching'
4
+ require 'seo_cache/page_render'
5
+
6
module SeoCache
  # Rack middleware that answers crawler requests with pre-rendered HTML.
  #
  # When a request qualifies (see #should_show_prerendered_page) the middleware
  # first looks in the page cache. On a miss it either fetches the page from the
  # configured prerender service and serves it, or renders the page in a
  # background thread and lets the current request fall through to the app.
  class Middleware
    # Default port for each scheme that #build_api_url is able to force.
    SCHEME_PORTS = { 'https' => 443, 'http' => 80 }.freeze

    # @param app [#call] the next Rack application in the stack
    # @param options [Hash] optional settings; only +:protocol+ is read here
    def initialize(app, options = {})
      @options = options
      @extensions_to_ignore = SeoCache.extensions_to_ignore
      @crawler_user_agents = SeoCache.crawler_user_agents

      @app = app

      @page_caching = PageCaching.new
    end

    # Rack entry point.
    #
    # @param env [Hash] the Rack environment
    # @return [Array] a Rack response triplet
    def call(env)
      if should_show_prerendered_page(env)
        cached_response = before_render(env)

        return cached_response.finish if cached_response.present?

        SeoCache.log('missed cache : ' + Rack::Request.new(env).path) if SeoCache.log_missed_cache

        if SeoCache.prerender_service_url.present?
          prerendered_response = get_prerendered_page_response(env)
          if prerendered_response
            # The builder reads body/code/header, so it needs the whole
            # response object, not just the body string.
            response = build_rack_response_from_prerender(prerendered_response)
            # Cache the HTML only (the cache stores strings), and only for
            # successful renders: a cached page is always replayed with a 200.
            after_render(env, prerendered_response.body) if prerendered_response.is_a?(Net::HTTPSuccess)
            return response.finish
          end
        else
          # Render in the background; this request is served by the app and
          # later requests for the same path will hit the cache.
          Thread.new do
            prerendered_data = page_render(env)
            after_render(env, prerendered_data) if prerendered_data
          end
        end
      end

      @app.call(env)
    end

    # Decides whether the request should be answered with a pre-rendered page.
    #
    # @param env [Hash] the Rack environment
    # @return [Boolean]
    def should_show_prerendered_page(env)
      user_agent = env['HTTP_USER_AGENT']
      return false unless user_agent
      return false if env['REQUEST_METHOD'] != 'GET'

      request = Rack::Request.new(env)
      query_params = Rack::Utils.parse_query(request.query_string)

      # The request comes from our own renderer: never prerender it again.
      return false if query_params.key?(SeoCache.prerender_url_param)

      return false if SeoCache.blacklist_params.present? && SeoCache.blacklist_params.any? { |param| query_params.key?(param) }

      downcased_agent = user_agent.downcase
      wants_prerender =
        query_params.key?('_escaped_fragment_') ||
        query_params.key?(SeoCache.force_cache_url_param) ||
        @crawler_user_agents.any? { |crawler| downcased_agent.include?(crawler.downcase) } ||
        (env['HTTP_X_BUFFERBOT'] ? true : false)

      fullpath = request.fullpath

      # Static resources are never prerendered.
      return false if @extensions_to_ignore.any? { |extension| fullpath.include?(extension) }

      # With a whitelist configured, at least one pattern must match.
      return false if SeoCache.whitelist_urls.present? && SeoCache.whitelist_urls.none? { |pattern| Regexp.new(pattern).match(fullpath) }

      # A blacklist pattern matching the URL or the referer disables prerendering.
      if SeoCache.blacklist_urls.present?
        referer = request.referer
        blacklisted = SeoCache.blacklist_urls.any? do |pattern|
          regex = Regexp.new(pattern)
          regex.match(fullpath) || (referer && regex.match(referer))
        end
        return false if blacklisted
      end

      wants_prerender
    end

    # Fetches the page from the external prerender service.
    #
    # @param env [Hash] the Rack environment
    # @return [Net::HTTPResponse, nil] the (gunzipped) response, or nil when
    #   the request failed; the failure is logged
    def get_prerendered_page_response(env)
      url = URI.parse(build_api_url(env))
      headers = {
        'User-Agent' => env['HTTP_USER_AGENT'],
        'Accept-Encoding' => 'gzip'
      }
      req = Net::HTTP::Get.new(url.request_uri, headers)
      http = Net::HTTP.new(url.host, url.port)
      http.use_ssl = true if url.scheme == 'https'
      response = http.request(req)
      if response['Content-Encoding'] == 'gzip'
        response.body = ActiveSupport::Gzip.decompress(response.body)
        # Content-Length counts bytes, not characters.
        response['Content-Length'] = response.body.bytesize.to_s
        response.delete('Content-Encoding')
      end
      response
    rescue StandardError => error
      SeoCache.log_error(error.message)
      # Return nil explicitly: the logger's return value must not be mistaken
      # for a response by the caller.
      nil
    end

    # Builds the prerender-service URL for the current request.
    #
    # The scheme/port of the request URL can be overridden, in increasing
    # priority, by the Cloudflare visitor header, the forwarded-proto header
    # and the configured protocol.
    #
    # @param env [Hash] the Rack environment (left untouched)
    # @return [String]
    def build_api_url(env)
      # Work on a copy so the downstream app never sees the overrides.
      new_env = env.dup

      # Rack exposes request headers under HTTP_*; the bare names are still
      # honoured for backward compatibility.
      cf_visitor = env['CF-VISITOR'] || env['HTTP_CF_VISITOR']
      if cf_visitor
        match = /"scheme":"(http|https)"/.match(cf_visitor)
        apply_scheme(new_env, match[1]) if match
      end

      forwarded_proto = env['X-FORWARDED-PROTO'] || env['HTTP_X_FORWARDED_PROTO']
      apply_scheme(new_env, forwarded_proto.split(',')[0].to_s.strip) if forwarded_proto

      forced_protocol = @options[:protocol] || SeoCache.protocol
      apply_scheme(new_env, forced_protocol) if forced_protocol.present?

      url = Rack::Request.new(new_env).url
      prerender_url = SeoCache.prerender_service_url
      forward_slash = prerender_url.end_with?('/') ? '' : '/'
      "#{prerender_url}#{forward_slash}#{url}"
    end

    # Converts the prerender service response into a Rack response.
    #
    # @param prerendered_response [Net::HTTPResponse]
    # @return [Rack::Response]
    def build_rack_response_from_prerender(prerendered_response)
      Rack::Response.new(prerendered_response.body, prerendered_response.code, prerendered_response.header)
    end

    # Looks the requested path up in the page cache.
    #
    # @param env [Hash] the Rack environment
    # @return [Rack::Response, nil] nil on a cache miss
    def before_render(env)
      cached_render = @page_caching.get(Rack::Request.new(env).path)

      case cached_render
      when String
        Rack::Response.new(cached_render, 200, 'Content-Type' => 'text/html; charset=utf-8')
      when Rack::Response
        cached_render
      end
    end

    # Renders the requested URL with the local browser driver.
    #
    # The prerender marker parameter is appended so that the middleware does
    # not try to prerender the renderer's own request.
    #
    # @param env [Hash] the Rack environment
    # @return [String, nil] whatever PageRender#get returns for the URL
    def page_render(env)
      request = Rack::Request.new(env)
      separator = request.query_string.present? || request.url.end_with?('?') ? '&' : '?'
      url = "#{request.url}#{separator}#{SeoCache.prerender_url_param}=true"

      PageRender.new.get(url)
    end

    # Stores rendered content in the page cache under the request path.
    #
    # @param env [Hash] the Rack environment
    # @param response [String] the rendered HTML
    def after_render(env, response)
      @page_caching.cache(response, Rack::Request.new(env).path)
    end

    private

    # Forces +scheme+ (and its default port) on +env+; unknown schemes are ignored.
    def apply_scheme(env, scheme)
      port = SCHEME_PORTS[scheme]
      return unless port

      env['HTTPS'] = (scheme == 'https')
      env['rack.url_scheme'] = scheme
      env['SERVER_PORT'] = port
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module SeoCache
  # Stores and retrieves rendered pages, either in Redis ("memory" mode) or as
  # files on disk.
  class PageCaching
    def initialize
      @redis = nil

      initialize_memory_cache if SeoCache.memory_cache?
    end

    # Opens the namespaced Redis connection used in memory mode.
    def initialize_memory_cache
      uri = URI.parse(SeoCache.redis_url)
      @redis = Redis::Namespace.new(SeoCache.redis_namespace, redis: Redis.new(host: uri.host, port: uri.port, password: uri.password, connect_timeout: 1, timeout: 1), warnings: false)
    end

    # Reads a cached page from Redis.
    #
    # @param path [String] request path
    # @param extension [String, nil] overrides the default cache extension
    # @return [String, nil] the cached content; always nil in disk mode, where
    #   this class never reads the files back
    def get(path, extension = nil)
      @redis.get(cache_path(path, extension)) if memory_store?
    end

    # Writes +content+ to the cache for +path+.
    #
    # @param content [String] rendered page
    # @param path [String] request path
    # @param extension [String, nil] overrides the default cache extension
    # @param gzip [Integer, nil, false] Zlib level for the extra ".gz" copy
    #   written in disk mode; falsy disables it
    def cache(content, path, extension = nil, gzip = Zlib::BEST_COMPRESSION)
      instrument :write_page, path do
        if memory_store?
          write_to_memory(content, cache_path(path, extension))
        else
          write_to_disk(content, cache_path(path, extension), gzip)
        end
      end
    end

    # Removes the cached entry for +path+ from the active store.
    def expire(path)
      instrument :expire_page, path do
        key = cache_path(path)
        memory_store? ? memory_delete(key) : disk_delete(key)
      end
    end

    # @return [Boolean] whether an entry for +path+ exists in the active store
    def cache_exists?(path)
      if memory_store?
        @redis.exists?(cache_path(path))
      else
        File.exist?(cache_path(path))
      end
    end

    private

    # True when memory mode is configured and a Redis connection is available.
    def memory_store?
      SeoCache.memory_cache? && @redis
    end

    def cache_directory
      SeoCache.disk_cache_path
    end

    def default_extension
      SeoCache.disk_cache_extension
    end

    # Maps a request path to a cache file name ("/" becomes "/index"), adding
    # the cache extension when the path has none.
    #
    # NOTE(review): the path is percent-decoded and not normalised, so ".."
    # segments survive; in disk mode callers should only pass trusted paths.
    def cache_file(path, extension)
      name = if path.empty? || path =~ %r{\A/+\z}
               '/index'
             else
               URI::DEFAULT_PARSER.unescape(path.chomp('/'))
             end

      if File.extname(name).empty?
        name + (extension || default_extension)
      else
        name
      end
    end

    # Full cache key / file path. The directory is nil by default (memory
    # mode), hence the +to_s+ so that the key is just the file part.
    def cache_path(path, extension = nil)
      File.join(cache_directory.to_s, cache_file(path, extension))
    end

    def write_to_memory(content, path)
      @redis&.set(path, content)
    end

    # Writes the page and, when +gzip+ is set, a compressed ".gz" sibling.
    def write_to_disk(content, path, gzip)
      FileUtils.makedirs(File.dirname(path))
      File.open(path, 'wb+') { |f| f.write(content) }

      Zlib::GzipWriter.open(path + '.gz', gzip) { |f| f.write(content) } if gzip
    end

    def disk_delete(path)
      File.delete(path) if File.exist?(path)
      File.delete(path + '.gz') if File.exist?(path + '.gz')
    end

    def memory_delete(path)
      @redis.del(path) if @redis&.exists?(path)
    end

    def instrument(name, path)
      ActiveSupport::Notifications.instrument("#{name}.seo_cache", path: path) { yield }
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module SeoCache
  # Renders pages with a headless Chrome driven through Selenium.
  class PageRender
    # Starts the browser; it stays open until #get or #close_connection
    # quits it.
    def initialize
      init_driver
    end

    # Renders +url+ once and quits the browser afterwards.
    #
    # @param url [String]
    # @return [String, nil] the page source, or nil when rendering failed
    def get(url)
      persistent_get(url)
    ensure
      close_connection
    end

    # Renders +url+ and keeps the browser open for further calls.
    #
    # @param url [String]
    # @return [String, nil] the page source, or nil when rendering failed
    #   (the error is logged)
    def persistent_get(url)
      @driver.get(url)

      @driver.page_source
    rescue StandardError => error
      SeoCache.log_error(error.message)
      # Return nil explicitly so callers never cache the logger's return value.
      nil
    end

    # Quits the browser if one was started.
    def close_connection
      @driver&.quit
    end

    private

    def init_driver
      # Selenium::WebDriver.logger.level = :info

      client = ::Selenium::WebDriver::Remote::Http::Persistent.new
      browser_options = ::Selenium::WebDriver::Chrome::Options.new
      browser_options.args << '--headless'
      browser_options.args << '--disable-gpu'
      browser_options.args << '--no-sandbox'
      browser_options.args << '--disable-web-security'
      browser_options.args << '--window-size=1920x1080'
      # browser_options.args << '--remote-debugging-port=3020'
      @driver = ::Selenium::WebDriver.for(:chrome, options: browser_options, http_client: client)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'seo_cache/page_caching'
4
+ require 'seo_cache/page_render'
5
+
6
module SeoCache
  # Pre-renders a list of paths and stores the result in the page cache.
  class PopulateCache
    # @param host [String] scheme and host prepended to every path
    # @param paths [Enumerable<String>] paths to render
    # @param options [Hash] +:force_cache+ (default false) re-renders paths
    #   that are already cached
    def initialize(host, paths, options = {})
      @host = host
      @paths = paths
      @page_render = PageRender.new
      @page_caching = PageCaching.new

      @force_cache = options.fetch(:force_cache, false)
    end

    # Renders and caches every path, then closes the browser.
    def perform
      @paths.each do |path|
        next if @page_caching.cache_exists?(path) && !@force_cache

        page_source = @page_render.persistent_get(@host + path)
        # A failed render does not yield HTML; never store it as a page.
        next unless page_source.is_a?(String)

        @page_caching.cache(page_source, path)
      end
    ensure
      # Quit the browser even when one of the paths raised.
      @page_render.close_connection
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module SeoCache
2
+ VERSION = '0.1.0'.freeze
3
+ end
data/lib/seo_cache.rb ADDED
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'active_support'
5
+ require 'selenium/webdriver'
6
+ require 'chromedriver-helper'
7
+ require 'selenium/webdriver/remote/http/persistent'
8
+ require 'redis'
9
+ require 'redis-namespace'
10
+
11
+ require 'seo_cache/version'
12
+ require 'seo_cache/middleware'
13
+
14
+ module SeoCache
15
+
16
+ mattr_accessor :cache_mode # disk or memory
17
+ self.cache_mode = 'memory'
18
+
19
+ mattr_accessor :disk_cache_path
20
+ self.disk_cache_path = nil
21
+
22
+ mattr_accessor :disk_cache_extension
23
+ self.disk_cache_extension = '.html'
24
+
25
+ mattr_accessor :redis_url
26
+ self.redis_url = 'redis://localhost:6379/'
27
+
28
+ mattr_accessor :redis_namespace
29
+ self.redis_namespace = '_seo_cache:'
30
+
31
+ mattr_accessor :blacklist_urls
32
+ self.blacklist_urls = []
33
+
34
+ mattr_accessor :whitelist_urls
35
+ self.whitelist_urls = []
36
+
37
+ mattr_accessor :blacklist_params
38
+ self.blacklist_params = []
39
+
40
+ mattr_accessor :log_missed_cache
41
+ self.log_missed_cache = false
42
+
43
+ mattr_accessor :protocol
44
+ self.protocol = nil
45
+
46
+ mattr_accessor :prerender_service_url
47
+ self.prerender_service_url = nil
48
+
49
+ mattr_accessor :prerender_url_param
50
+ self.prerender_url_param = '_prerender_'
51
+
52
+ mattr_accessor :force_cache_url_param
53
+ self.force_cache_url_param = '_seo_cache_'
54
+
55
+ mattr_accessor :extensions_to_ignore
56
+ self.extensions_to_ignore = %w[.js .css .xml .less .png .jpg .jpeg .gif .pdf .doc .txt .ico .rss .zip .mp3 .rar .exe .wmv .doc .avi .ppt .mpg .mpeg .tif .wav .mov .psd .ai .xls .mp4 .m4a .swf .dat .dmg .iso .flv .m4v .torrent]
57
+
58
+ mattr_accessor :crawler_user_agents
59
+ self.crawler_user_agents = [
60
+ 'googlebot',
61
+ 'yahoo',
62
+ 'bingbot',
63
+ 'baiduspider',
64
+ 'facebookexternalhit',
65
+ 'twitterbot',
66
+ 'rogerbot',
67
+ 'linkedinbot',
68
+ 'embedly',
69
+ 'bufferbot',
70
+ 'quora link preview',
71
+ 'showyoubot',
72
+ 'outbrain',
73
+ 'pinterest/0.',
74
+ 'developers.google.com/+/web/snippet',
75
+ 'www.google.com/webmasters/tools/richsnippets',
76
+ 'slackbot',
77
+ 'vkShare',
78
+ 'W3C_Validator',
79
+ 'redditbot',
80
+ 'Applebot',
81
+ 'WhatsApp',
82
+ 'flipboard',
83
+ 'tumblr',
84
+ 'bitlybot',
85
+ 'SkypeUriPreview',
86
+ 'nuzzel',
87
+ 'Discordbot',
88
+ 'Google Page Speed',
89
+ 'Qwantify'
90
+ ]
91
+
92
+ def self.memory_cache?
93
+ SeoCache.cache_mode == 'memory'
94
+ end
95
+
96
+ def self.disk_cache?
97
+ SeoCache.cache_mode == 'disk'
98
+ end
99
+
100
# Logs an informational message through the Rails logger.
# Does nothing (and returns nil) when Rails or its logger is unavailable.
def self.log(message)
  logger = defined?(Rails) && Rails.respond_to?(:logger) ? Rails.logger : nil
  logger&.info { "[seo_cache] #{message}" }
end

# Logs an error message through the Rails logger.
# This is called from rescue handlers, so it must never raise itself: without
# Rails or a logger it is a no-op returning nil.
def self.log_error(message)
  logger = defined?(Rails) && Rails.respond_to?(:logger) ? Rails.logger : nil
  logger&.error { "[seo_cache] #{message}" }
end
107
+ end
data/seo_cache.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'seo_cache/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'seo_cache'
7
+ spec.version = SeoCache::VERSION
8
+ spec.authors = ['FloXcoder']
9
+ spec.email = ['flo@l-x.fr']
10
+
11
+ spec.summary = 'Cache dedicated for SEO with Javascript rendering'
12
+ spec.description = 'Specific cache for bots to optimize time to first byte and render Javascript on server side.'
13
+ spec.homepage = 'https://github.com/floXcoder/SeoCache'
14
+ spec.license = 'MIT'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
17
+ f.match(%r{^(test|spec|features)/})
18
+ end
19
+ spec.bindir = 'exe'
20
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
+ spec.require_paths = ['lib']
22
+
23
+ spec.add_dependency 'rack', '~> 2'
24
+ spec.add_dependency 'railties', '~> 5'
25
+ spec.add_dependency 'activesupport', '~> 5'
26
+ spec.add_dependency 'selenium-webdriver', '~> 3'
27
+ spec.add_dependency 'chromedriver-helper', '~> 2'
28
+ spec.add_dependency 'redis', '~> 4'
29
+ spec.add_dependency 'redis-namespace', '~> 1'
30
+
31
+ spec.add_development_dependency 'bundler', '~> 1'
32
+ spec.add_development_dependency 'rake', '~> 12'
33
+ spec.add_development_dependency 'rspec', '~> 3'
34
+ end
metadata ADDED
@@ -0,0 +1,204 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: seo_cache
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - FloXcoder
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-03-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rack
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: railties
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: activesupport
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: selenium-webdriver
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: chromedriver-helper
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: redis
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '4'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '4'
97
+ - !ruby/object:Gem::Dependency
98
+ name: redis-namespace
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1'
111
+ - !ruby/object:Gem::Dependency
112
+ name: bundler
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '1'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '1'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '12'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '12'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rspec
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '3'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '3'
153
+ description: Specific cache for bots to optimize time to first byte and render Javascript
154
+ on server side.
155
+ email:
156
+ - flo@l-x.fr
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - ".gitignore"
162
+ - ".rspec"
163
+ - ".rubocop.yml"
164
+ - ".travis.yml"
165
+ - CHANGELOG.md
166
+ - CODE_OF_CONDUCT.md
167
+ - CONTRIBUTING.md
168
+ - Gemfile
169
+ - LICENSE.txt
170
+ - README.md
171
+ - Rakefile
172
+ - bin/console
173
+ - bin/setup
174
+ - lib/seo_cache.rb
175
+ - lib/seo_cache/middleware.rb
176
+ - lib/seo_cache/page_caching.rb
177
+ - lib/seo_cache/page_render.rb
178
+ - lib/seo_cache/populate_cache.rb
179
+ - lib/seo_cache/version.rb
180
+ - seo_cache.gemspec
181
+ homepage: https://github.com/floXcoder/SeoCache
182
+ licenses:
183
+ - MIT
184
+ metadata: {}
185
+ post_install_message:
186
+ rdoc_options: []
187
+ require_paths:
188
+ - lib
189
+ required_ruby_version: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ required_rubygems_version: !ruby/object:Gem::Requirement
195
+ requirements:
196
+ - - ">="
197
+ - !ruby/object:Gem::Version
198
+ version: '0'
199
+ requirements: []
200
+ rubygems_version: 3.0.3
201
+ signing_key:
202
+ specification_version: 4
203
+ summary: Cache dedicated for SEO with Javascript rendering
204
+ test_files: []