wovnrb 3.5.0 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/README.en.md +30 -13
  3. data/build.sh +7 -0
  4. data/docker/nginx/Dockerfile +18 -0
  5. data/docker/nginx/README.md +13 -0
  6. data/docker/nginx/build.sh +8 -0
  7. data/docker/nginx/scripts/configure_sshd.sh +25 -0
  8. data/docker/nginx/scripts/startup.sh +10 -0
  9. data/docker/nginx/wovnrb.conf +19 -0
  10. data/docker/rails/Dockerfile +9 -0
  11. data/docker/rails/Dockerfile.ECS +17 -0
  12. data/docker/rails/TestSite/Gemfile +0 -2
  13. data/docker/rails/TestSite/app/controllers/custom_response_controller.rb +1 -1
  14. data/docker/rails/TestSite/config/environments/development.rb +2 -0
  15. data/docker/rails/TestSite/config/environments/production.rb +2 -0
  16. data/docker/rails/TestSite/config/environments/test.rb +2 -0
  17. data/docker/rails/TestSite/public/index.html +1 -1
  18. data/docker/rails/TestSite/start.sh +2 -11
  19. data/docker/rails/TestSite/start_rails.sh +9 -0
  20. data/docker/rails/TestSite/yarn.lock +3 -3
  21. data/docker/scripts/jenkins/build.sh +45 -0
  22. data/docker/scripts/jenkins/tag_and_push_image.sh +30 -0
  23. data/docker/scripts/jenkins/taskdef.json +104 -0
  24. data/docker/scripts/jenkins/taskdef.json.bak +99 -0
  25. data/lib/wovnrb/api_translator.rb +6 -1
  26. data/lib/wovnrb/headers.rb +19 -2
  27. data/lib/wovnrb/services/html_converter.rb +17 -1
  28. data/lib/wovnrb/services/url.rb +136 -0
  29. data/lib/wovnrb/store.rb +8 -2
  30. data/lib/wovnrb/url_language_switcher.rb +124 -0
  31. data/lib/wovnrb/version.rb +1 -1
  32. data/lib/wovnrb.rb +3 -1
  33. data/test/lib/api_translator_test.rb +44 -0
  34. data/test/lib/services/html_converter_test.rb +210 -37
  35. data/test/lib/services/url_test.rb +308 -0
  36. data/test/lib/url_language_switcher_test.rb +798 -0
  37. data/test/lib/wovnrb_test.rb +2 -1
  38. data/test/test_helper.rb +4 -1
  39. metadata +22 -3
@@ -1,9 +1,10 @@
1
1
  module Wovnrb
2
2
  class HtmlConverter
3
- def initialize(dom, store, headers)
3
+ def initialize(dom, store, headers, url_lang_switcher)
4
4
  @dom = dom
5
5
  @headers = headers
6
6
  @store = store
7
+ @url_lang_switcher = url_lang_switcher
7
8
  end
8
9
 
9
10
  def build
@@ -32,6 +33,7 @@ module Wovnrb
32
33
  replace_snippet
33
34
  replace_hreflangs
34
35
  inject_lang_html_tag
36
+ translate_canonical_tag if @store.settings['translate_canonical_tag']
35
37
  end
36
38
 
37
39
  def replace_snippet
@@ -48,6 +50,7 @@ module Wovnrb
48
50
  insert_snippet(adds_backend_error_mark: true)
49
51
  insert_hreflang_tags
50
52
  inject_lang_html_tag
53
+ translate_canonical_tag if @store.settings['translate_canonical_tag']
51
54
 
52
55
  html
53
56
  end
@@ -143,6 +146,19 @@ module Wovnrb
143
146
  end
144
147
  end
145
148
 
149
# Rewrites the href of the page's <link rel="canonical"> tag so that it
# points at the URL for the language of the current request.
#
# Nothing is changed when:
# * the document has no canonical link,
# * the request language is the default language and no custom alias is
#   configured for it, or
# * the canonical link has no href attribute.
#
# The actual URL rewriting is delegated to @url_lang_switcher.add_lang_code.
def translate_canonical_tag
  canonical_node = @dom.at_css('link[rel="canonical"]')
  return unless canonical_node

  lang_code = @headers.lang_code
  # The setting may be absent; treat that the same as "no aliases".
  custom_lang_aliases = @store.settings['custom_lang_aliases'] || {}
  return if lang_code == @store.settings['default_lang'] && custom_lang_aliases[lang_code].nil?

  canonical_url = canonical_node['href']
  # Without an href there is nothing to translate; do not write one back.
  return if canonical_url.nil?

  canonical_node['href'] = @url_lang_switcher.add_lang_code(canonical_url, lang_code, @headers)
end
161
+
146
162
  # Remove wovn snippet code from dom
147
163
  def strip_snippet
148
164
  @dom.xpath('//script').each do |script_node|
@@ -0,0 +1,136 @@
1
module Wovnrb
  # URL utility ported from html-swapper.
  #
  # A collection of stateless class-level helpers for inspecting and
  # rewriting URL strings and parsed URI objects.
  class URL
    # Regexp alternation fragments (kept as strings so they can be
    # interpolated) listing the file extensions that URL.file? recognizes.
    module FileExtension
      IMG_FILES = 'jpe|jpe?g|bmp|gif|png|btif|tiff?|psd|djvu?|xif|wbmp|webp|p(n|b|g|p)m|rgb|tga|x(b|p)m|xwd|pic|ico|fh(c|4|5|7)?|xif|f(bs|px|st)'.freeze
      AUDIO_FILES = 'mp(3|2)|m(p?2|3|p?4|pg)a|midi?|kar|rmi|web(m|a)|aif(f?|c)|w(ma|av|ax)|m(ka|3u)|sil|s3m|og(a|g)|uvv?a'.freeze
      VIDEO_FILES = 'm(x|4)u|fl(i|v)|3g(p|2)|jp(gv|g?m)|mp(4v?|g4|(?!$)e?g?)|m(1|2)v|ogv|m(ov|ng)|qt|uvv?(h|m|p|s|v)|dvb|mk(v|3d|s)|f4v|as(x|f)|w(m(v|x)|vx)|xvid'.freeze
      DOC_FILES = '(7|g)?zip|tar|rar|7z|gz|ez|aw|atom(cat|svc)?|(cc)?xa?ml|cdmi(a|c|d|o|q)?|epub|g(ml|px|xf)|jar|js|ser|class|json(ml)?|do(c|t)(m|x)?|xls(m|x)?|xps|pp(a|tx?|s)m?|potm?|sldm|mp(p|t)|bin|dms|lrf|mar|so|dist|distz|m?pkg|bpk|dump|rtf|tfi|pdf|pgp|apk|o(t|d)(b|c|ft?|g|h|i|p|s|t)'.freeze
    end

    # Removes zero-width spaces (U+200B) and surrounding whitespace.
    # TODO: Maybe this should be applied to all get_attribute calls rather than just href
    #
    # @param href [String, nil]
    # @return [String, nil] the cleaned href, or nil when href is nil/false
    def self.normalize_url(href)
      return nil unless href

      href.delete("\u200b").strip
    end

    # @param href [String]
    # @return [Integer, nil] truthy (match index) when href starts with
    #   "http://", "https://" (any letter case) or a protocol-relative "//"
    def self.absolute_url?(href)
      href =~ %r{^(https?:)?//}i
    end

    # @param href [String]
    # @return [Boolean] true when href starts with "/"
    def self.absolute_path?(href)
      href.match?(%r{^/})
    end

    # @param href [String]
    # @return [Boolean] true when href is neither an absolute URL nor an absolute path
    def self.relative_path?(href)
      !absolute_url?(href) && !absolute_path?(href)
    end

    # @param parsed_uri [Addressable::URI]
    # @return [String] the path followed by "?query" when a query is present
    def self.path_and_query(parsed_uri)
      parsed_uri.path + (parsed_uri.query ? "?#{parsed_uri.query}" : '')
    end

    # @param parsed_uri [Addressable::URI]
    # @return [String] the path followed by "?query" and "#fragment" when present
    def self.path_and_query_and_hash(parsed_uri)
      uri = parsed_uri.path
      uri += "?#{parsed_uri.query}" if parsed_uri.query
      uri += "##{parsed_uri.fragment}" if parsed_uri.fragment
      uri
    end

    # @param parsed_uri [Addressable::URI]
    # @return [String] "host:port" when a port is set, otherwise the host
    def self.host_with_port(parsed_uri)
      if parsed_uri.port
        "#{parsed_uri.host}:#{parsed_uri.port}"
      else
        parsed_uri.host.to_s
      end
    end

    # Joins href onto base_url.
    # This resolves ./../ and also handles href already being absolute.
    #
    # @return [Addressable::URI]
    # @raise [Addressable::URI::InvalidURIError, ArgumentError] re-raised after
    #   the failure has been reported to Rollbar (when Rollbar is loaded)
    def self.resolve_absolute_uri(base_url, href)
      Addressable::URI.join(base_url, href)
    rescue Addressable::URI::InvalidURIError, ArgumentError => e
      # Rollbar is optional; referencing it unguarded would replace the real
      # URI error with a NameError.
      Rollbar.warning('Failed to resolve absolute URI', original_error: e, base_url: base_url, href: href) if defined?(Rollbar)
      raise
    end

    # Resolves href against base_url and returns everything after the
    # authority: path, plus "?query" and "#fragment" when present.
    #
    # @return [String]
    # @raise [Addressable::URI::InvalidURIError, ArgumentError] see resolve_absolute_uri
    def self.resolve_absolute_path(base_url, href)
      normalized_uri = resolve_absolute_uri(base_url, href)
      path = normalized_uri.path
      query = normalized_uri.query ? "?#{normalized_uri.query}" : ''
      fragment = normalized_uri.fragment ? "##{normalized_uri.fragment}" : ''

      path + query + fragment
    end

    # Inserts "/dir" into url directly after the longest leading portion that
    # ends in ".<characters other than '/'>" and is followed by "/" or the end
    # of the line, e.g. prepend_path('http://wovn.io/a', 'ja') gives
    # 'http://wovn.io/ja/a'. The url is returned unchanged when nothing matches.
    #
    # @param url [String]
    # @param dir [String]
    # @return [String]
    def self.prepend_path(url, dir)
      url.sub(%r{(.+\.[^/]+)(/|$)}, "\\1/#{dir}\\2")
    end

    # @param path [String]
    # @return [String] path without a leading and/or trailing "/"
    def self.trim_slashes(path)
      path.gsub(%r{^/|/$}, '')
    end

    # @param path [String, nil] nil is treated as an empty path
    # @return [String] path guaranteed to start with "/"
    def self.prepend_path_slash(path)
      path ||= ''
      # Core Ruby's start_with? avoids depending on ActiveSupport's alias.
      return path if path.start_with?('/')

      "/#{path}"
    end

    # Concatenates path segments left to right with exactly one "/" at each
    # seam. Because the fold starts from an empty string, the result begins
    # with "/" unless the first segment is blank.
    #
    # @param paths [Array<String>]
    # @return [String]
    def self.join_paths(*paths)
      paths.inject('') do |left, right|
        case [left.end_with?('/'), right.start_with?('/')]
        when [true, true]
          left + right[1..-1]
        when [false, false]
          # Whitespace-only segments are appended as-is, without a separator
          # (same rule as ActiveSupport's String#blank?, written in core Ruby).
          left + (right.match?(/\A[[:space:]]*\z/) ? right : "/#{right}")
        else
          left + right
        end
      end
    end

    # @param uri [Addressable::URI]
    # @param new_protocol [String | nil]
    # @return copy of uri [Addressable::URI]
    def self.change_protocol(uri, new_protocol)
      result = uri.dup
      result.scheme = new_protocol
      result
    end

    # Tells whether href has no scheme at all or an http/https scheme.
    # The scheme is compared case-insensitively, consistent with absolute_url?.
    #
    # @param href [String]
    # @return [Boolean]
    def self.valid_protocol?(href)
      scheme_matches = /^\s*(?<scheme>[a-zA-Z]+):/.match(href)
      scheme = scheme_matches ? scheme_matches[:scheme] : nil

      scheme.nil? || %w[http https].include?(scheme.downcase)
    end

    # Tells whether href (query string and fragment are ignored) ends in one
    # of the extensions listed in FileExtension.
    #
    # @param href_with_query_and_hash [String]
    # @return [Boolean]
    def self.file?(href_with_query_and_hash)
      href = remove_query_and_hash(href_with_query_and_hash)
      img_files = %r{^(https?://)?.*(\.(#{FileExtension::IMG_FILES}))((\?|#).*)?$}io
      audio_files = %r{^(https?://)?.*(\.(#{FileExtension::AUDIO_FILES}))((\?|#).*)?$}io
      video_files = %r{^(https?://)?.*(\.(#{FileExtension::VIDEO_FILES}))((\?|#).*)?$}io
      doc_files = %r{^(https?://)?.*(\.(#{FileExtension::DOC_FILES}))((\?|#).*)?$}io
      href.match?(img_files) || href.match?(audio_files) || href.match?(video_files) || href.match?(doc_files)
    end

    # @param href [String]
    # @return [String] href with everything from the first "#" or "?" removed
    def self.remove_query_and_hash(href)
      href.gsub(/[#?].*/, '')
    end

    # Makes the trailing slash of new_path agree with original_path:
    # if original path does not end in slash, remove it from new path;
    # if original path ends in slash, add it to new path.
    #
    # @param original_path [String]
    # @param new_path [String]
    # @return [String]
    def self.normalize_path_slash(original_path, new_path)
      if !original_path.end_with?('/') && new_path.end_with?('/')
        new_path = new_path.chomp('/')
      elsif original_path.end_with?('/') && !new_path.end_with?('/')
        new_path += '/'
      end
      new_path
    end
  end
end
data/lib/wovnrb/store.rb CHANGED
@@ -25,6 +25,7 @@ module Wovnrb
25
25
  'ignore_class' => [],
26
26
  'api_url' => 'https://wovn.global.ssl.fastly.net',
27
27
  'api_timeout_seconds' => 1.0,
28
+ 'api_timeout_search_engine_bots' => 5.0,
28
29
  'default_lang' => 'ja',
29
30
  'supported_langs' => %w[en ja],
30
31
  'test_mode' => false,
@@ -36,7 +37,8 @@ module Wovnrb
36
37
  'translate_fragment' => true,
37
38
  'widget_url' => 'https://j.wovn.io/1',
38
39
  'wovn_dev_mode' => false,
39
- 'compress_api_requests' => true
40
+ 'compress_api_requests' => true,
41
+ 'translate_canonical_tag' => true
40
42
  )
41
43
  end
42
44
 
@@ -166,7 +168,7 @@ module Wovnrb
166
168
  end
167
169
 
168
170
  def custom_lang_aliases
169
- @setttings['custom_lang_aliases'] || {}
171
+ @settings['custom_lang_aliases'] || {}
170
172
  end
171
173
 
172
174
  def default_lang
@@ -194,6 +196,10 @@ module Wovnrb
194
196
  @settings['wovn_dev_mode']
195
197
  end
196
198
 
199
# Reader for the 'url_pattern' entry of the settings hash.
#
# @return the stored value, or whatever the hash yields when the key is unset
def url_pattern
  configured_pattern = @settings['url_pattern']
  configured_pattern
end
202
+
197
203
  private
198
204
 
199
205
  def stringify_keys!(hash)
@@ -0,0 +1,124 @@
1
+ require 'wovnrb/services/url'
2
+
3
module Wovnrb
  # URL Language switching helper, ported from html-swapper.
  #
  # Adds a language code to a URL according to the store's URL pattern
  # ('subdomain', 'query', or anything else, which is handled as 'path').
  class UrlLanguageSwitcher
    # @param store [Wovnrb::Store] source of settings, custom language
    #   aliases and the default language
    def initialize(store)
      @store = store
    end

    # Adds the language code for to_lang_code to the URL in href, using the
    # custom alias configured in the store when there is one.
    #
    # href is returned untouched when it is empty, is only a "#fragment", or
    # has a scheme other than http/https.
    #
    # @param [String] href original URL.
    # @param [String] to_lang_code language code.
    # @param headers request headers object; this class calls host, protocol,
    #   to_absolute_path and url_with_scheme on it.
    # @return [String, nil] the rewritten URL; nil when href is nil.
    def add_lang_code(href, to_lang_code, headers)
      return nil if href.nil?
      return href if href.match?(/^(#.*)?$/)

      href_scheme = href[/^[A-Za-z][A-Za-z0-9+\-.]*(?=:)/]
      return href if !href_scheme.nil? && href_scheme != 'http' && href_scheme != 'https'

      code_to_add = @store.custom_lang_aliases[to_lang_code] || to_lang_code
      if Wovnrb::URL.absolute_url?(href)
        add_lang_code_absolute_url(href, code_to_add, headers)
      else
        add_lang_code_relative_url(href, code_to_add, headers)
      end
    end

    # Forwards to the store. Written as an explicit method rather than
    # ActiveSupport's `delegate` so the class loads without that core
    # extension; kept public because `delegate` placed after a bare `private`
    # still defines a public method unless `private: true` is passed.
    def default_lang_alias
      @store.default_lang_alias
    end

    private

    # Adds the language code to an absolute URL. URLs that cannot be parsed,
    # and URLs whose host differs from the request host, are returned as-is.
    def add_lang_code_absolute_url(href, code_to_add, headers)
      # in the future, perhaps validate url rather than using begin rescue
      # "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
      begin
        href_uri = Addressable::URI.parse(href)
      rescue Addressable::URI::InvalidURIError, ArgumentError => e
        # Rollbar is optional; an unguarded reference would raise NameError
        # here instead of falling back to the original href.
        Rollbar.warning('Failed to parse URI', original_error: e, href: href) if defined?(Rollbar)
        return href
      end

      return href unless internal_link?(href_uri, headers.host)

      case @store.settings['url_pattern']
      when 'subdomain'
        add_subdomain_lang_code(href, code_to_add)
      when 'query'
        add_query_lang_code(href, code_to_add)
      else # path
        href_uri.path = add_lang_code_for_path(href_uri.path, code_to_add, headers)
        href_uri.to_s
      end
    end

    # Subdomain pattern: when the text between "//" and the first "." already
    # resolves (via Lang.get_code) to the requested code, only its letter case
    # is normalized; otherwise the lower-cased code is inserted as a new
    # leading subdomain.
    def add_subdomain_lang_code(href, code_to_add)
      lowered_code = code_to_add.downcase
      # nil when the URL contains no "." after "//" (e.g. http://localhost/foo)
      sub_d = href[%r{//([^.]*)\.}, 1]
      sub_code = sub_d && Lang.get_code(sub_d)
      if sub_code&.casecmp(lowered_code)&.zero?
        # Escape the code so alias characters are matched literally.
        href.sub(Regexp.new(Regexp.escape(code_to_add), Regexp::IGNORECASE), lowered_code)
      else
        href.sub(%r{(//)([^.]*)}, "\\1#{lowered_code}.\\2")
      end
    end

    # @param absolute_uri parsed URI responding to #host
    # @param host_name [String] request host, optionally with ":port"
    # @return [Boolean] true when both refer to the same host name
    def internal_link?(absolute_uri, host_name)
      absolute_uri.host == host_name.split(':')[0]
    end

    # Adds the language code to a URL that is not absolute. href is returned
    # untouched when it cannot be resolved against the request URL.
    def add_lang_code_relative_url(href, code_to_add, headers)
      begin
        abs_path = normalize_absolute_path(headers.to_absolute_path(href), headers)
      rescue Addressable::URI::InvalidURIError, ArgumentError
        return href
      end

      case @store.url_pattern
      when 'subdomain'
        "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}#{abs_path}"
      when 'query'
        add_query_lang_code(href, code_to_add)
      else # path
        add_lang_code_for_path(href, code_to_add, headers)
      end
    end

    # Path pattern: prefixes the path with "/<code>", first dropping a leading
    # slash and a leading default-language segment, and keeps the trailing
    # slash consistent with href. The prefix is only added when the code
    # differs from the default language or equals the default language alias.
    # The result is always resolved against the request URL.
    def add_lang_code_for_path(href, code_to_add, headers)
      new_href = href

      if code_to_add != @store.default_lang || code_to_add == @store.default_lang_alias
        absolute_path = headers.to_absolute_path(href)
        lang_prefix_path = build_lang_path(code_to_add)
        prefix_path = build_lang_path('')
        suffix_path = absolute_path.sub(%r{^#{Regexp.escape(prefix_path)}(/|$)}, '')
                                   .sub(%r{^#{Regexp.escape(@store.default_lang)}(/|$)}, '')
        new_href = URL.normalize_path_slash(href, URL.join_paths(lang_prefix_path, suffix_path))
      end

      normalize_absolute_path(new_href, headers)
    end

    # Resolves input_path against the request URL and returns the path part
    # (with query and fragment when present).
    def normalize_absolute_path(input_path, headers)
      URL.resolve_absolute_path(headers.url_with_scheme, input_path)
    end

    # Applies string.sub!(pattern, replacement) until nothing is replaced.
    # Mutates string in place.
    def sub_repeat!(string, pattern, replacement)
      loop do
        break unless string.sub!(pattern, replacement)
      end
    end

    # Query pattern: appends "<lang_param_name>=<lang_code>" to the query
    # string, in front of any "#fragment". href is returned unchanged when it
    # already carries the language parameter.
    def add_query_lang_code(href, lang_code)
      lang_param = @store.settings['lang_param_name']
      # The parameter name is escaped so that e.g. "wovn.lang" is matched literally.
      # NOTE(review): the separator group is optional, so a parameter whose
      # name merely ends with lang_param also counts as present - confirm intent.
      return href if href.match?(/(&|&amp;|\?)?#{Regexp.escape(lang_param)}=[a-zA-Z_-]+/)

      query_separator = href.include?('?') ? '&' : '?'
      href.sub(/(#|$)/, "#{query_separator}#{lang_param}=#{lang_code}\\1")
    end

    # @return [String] "/<lang_code>", or '' when lang_code is nil or blank
    def build_lang_path(lang_code)
      # Same rule as ActiveSupport's blank? for nil/strings, in core Ruby.
      lang_code.to_s.match?(/\A[[:space:]]*\z/) ? '' : URL.prepend_path_slash(lang_code)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Wovnrb
2
- VERSION = '3.5.0'.freeze
2
+ VERSION = '3.6.0'.freeze
3
3
  end
data/lib/wovnrb.rb CHANGED
@@ -5,6 +5,7 @@ require 'wovnrb/store'
5
5
  require 'wovnrb/lang'
6
6
  require 'wovnrb/services/html_converter'
7
7
  require 'wovnrb/services/html_replace_marker'
8
+ require 'wovnrb/url_language_switcher'
8
9
  require 'nokogiri'
9
10
  require 'active_support'
10
11
  require 'json'
@@ -76,7 +77,8 @@ module Wovnrb
76
77
  html_body = Helpers::NokogumboHelper.parse_html(string_body)
77
78
 
78
79
  if !wovn_ignored?(html_body) && !amp_page?(html_body)
79
- html_converter = HtmlConverter.new(html_body, @store, headers)
80
+ url_lang_switcher = Wovnrb::UrlLanguageSwitcher.new(@store)
81
+ html_converter = HtmlConverter.new(html_body, @store, headers, url_lang_switcher)
80
82
 
81
83
  if needs_api?(html_body, headers)
82
84
  converted_html, marker = html_converter.build_api_compatible_html
@@ -54,6 +54,7 @@ module Wovnrb
54
54
  'lang_code' => 'fr',
55
55
  'url_pattern' => 'subdomain',
56
56
  'lang_param_name' => 'lang',
57
+ 'translate_canonical_tag' => true,
57
58
  'product' => 'WOVN.rb',
58
59
  'version' => VERSION,
59
60
  'body' => 'foo',
@@ -62,6 +63,46 @@ module Wovnrb
62
63
  times: 1
63
64
  end
64
65
 
66
# A request carrying a Googlebot user agent, with no timeout overrides in the
# settings, is expected to get an API timeout of 5.0.
def test_api_timeout_is_search_engine_user_higher_default
  googlebot_user_agent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
  settings = {
    'project_token' => '123456',
    'custom_lang_aliases' => { 'ja' => 'Japanese' },
    'default_lang' => 'en',
    'url_pattern' => 'subdomain',
    'url_pattern_reg' => '^(?<lang>[^.]+).',
    'lang_param_name' => 'lang'
  }

  store = Wovnrb::Store.instance
  store.update_settings(settings)

  env = Wovnrb.get_env('url' => 'http://fr.wovn.io/test', 'HTTP_USER_AGENT' => googlebot_user_agent)
  headers = Wovnrb::Headers.new(env, Wovnrb.get_settings(settings))

  actual_timeout = ApiTranslator.new(store, headers, REQUEST_UUID).send(:api_timeout)
  assert_equal(5.0, actual_timeout)
end
84
+
85
# A request whose env has no HTTP_USER_AGENT entry, with no timeout overrides
# in the settings, is expected to get an API timeout of 1.0.
def test_api_timeout_no_user_agent_use_normal_default
  settings = {
    'project_token' => '123456',
    'custom_lang_aliases' => { 'ja' => 'Japanese' },
    'default_lang' => 'en',
    'url_pattern' => 'subdomain',
    'url_pattern_reg' => '^(?<lang>[^.]+).',
    'lang_param_name' => 'lang'
  }

  store = Wovnrb::Store.instance
  store.update_settings(settings)

  # Build a regular env, then drop the user agent to simulate its absence.
  env_without_user_agent = Wovnrb.get_env('url' => 'http://fr.wovn.io/test')
  env_without_user_agent.delete('HTTP_USER_AGENT')
  headers = Wovnrb::Headers.new(env_without_user_agent, Wovnrb.get_settings(settings))

  actual_timeout = ApiTranslator.new(store, headers, REQUEST_UUID).send(:api_timeout)
  assert_equal(1.0, actual_timeout)
end
105
+
65
106
  private
66
107
 
67
108
  def assert_translation(original_html_fixture, translated_html_fixture, success_expected, response: { encoding: 'gzip', status_code: 200 }, compress_data: true)
@@ -80,6 +121,8 @@ module Wovnrb
80
121
  api_translator, store, _headers = create_sut
81
122
  translation_request_stub = stub_translation_api_request(store, original_html, translated_html, response, compress_data: compress_data)
82
123
 
124
+ expected_api_timeout = store.settings['api_timeout_seconds']
125
+ assert_equal(expected_api_timeout, api_translator.send(:api_timeout))
83
126
  actual_translated_html = api_translator.translate(original_html)
84
127
  assert_requested(translation_request_stub, times: 1) if translation_request_stub
85
128
  actual_translated_html
@@ -152,6 +195,7 @@ module Wovnrb
152
195
  'lang_code' => 'fr',
153
196
  'url_pattern' => 'subdomain',
154
197
  'lang_param_name' => 'lang',
198
+ 'translate_canonical_tag' => true,
155
199
  'product' => 'WOVN.rb',
156
200
  'version' => VERSION,
157
201
  'body' => original_html,