wovnrb 3.10.3 → 3.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.en.md +256 -256
- data/README.ja.md +218 -218
- data/README.md +1 -1
- data/docker/rails/TestSite/config/application.rb +2 -1
- data/docker/rails/TestSite/yarn.lock +7353 -7353
- data/lib/wovnrb/api_translator.rb +183 -183
- data/lib/wovnrb/headers.rb +192 -192
- data/lib/wovnrb/lang.rb +260 -260
- data/lib/wovnrb/services/html_converter.rb +222 -222
- data/lib/wovnrb/services/html_replace_marker.rb +48 -48
- data/lib/wovnrb/store.rb +222 -221
- data/lib/wovnrb/version.rb +3 -3
- data/lib/wovnrb.rb +145 -138
- data/test/lib/api_translator_test.rb +217 -217
- data/test/lib/lang_test.rb +59 -59
- data/test/lib/services/html_converter_test.rb +532 -532
- data/test/lib/services/html_replace_marker_test.rb +149 -149
- data/test/lib/url_language_switcher_test.rb +1097 -1097
- data/test/lib/wovnrb_test.rb +454 -342
- metadata +3 -3
data/lib/wovnrb/lang.rb
CHANGED
@@ -1,260 +1,260 @@
|
|
1
|
-
require 'addressable'
|
2
|
-
|
3
|
-
module Wovnrb
|
4
|
-
class Lang
|
5
|
-
LANG = {
|
6
|
-
'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
|
7
|
-
'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
|
8
|
-
'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
|
9
|
-
'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
|
10
|
-
'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
|
11
|
-
'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
|
12
|
-
'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
|
13
|
-
'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
|
14
|
-
'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
|
15
|
-
'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
|
16
|
-
'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
|
17
|
-
'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
|
18
|
-
'en' => { name: 'English', code: 'en', en: 'English' },
|
19
|
-
'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
|
20
|
-
'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
|
21
|
-
'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
|
22
|
-
'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
|
23
|
-
'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
|
24
|
-
'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
|
25
|
-
'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
|
26
|
-
'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
|
27
|
-
'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
|
28
|
-
'fr' => { name: 'Français', code: 'fr', en: 'French' },
|
29
|
-
'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
|
30
|
-
'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
|
31
|
-
'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
|
32
|
-
'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
|
33
|
-
'de' => { name: 'Deutsch', code: 'de', en: 'German' },
|
34
|
-
'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
|
35
|
-
'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
|
36
|
-
'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
|
37
|
-
'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
|
38
|
-
'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
|
39
|
-
'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
|
40
|
-
'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
|
41
|
-
'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
|
42
|
-
'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
|
43
|
-
'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
|
44
|
-
'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
|
45
|
-
'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
|
46
|
-
'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
|
47
|
-
'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
|
48
|
-
'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
|
49
|
-
'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
|
50
|
-
'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
|
51
|
-
'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
|
52
|
-
'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
|
53
|
-
'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
|
54
|
-
'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
|
55
|
-
'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
|
56
|
-
'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
|
57
|
-
'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
|
58
|
-
'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
|
59
|
-
'es' => { name: 'Español', code: 'es', en: 'Spanish' },
|
60
|
-
'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
|
61
|
-
'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
|
62
|
-
'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
|
63
|
-
'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
|
64
|
-
'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
|
65
|
-
'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
|
66
|
-
'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
|
67
|
-
'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
|
68
|
-
'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
|
69
|
-
'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
|
70
|
-
'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
|
71
|
-
'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
|
72
|
-
'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
|
73
|
-
'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
|
74
|
-
'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
|
75
|
-
'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
|
76
|
-
'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
|
77
|
-
'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
|
78
|
-
'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
|
79
|
-
'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
|
80
|
-
'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
|
81
|
-
'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
|
82
|
-
'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
|
83
|
-
'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
|
84
|
-
'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
|
85
|
-
}.freeze
|
86
|
-
|
87
|
-
# Provides the ISO639-1 code for a given lang code.
|
88
|
-
# Source: https://support.google.com/webmasters/answer/189077?hl=en
|
89
|
-
#
|
90
|
-
# @param lang_code [String] lang_code Code of the language.
|
91
|
-
#
|
92
|
-
# @return [String] The ISO639-1 code of the language.
|
93
|
-
def self.iso_639_1_normalization(lang_code)
|
94
|
-
lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
|
95
|
-
end
|
96
|
-
|
97
|
-
def self.get_code(lang_name)
|
98
|
-
return nil if lang_name.nil?
|
99
|
-
return lang_name if LANG[lang_name]
|
100
|
-
|
101
|
-
custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
|
102
|
-
custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
|
103
|
-
return custom_lang[:code] if custom_lang
|
104
|
-
|
105
|
-
LANG.each do |_k, l|
|
106
|
-
return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
|
107
|
-
end
|
108
|
-
nil
|
109
|
-
end
|
110
|
-
|
111
|
-
def self.get_lang(lang)
|
112
|
-
lang_code = get_code(lang)
|
113
|
-
LANG[lang_code]
|
114
|
-
end
|
115
|
-
|
116
|
-
def initialize(lang_name)
|
117
|
-
@lang_code = Lang.get_code(lang_name)
|
118
|
-
end
|
119
|
-
|
120
|
-
attr_reader :lang_code
|
121
|
-
|
122
|
-
# Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
|
123
|
-
# When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
|
124
|
-
# If you want to know more examples, see also test/lib/lang_test.rb.
|
125
|
-
#
|
126
|
-
# @param [String] href original URL.
|
127
|
-
# @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
|
128
|
-
# @param [Wovnrb::Header] headers instance of Wovn::Header. It generates new env variable for original request.
|
129
|
-
# @return [String] URL with the language code added.
|
130
|
-
def add_lang_code(href, pattern, headers)
|
131
|
-
return href if /^(#.*)?$/.match?(href)
|
132
|
-
|
133
|
-
settings = Store.instance.settings
|
134
|
-
code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
|
135
|
-
lang_param_name = settings['lang_param_name']
|
136
|
-
# absolute links
|
137
|
-
new_href = href
|
138
|
-
if href && href =~ /^(https?:)?\/\//i
|
139
|
-
# in the future, perhaps validate url rather than using begin rescue
|
140
|
-
# "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
|
141
|
-
begin
|
142
|
-
uri = Addressable::URI.parse(href)
|
143
|
-
rescue
|
144
|
-
return new_href
|
145
|
-
end
|
146
|
-
# only add lang if it's an internal link
|
147
|
-
# DNS names are case insensitive
|
148
|
-
if uri.host.downcase === headers.host.downcase
|
149
|
-
case pattern
|
150
|
-
when 'subdomain'
|
151
|
-
sub_d = href.match(/\/\/([^.]*)\./)[1]
|
152
|
-
sub_code = Lang.get_code(sub_d)
|
153
|
-
new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
|
154
|
-
href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
|
155
|
-
else
|
156
|
-
href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
|
157
|
-
end
|
158
|
-
when 'query'
|
159
|
-
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
160
|
-
else # path
|
161
|
-
new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
|
162
|
-
end
|
163
|
-
end
|
164
|
-
elsif href
|
165
|
-
case pattern
|
166
|
-
when 'subdomain'
|
167
|
-
lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
|
168
|
-
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
169
|
-
new_href = case href
|
170
|
-
when /^\.\..*$/
|
171
|
-
# ../path
|
172
|
-
"#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
|
173
|
-
when /^\..*$/
|
174
|
-
# ./path
|
175
|
-
"#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
|
176
|
-
when /^\/.*$/
|
177
|
-
# /path
|
178
|
-
lang_url + href
|
179
|
-
else
|
180
|
-
# path
|
181
|
-
"#{lang_url}#{current_dir}/#{href}"
|
182
|
-
end
|
183
|
-
when 'query'
|
184
|
-
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
185
|
-
else # path
|
186
|
-
if /^\//.match?(href)
|
187
|
-
new_href = "/#{code_to_add}#{href}"
|
188
|
-
else
|
189
|
-
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
190
|
-
current_dir = '/' if current_dir == ''
|
191
|
-
new_href = "/#{code_to_add}#{current_dir}#{href}"
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
new_href
|
197
|
-
end
|
198
|
-
|
199
|
-
private
|
200
|
-
|
201
|
-
def index_href_for_encoding_and_decoding(dom)
|
202
|
-
result = {}
|
203
|
-
dom.xpath('//*[@href]').each do |a_tag|
|
204
|
-
url = a_tag['href']
|
205
|
-
begin
|
206
|
-
encoded_url = Addressable::URI.parse(url).normalize.to_s
|
207
|
-
result[encoded_url] = url if encoded_url != url
|
208
|
-
rescue Addressable::URI::InvalidURIError => e
|
209
|
-
WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
|
210
|
-
end
|
211
|
-
end
|
212
|
-
result
|
213
|
-
end
|
214
|
-
|
215
|
-
def replace_dom_values(dom, values, store, url, headers)
|
216
|
-
text_index = values['text_vals'] || {}
|
217
|
-
html_text_index = values['html_text_vals'] || {}
|
218
|
-
src_index = values['img_vals'] || {}
|
219
|
-
img_src_prefix = values['img_src_prefix'] || ''
|
220
|
-
host_aliases = values['host_aliases'] || []
|
221
|
-
|
222
|
-
replacers = []
|
223
|
-
# add lang code to anchors href if not default lang
|
224
|
-
if @lang_code != store.settings['default_lang']
|
225
|
-
pattern = store.settings['url_pattern']
|
226
|
-
replacers << LinkReplacer.new(store, pattern, headers)
|
227
|
-
end
|
228
|
-
|
229
|
-
replacers << if html_text_index.empty?
|
230
|
-
TextReplacer.new(store, text_index)
|
231
|
-
else
|
232
|
-
UnifiedValues::TextReplacer.new(store, html_text_index)
|
233
|
-
end
|
234
|
-
replacers << MetaReplacer.new(store, text_index, pattern, headers)
|
235
|
-
replacers << InputReplacer.new(store, text_index)
|
236
|
-
replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
|
237
|
-
replacers << ScriptReplacer.new(store) if dom.html?
|
238
|
-
|
239
|
-
replacers.each do |replacer|
|
240
|
-
replacer.replace(dom, self)
|
241
|
-
end
|
242
|
-
end
|
243
|
-
|
244
|
-
def get_langs(values)
|
245
|
-
langs = Set.new
|
246
|
-
(values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
|
247
|
-
index.each do |l, _val|
|
248
|
-
langs.add(l)
|
249
|
-
end
|
250
|
-
end
|
251
|
-
langs
|
252
|
-
end
|
253
|
-
|
254
|
-
def add_query_lang_code(href, lang_code, lang_param_name)
|
255
|
-
query_separator = href.include?('?') ? '&' : '?'
|
256
|
-
|
257
|
-
href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
|
258
|
-
end
|
259
|
-
end
|
260
|
-
end
|
1
|
+
require 'addressable'
|
2
|
+
|
3
|
+
module Wovnrb
|
4
|
+
class Lang
|
5
|
+
LANG = {
|
6
|
+
'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
|
7
|
+
'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
|
8
|
+
'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
|
9
|
+
'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
|
10
|
+
'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
|
11
|
+
'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
|
12
|
+
'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
|
13
|
+
'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
|
14
|
+
'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
|
15
|
+
'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
|
16
|
+
'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
|
17
|
+
'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
|
18
|
+
'en' => { name: 'English', code: 'en', en: 'English' },
|
19
|
+
'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
|
20
|
+
'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
|
21
|
+
'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
|
22
|
+
'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
|
23
|
+
'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
|
24
|
+
'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
|
25
|
+
'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
|
26
|
+
'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
|
27
|
+
'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
|
28
|
+
'fr' => { name: 'Français', code: 'fr', en: 'French' },
|
29
|
+
'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
|
30
|
+
'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
|
31
|
+
'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
|
32
|
+
'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
|
33
|
+
'de' => { name: 'Deutsch', code: 'de', en: 'German' },
|
34
|
+
'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
|
35
|
+
'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
|
36
|
+
'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
|
37
|
+
'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
|
38
|
+
'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
|
39
|
+
'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
|
40
|
+
'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
|
41
|
+
'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
|
42
|
+
'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
|
43
|
+
'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
|
44
|
+
'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
|
45
|
+
'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
|
46
|
+
'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
|
47
|
+
'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
|
48
|
+
'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
|
49
|
+
'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
|
50
|
+
'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
|
51
|
+
'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
|
52
|
+
'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
|
53
|
+
'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
|
54
|
+
'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
|
55
|
+
'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
|
56
|
+
'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
|
57
|
+
'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
|
58
|
+
'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
|
59
|
+
'es' => { name: 'Español', code: 'es', en: 'Spanish' },
|
60
|
+
'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
|
61
|
+
'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
|
62
|
+
'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
|
63
|
+
'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
|
64
|
+
'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
|
65
|
+
'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
|
66
|
+
'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
|
67
|
+
'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
|
68
|
+
'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
|
69
|
+
'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
|
70
|
+
'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
|
71
|
+
'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
|
72
|
+
'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
|
73
|
+
'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
|
74
|
+
'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
|
75
|
+
'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
|
76
|
+
'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
|
77
|
+
'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
|
78
|
+
'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
|
79
|
+
'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
|
80
|
+
'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
|
81
|
+
'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
|
82
|
+
'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
|
83
|
+
'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
|
84
|
+
'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
|
85
|
+
}.freeze
|
86
|
+
|
87
|
+
# Rewrites the Chinese codes 'zh-CHT' and 'zh-CHS' (matched case-insensitively)
# to 'zh-Hant' and 'zh-Hans'; any other text in the code is left as it is.
# Source: https://support.google.com/webmasters/answer/189077?hl=en
#
# @param lang_code [String] Code of the language.
#
# @return [String] The code with the first 'zh-CHT' and the first 'zh-CHS' replaced.
def self.iso_639_1_normalization(lang_code)
  # Applied in order; each rule replaces only the first occurrence it finds.
  rewrite_rules = [[/zh-CHT/i, 'zh-Hant'], [/zh-CHS/i, 'zh-Hans']]
  rewrite_rules.reduce(lang_code) do |code, (from_pattern, to_tag)|
    code.sub(from_pattern, to_tag)
  end
end
|
96
|
+
|
97
|
+
# Resolves a language identifier to its key in LANG.
#
# Lookup order: exact LANG key, then a configured custom alias, then an
# ASCII case-insensitive match on the native name, English name or code.
#
# @param lang_name [String, nil] code, custom alias or display name.
# @return [String, nil] the matching language code, or nil when nothing matches.
def self.get_code(lang_name)
  return nil if lang_name.nil?
  return lang_name if LANG[lang_name]

  # Aliases are keyed by language code, so invert to look up by alias.
  # Tolerate a missing setting instead of raising on nil.
  custom_lang_aliases = Store.instance.settings['custom_lang_aliases'] || {}
  custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
  return custom_lang[:code] if custom_lang

  # casecmp returns nil (not an Integer) for incomparable operands, e.g.
  # strings with incompatible encodings, so guard before calling zero?.
  matched = LANG.each_value.find do |l|
    %i[name en code].any? { |field| lang_name.casecmp(l[field])&.zero? }
  end
  matched && matched[:code]
end
|
110
|
+
|
111
|
+
# Looks up the LANG entry for a language identifier.
#
# @param lang [String, nil] identifier passed to get_code for resolution.
# @return [Hash, nil] the LANG entry for the resolved code, or nil when
#   get_code returns a value that is not a LANG key.
def self.get_lang(lang)
  LANG[get_code(lang)]
end
|
115
|
+
|
116
|
+
# Stores the language code that Lang.get_code resolves for the given identifier.
#
# @param lang_name [String, nil] identifier handed to Lang.get_code.
def initialize(lang_name)
  resolved_code = Lang.get_code(lang_name)
  @lang_code = resolved_code
end
|
119
|
+
|
120
|
+
attr_reader :lang_code
|
121
|
+
|
122
|
+
# Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
# When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
# If you want to know more examples, see also test/lib/lang_test.rb.
#
# The href is returned unchanged when it is empty or a pure fragment ("#top"),
# when it is an absolute URL that cannot be parsed or whose host differs from
# headers.host, or when no language code is available to add.
#
# @param [String] href original URL.
# @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
# @param [Object] headers request headers object; host, protocol and pathname are read from it.
# @return [String] URL with the language code added.
def add_lang_code(href, pattern, headers)
  return href if /^(#.*)?$/.match?(href)

  settings = Store.instance.settings
  # A configured alias, when present, is used in place of the language code.
  code_to_add = (settings['custom_lang_aliases'] || {})[@lang_code] || @lang_code
  # Without a code there is nothing to add; interpolating nil would emit
  # broken URLs such as "//path" (or raise on nil.downcase).
  return href if code_to_add.nil?

  lang_param_name = settings['lang_param_name']
  new_href = href
  if href && href =~ /^(https?:)?\/\//i
    # absolute links
    begin
      uri = Addressable::URI.parse(href)
    rescue StandardError
      return new_href
    end
    # only add lang if it's an internal link
    # DNS names are case insensitive; a URL without a host is never internal
    if uri.host && uri.host.downcase == headers.host.downcase
      case pattern
      when 'subdomain'
        # String#[] yields nil instead of raising when the URL contains no
        # dot after "//" (e.g. "http://localhost/page").
        sub_d = href[/\/\/([^.]*)\./, 1]
        sub_code = Lang.get_code(sub_d)
        new_href = if sub_code&.casecmp(code_to_add)&.zero?
                     # The code is already present: only normalize its case.
                     # Escape it so alias characters are never read as regex syntax.
                     href.sub(Regexp.new(Regexp.escape(code_to_add), Regexp::IGNORECASE), code_to_add.downcase)
                   else
                     href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
                   end
      when 'query'
        new_href = add_query_lang_code(href, code_to_add, lang_param_name)
      else # path
        new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
      end
    end
  elsif href
    # relative links
    case pattern
    when 'subdomain'
      lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
      # Strip a trailing "file.ext" segment from the request path.
      current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
      new_href = case href
                 when /^\.\..*$/
                   # ../path
                   "#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
                 when /^\..*$/
                   # ./path
                   "#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
                 when /^\/.*$/
                   # /path
                   lang_url + href
                 else
                   # path
                   "#{lang_url}#{current_dir}/#{href}"
                 end
    when 'query'
      new_href = add_query_lang_code(href, code_to_add, lang_param_name)
    else # path
      if /^\//.match?(href)
        new_href = "/#{code_to_add}#{href}"
      else
        current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
        current_dir = '/' if current_dir == ''
        new_href = "/#{code_to_add}#{current_dir}#{href}"
      end
    end
  end

  new_href
end
|
198
|
+
|
199
|
+
private
|
200
|
+
|
201
|
+
# Maps the normalized form of every href in the document back to the value
# as written, for hrefs whose normalized form differs from the original.
# Hrefs that Addressable rejects as invalid are logged and left out.
#
# @param dom [#xpath] document queried with the XPath '//*[@href]'.
# @return [Hash{String => String}] normalized URL => original href value.
def index_href_for_encoding_and_decoding(dom)
  dom.xpath('//*[@href]').each_with_object({}) do |node, original_by_normalized|
    raw_href = node['href']
    begin
      normalized = Addressable::URI.parse(raw_href).normalize.to_s
      original_by_normalized[normalized] = raw_href unless normalized == raw_href
    rescue Addressable::URI::InvalidURIError => e
      WovnLogger.instance.error("Failed parse url : #{raw_href}#{e.message}")
    end
  end
end
|
214
|
+
|
215
|
+
# Builds the list of replacers for this language and runs each one on the DOM.
#
# A LinkReplacer is added first only when @lang_code differs from the store's
# 'default_lang'; in that case the store's 'url_pattern' is also handed to the
# MetaReplacer, otherwise the MetaReplacer receives nil as its pattern.
# The unified text replacer is chosen when 'html_text_vals' is non-empty.
# A ScriptReplacer is appended only when dom.html? is truthy.
#
# @param dom [Object] document passed to every replacer; must respond to html?.
# @param values [Hash] reads 'text_vals', 'html_text_vals', 'img_vals',
#   'img_src_prefix' and 'host_aliases', each optional.
# @param store [Object] must respond to settings; passed to every replacer.
# @param url [Object] passed to the ImageReplacer.
# @param headers [Object] passed to the LinkReplacer and MetaReplacer.
# @return [Array] the replacers, in the order they were applied.
def replace_dom_values(dom, values, store, url, headers)
  text_index      = values['text_vals'] || {}
  html_text_index = values['html_text_vals'] || {}
  src_index       = values['img_vals'] || {}
  img_src_prefix  = values['img_src_prefix'] || ''
  host_aliases    = values['host_aliases'] || []

  pattern = nil
  replacers = []

  # add lang code to anchors href if not default lang
  unless @lang_code == store.settings['default_lang']
    pattern = store.settings['url_pattern']
    replacers.push(LinkReplacer.new(store, pattern, headers))
  end

  text_replacer =
    if html_text_index.empty?
      TextReplacer.new(store, text_index)
    else
      UnifiedValues::TextReplacer.new(store, html_text_index)
    end
  replacers.push(text_replacer)
  replacers.push(MetaReplacer.new(store, text_index, pattern, headers))
  replacers.push(InputReplacer.new(store, text_index))
  replacers.push(ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases))
  replacers.push(ScriptReplacer.new(store)) if dom.html?

  replacers.each { |replacer| replacer.replace(dom, self) }
end
|
243
|
+
|
244
|
+
# Collects every language key found in the 'text_vals' and 'img_vals' indexes.
#
# The two sections are walked separately rather than merged first: merging
# would let an 'img_vals' entry replace a 'text_vals' entry stored under the
# same key, silently dropping that entry's languages.
#
# @param values [Hash] reads 'text_vals' and 'img_vals' (both optional); each
#   maps a key to an index whose keys are collected as languages.
# @return [Set] all language keys seen in either section.
def get_langs(values)
  %w[text_vals img_vals].each_with_object(Set.new) do |section, langs|
    (values[section] || {}).each_value do |index|
      index.each { |l, _val| langs.add(l) }
    end
  end
end
|
253
|
+
|
254
|
+
# Appends "<lang_param_name>=<lang_code>" to the query string of href,
# placing it before the fragment when there is one.
#
# Only the part before the first '#' decides between '?' and '&': a '?'
# inside the fragment is not a query string and must not be counted.
#
# @param href [String] URL or path to extend.
# @param lang_code [String] value of the language parameter.
# @param lang_param_name [String] name of the language parameter.
# @return [String] href with the language parameter added.
def add_query_lang_code(href, lang_code, lang_param_name)
  before_fragment, hash_mark, fragment = href.partition('#')
  query_separator = before_fragment.include?('?') ? '&' : '?'

  "#{before_fragment}#{query_separator}#{lang_param_name}=#{lang_code}#{hash_mark}#{fragment}"
end
|
259
|
+
end
|
260
|
+
end
|