wovnrb 3.11.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/README.en.md +256 -256
- data/README.ja.md +218 -218
- data/docker/rails/TestSite/config/application.rb +1 -2
- data/docker/rails/TestSite/yarn.lock +7449 -7353
- data/lib/wovnrb/api_translator.rb +186 -183
- data/lib/wovnrb/headers.rb +192 -192
- data/lib/wovnrb/lang.rb +260 -260
- data/lib/wovnrb/services/html_converter.rb +240 -222
- data/lib/wovnrb/services/html_replace_marker.rb +48 -48
- data/lib/wovnrb/settings.rb +2 -2
- data/lib/wovnrb/store.rb +230 -222
- data/lib/wovnrb/version.rb +3 -3
- data/lib/wovnrb.rb +146 -145
- data/test/lib/api_translator_test.rb +228 -217
- data/test/lib/custom_domain/custom_domain_langs_test.rb +2 -2
- data/test/lib/lang_test.rb +59 -59
- data/test/lib/services/html_converter_test.rb +570 -532
- data/test/lib/services/html_replace_marker_test.rb +149 -149
- data/test/lib/url_language_switcher_test.rb +1097 -1097
- data/test/lib/wovnrb_test.rb +477 -454
- data/test/test_helper.rb +1 -0
- metadata +6 -6
data/lib/wovnrb/lang.rb
CHANGED
|
@@ -1,260 +1,260 @@
|
|
|
1
|
-
require 'addressable'
|
|
2
|
-
|
|
3
|
-
module Wovnrb
|
|
4
|
-
class Lang
|
|
5
|
-
LANG = {
|
|
6
|
-
'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
|
|
7
|
-
'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
|
|
8
|
-
'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
|
|
9
|
-
'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
|
|
10
|
-
'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
|
|
11
|
-
'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
|
|
12
|
-
'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
|
|
13
|
-
'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
|
|
14
|
-
'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
|
|
15
|
-
'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
|
|
16
|
-
'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
|
|
17
|
-
'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
|
|
18
|
-
'en' => { name: 'English', code: 'en', en: 'English' },
|
|
19
|
-
'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
|
|
20
|
-
'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
|
|
21
|
-
'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
|
|
22
|
-
'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
|
|
23
|
-
'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
|
|
24
|
-
'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
|
|
25
|
-
'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
|
|
26
|
-
'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
|
|
27
|
-
'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
|
|
28
|
-
'fr' => { name: 'Français', code: 'fr', en: 'French' },
|
|
29
|
-
'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
|
|
30
|
-
'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
|
|
31
|
-
'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
|
|
32
|
-
'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
|
|
33
|
-
'de' => { name: 'Deutsch', code: 'de', en: 'German' },
|
|
34
|
-
'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
|
|
35
|
-
'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
|
|
36
|
-
'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
|
|
37
|
-
'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
|
|
38
|
-
'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
|
|
39
|
-
'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
|
|
40
|
-
'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
|
|
41
|
-
'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
|
|
42
|
-
'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
|
|
43
|
-
'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
|
|
44
|
-
'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
|
|
45
|
-
'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
|
|
46
|
-
'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
|
|
47
|
-
'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
|
|
48
|
-
'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
|
|
49
|
-
'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
|
|
50
|
-
'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
|
|
51
|
-
'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
|
|
52
|
-
'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
|
|
53
|
-
'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
|
|
54
|
-
'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
|
|
55
|
-
'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
|
|
56
|
-
'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
|
|
57
|
-
'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
|
|
58
|
-
'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
|
|
59
|
-
'es' => { name: 'Español', code: 'es', en: 'Spanish' },
|
|
60
|
-
'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
|
|
61
|
-
'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
|
|
62
|
-
'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
|
|
63
|
-
'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
|
|
64
|
-
'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
|
|
65
|
-
'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
|
|
66
|
-
'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
|
|
67
|
-
'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
|
|
68
|
-
'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
|
|
69
|
-
'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
|
|
70
|
-
'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
|
|
71
|
-
'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
|
|
72
|
-
'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
|
|
73
|
-
'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
|
|
74
|
-
'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
|
|
75
|
-
'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
|
|
76
|
-
'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
|
|
77
|
-
'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
|
|
78
|
-
'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
|
|
79
|
-
'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
|
|
80
|
-
'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
|
|
81
|
-
'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
|
|
82
|
-
'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
|
|
83
|
-
'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
|
|
84
|
-
'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
|
|
85
|
-
}.freeze
|
|
86
|
-
|
|
87
|
-
# Provides the ISO639-1 code for a given lang code.
|
|
88
|
-
# Source: https://support.google.com/webmasters/answer/189077?hl=en
|
|
89
|
-
#
|
|
90
|
-
# @param lang_code [String] lang_code Code of the language.
|
|
91
|
-
#
|
|
92
|
-
# @return [String] The ISO639-1 code of the language.
|
|
93
|
-
def self.iso_639_1_normalization(lang_code)
|
|
94
|
-
lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
def self.get_code(lang_name)
|
|
98
|
-
return nil if lang_name.nil?
|
|
99
|
-
return lang_name if LANG[lang_name]
|
|
100
|
-
|
|
101
|
-
custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
|
|
102
|
-
custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
|
|
103
|
-
return custom_lang[:code] if custom_lang
|
|
104
|
-
|
|
105
|
-
LANG.
|
|
106
|
-
return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
|
|
107
|
-
end
|
|
108
|
-
nil
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def self.get_lang(lang)
|
|
112
|
-
lang_code = get_code(lang)
|
|
113
|
-
LANG[lang_code]
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
def initialize(lang_name)
|
|
117
|
-
@lang_code = Lang.get_code(lang_name)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
attr_reader :lang_code
|
|
121
|
-
|
|
122
|
-
# Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
|
|
123
|
-
# When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
|
|
124
|
-
# If you want to know more examples, see also test/lib/lang_test.rb.
|
|
125
|
-
#
|
|
126
|
-
# @param [String] href original URL.
|
|
127
|
-
# @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
|
|
128
|
-
# @param [Wovnrb::Header] headers instance of Wovnrb::Header. It generates new env variable for original request.
|
|
129
|
-
# @return [String] URL with the language code added.
|
|
130
|
-
def add_lang_code(href, pattern, headers)
|
|
131
|
-
return href if /^(#.*)?$/.match?(href)
|
|
132
|
-
|
|
133
|
-
settings = Store.instance.settings
|
|
134
|
-
code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
|
|
135
|
-
lang_param_name = settings['lang_param_name']
|
|
136
|
-
# absolute links
|
|
137
|
-
new_href = href
|
|
138
|
-
if href && href =~ /^(https?:)?\/\//i
|
|
139
|
-
# in the future, perhaps validate url rather than using begin rescue
|
|
140
|
-
# "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
|
|
141
|
-
begin
|
|
142
|
-
uri = Addressable::URI.parse(href)
|
|
143
|
-
rescue
|
|
144
|
-
return new_href
|
|
145
|
-
end
|
|
146
|
-
# only add lang if it's an internal link
|
|
147
|
-
# DNS names are case insensitive
|
|
148
|
-
if uri.host.downcase === headers.host.downcase
|
|
149
|
-
case pattern
|
|
150
|
-
when 'subdomain'
|
|
151
|
-
sub_d = href.match(/\/\/([^.]*)\./)[1]
|
|
152
|
-
sub_code = Lang.get_code(sub_d)
|
|
153
|
-
new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
|
|
154
|
-
href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
|
|
155
|
-
else
|
|
156
|
-
href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
|
|
157
|
-
end
|
|
158
|
-
when 'query'
|
|
159
|
-
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
|
160
|
-
else # path
|
|
161
|
-
new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
|
|
162
|
-
end
|
|
163
|
-
end
|
|
164
|
-
elsif href
|
|
165
|
-
case pattern
|
|
166
|
-
when 'subdomain'
|
|
167
|
-
lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
|
|
168
|
-
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
|
169
|
-
new_href = case href
|
|
170
|
-
when /^\.\..*$/
|
|
171
|
-
# ../path
|
|
172
|
-
"#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
|
|
173
|
-
when /^\..*$/
|
|
174
|
-
# ./path
|
|
175
|
-
"#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
|
|
176
|
-
when /^\/.*$/
|
|
177
|
-
# /path
|
|
178
|
-
lang_url + href
|
|
179
|
-
else
|
|
180
|
-
# path
|
|
181
|
-
"#{lang_url}#{current_dir}/#{href}"
|
|
182
|
-
end
|
|
183
|
-
when 'query'
|
|
184
|
-
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
|
185
|
-
else # path
|
|
186
|
-
if /^\//.match?(href)
|
|
187
|
-
new_href = "/#{code_to_add}#{href}"
|
|
188
|
-
else
|
|
189
|
-
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
|
190
|
-
current_dir = '/' if current_dir == ''
|
|
191
|
-
new_href = "/#{code_to_add}#{current_dir}#{href}"
|
|
192
|
-
end
|
|
193
|
-
end
|
|
194
|
-
end
|
|
195
|
-
|
|
196
|
-
new_href
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
private
|
|
200
|
-
|
|
201
|
-
def index_href_for_encoding_and_decoding(dom)
|
|
202
|
-
result = {}
|
|
203
|
-
dom.xpath('//*[@href]').each do |a_tag|
|
|
204
|
-
url = a_tag['href']
|
|
205
|
-
begin
|
|
206
|
-
encoded_url = Addressable::URI.parse(url).normalize.to_s
|
|
207
|
-
result[encoded_url] = url if encoded_url != url
|
|
208
|
-
rescue Addressable::URI::InvalidURIError => e
|
|
209
|
-
WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
|
|
210
|
-
end
|
|
211
|
-
end
|
|
212
|
-
result
|
|
213
|
-
end
|
|
214
|
-
|
|
215
|
-
def replace_dom_values(dom, values, store, url, headers)
|
|
216
|
-
text_index = values['text_vals'] || {}
|
|
217
|
-
html_text_index = values['html_text_vals'] || {}
|
|
218
|
-
src_index = values['img_vals'] || {}
|
|
219
|
-
img_src_prefix = values['img_src_prefix'] || ''
|
|
220
|
-
host_aliases = values['host_aliases'] || []
|
|
221
|
-
|
|
222
|
-
replacers = []
|
|
223
|
-
# add lang code to anchors href if not default lang
|
|
224
|
-
if @lang_code != store.settings['default_lang']
|
|
225
|
-
pattern = store.settings['url_pattern']
|
|
226
|
-
replacers << LinkReplacer.new(store, pattern, headers)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
replacers << if html_text_index.empty?
|
|
230
|
-
TextReplacer.new(store, text_index)
|
|
231
|
-
else
|
|
232
|
-
UnifiedValues::TextReplacer.new(store, html_text_index)
|
|
233
|
-
end
|
|
234
|
-
replacers << MetaReplacer.new(store, text_index, pattern, headers)
|
|
235
|
-
replacers << InputReplacer.new(store, text_index)
|
|
236
|
-
replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
|
|
237
|
-
replacers << ScriptReplacer.new(store) if dom.html?
|
|
238
|
-
|
|
239
|
-
replacers.each do |replacer|
|
|
240
|
-
replacer.replace(dom, self)
|
|
241
|
-
end
|
|
242
|
-
end
|
|
243
|
-
|
|
244
|
-
def get_langs(values)
|
|
245
|
-
langs = Set.new
|
|
246
|
-
(values['text_vals'] || {}).merge(values['img_vals'] || {}).
|
|
247
|
-
index.
|
|
248
|
-
langs.add(l)
|
|
249
|
-
end
|
|
250
|
-
end
|
|
251
|
-
langs
|
|
252
|
-
end
|
|
253
|
-
|
|
254
|
-
def add_query_lang_code(href, lang_code, lang_param_name)
|
|
255
|
-
query_separator = href.include?('?') ? '&' : '?'
|
|
256
|
-
|
|
257
|
-
href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
|
|
258
|
-
end
|
|
259
|
-
end
|
|
260
|
-
end
|
|
1
|
+
require 'addressable'
|
|
2
|
+
|
|
3
|
+
module Wovnrb
|
|
4
|
+
class Lang
|
|
5
|
+
LANG = {
|
|
6
|
+
'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
|
|
7
|
+
'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
|
|
8
|
+
'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
|
|
9
|
+
'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
|
|
10
|
+
'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
|
|
11
|
+
'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
|
|
12
|
+
'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
|
|
13
|
+
'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
|
|
14
|
+
'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
|
|
15
|
+
'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
|
|
16
|
+
'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
|
|
17
|
+
'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
|
|
18
|
+
'en' => { name: 'English', code: 'en', en: 'English' },
|
|
19
|
+
'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
|
|
20
|
+
'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
|
|
21
|
+
'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
|
|
22
|
+
'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
|
|
23
|
+
'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
|
|
24
|
+
'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
|
|
25
|
+
'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
|
|
26
|
+
'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
|
|
27
|
+
'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
|
|
28
|
+
'fr' => { name: 'Français', code: 'fr', en: 'French' },
|
|
29
|
+
'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
|
|
30
|
+
'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
|
|
31
|
+
'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
|
|
32
|
+
'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
|
|
33
|
+
'de' => { name: 'Deutsch', code: 'de', en: 'German' },
|
|
34
|
+
'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
|
|
35
|
+
'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
|
|
36
|
+
'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
|
|
37
|
+
'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
|
|
38
|
+
'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
|
|
39
|
+
'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
|
|
40
|
+
'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
|
|
41
|
+
'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
|
|
42
|
+
'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
|
|
43
|
+
'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
|
|
44
|
+
'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
|
|
45
|
+
'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
|
|
46
|
+
'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
|
|
47
|
+
'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
|
|
48
|
+
'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
|
|
49
|
+
'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
|
|
50
|
+
'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
|
|
51
|
+
'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
|
|
52
|
+
'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
|
|
53
|
+
'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
|
|
54
|
+
'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
|
|
55
|
+
'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
|
|
56
|
+
'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
|
|
57
|
+
'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
|
|
58
|
+
'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
|
|
59
|
+
'es' => { name: 'Español', code: 'es', en: 'Spanish' },
|
|
60
|
+
'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
|
|
61
|
+
'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
|
|
62
|
+
'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
|
|
63
|
+
'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
|
|
64
|
+
'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
|
|
65
|
+
'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
|
|
66
|
+
'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
|
|
67
|
+
'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
|
|
68
|
+
'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
|
|
69
|
+
'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
|
|
70
|
+
'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
|
|
71
|
+
'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
|
|
72
|
+
'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
|
|
73
|
+
'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
|
|
74
|
+
'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
|
|
75
|
+
'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
|
|
76
|
+
'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
|
|
77
|
+
'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
|
|
78
|
+
'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
|
|
79
|
+
'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
|
|
80
|
+
'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
|
|
81
|
+
'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
|
|
82
|
+
'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
|
|
83
|
+
'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
|
|
84
|
+
'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
|
|
85
|
+
}.freeze
|
|
86
|
+
|
|
87
|
+
# Provides the ISO639-1 code for a given lang code.
|
|
88
|
+
# Source: https://support.google.com/webmasters/answer/189077?hl=en
|
|
89
|
+
#
|
|
90
|
+
# @param lang_code [String] lang_code Code of the language.
|
|
91
|
+
#
|
|
92
|
+
# @return [String] The ISO639-1 code of the language.
|
|
93
|
+
def self.iso_639_1_normalization(lang_code)
|
|
94
|
+
lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def self.get_code(lang_name)
|
|
98
|
+
return nil if lang_name.nil?
|
|
99
|
+
return lang_name if LANG[lang_name]
|
|
100
|
+
|
|
101
|
+
custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
|
|
102
|
+
custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
|
|
103
|
+
return custom_lang[:code] if custom_lang
|
|
104
|
+
|
|
105
|
+
LANG.each_value do |l|
|
|
106
|
+
return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
|
|
107
|
+
end
|
|
108
|
+
nil
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def self.get_lang(lang)
|
|
112
|
+
lang_code = get_code(lang)
|
|
113
|
+
LANG[lang_code]
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def initialize(lang_name)
|
|
117
|
+
@lang_code = Lang.get_code(lang_name)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
attr_reader :lang_code
|
|
121
|
+
|
|
122
|
+
# Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
|
|
123
|
+
# When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
|
|
124
|
+
# If you want to know more examples, see also test/lib/lang_test.rb.
|
|
125
|
+
#
|
|
126
|
+
# @param [String] href original URL.
|
|
127
|
+
# @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
|
|
128
|
+
# @param [Wovnrb::Header] headers instance of Wovnrb::Header. It generates new env variable for original request.
|
|
129
|
+
# @return [String] URL with the language code added.
|
|
130
|
+
def add_lang_code(href, pattern, headers)
|
|
131
|
+
return href if /^(#.*)?$/.match?(href)
|
|
132
|
+
|
|
133
|
+
settings = Store.instance.settings
|
|
134
|
+
code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
|
|
135
|
+
lang_param_name = settings['lang_param_name']
|
|
136
|
+
# absolute links
|
|
137
|
+
new_href = href
|
|
138
|
+
if href && href =~ /^(https?:)?\/\//i
|
|
139
|
+
# in the future, perhaps validate url rather than using begin rescue
|
|
140
|
+
# "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
|
|
141
|
+
begin
|
|
142
|
+
uri = Addressable::URI.parse(href)
|
|
143
|
+
rescue
|
|
144
|
+
return new_href
|
|
145
|
+
end
|
|
146
|
+
# only add lang if it's an internal link
|
|
147
|
+
# DNS names are case insensitive
|
|
148
|
+
if uri.host.downcase === headers.host.downcase
|
|
149
|
+
case pattern
|
|
150
|
+
when 'subdomain'
|
|
151
|
+
sub_d = href.match(/\/\/([^.]*)\./)[1]
|
|
152
|
+
sub_code = Lang.get_code(sub_d)
|
|
153
|
+
new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
|
|
154
|
+
href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
|
|
155
|
+
else
|
|
156
|
+
href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
|
|
157
|
+
end
|
|
158
|
+
when 'query'
|
|
159
|
+
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
|
160
|
+
else # path
|
|
161
|
+
new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
elsif href
|
|
165
|
+
case pattern
|
|
166
|
+
when 'subdomain'
|
|
167
|
+
lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
|
|
168
|
+
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
|
169
|
+
new_href = case href
|
|
170
|
+
when /^\.\..*$/
|
|
171
|
+
# ../path
|
|
172
|
+
"#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
|
|
173
|
+
when /^\..*$/
|
|
174
|
+
# ./path
|
|
175
|
+
"#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
|
|
176
|
+
when /^\/.*$/
|
|
177
|
+
# /path
|
|
178
|
+
lang_url + href
|
|
179
|
+
else
|
|
180
|
+
# path
|
|
181
|
+
"#{lang_url}#{current_dir}/#{href}"
|
|
182
|
+
end
|
|
183
|
+
when 'query'
|
|
184
|
+
new_href = add_query_lang_code(href, code_to_add, lang_param_name)
|
|
185
|
+
else # path
|
|
186
|
+
if /^\//.match?(href)
|
|
187
|
+
new_href = "/#{code_to_add}#{href}"
|
|
188
|
+
else
|
|
189
|
+
current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
|
|
190
|
+
current_dir = '/' if current_dir == ''
|
|
191
|
+
new_href = "/#{code_to_add}#{current_dir}#{href}"
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
new_href
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
private
|
|
200
|
+
|
|
201
|
+
def index_href_for_encoding_and_decoding(dom)
|
|
202
|
+
result = {}
|
|
203
|
+
dom.xpath('//*[@href]').each do |a_tag|
|
|
204
|
+
url = a_tag['href']
|
|
205
|
+
begin
|
|
206
|
+
encoded_url = Addressable::URI.parse(url).normalize.to_s
|
|
207
|
+
result[encoded_url] = url if encoded_url != url
|
|
208
|
+
rescue Addressable::URI::InvalidURIError => e
|
|
209
|
+
WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
result
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def replace_dom_values(dom, values, store, url, headers)
|
|
216
|
+
text_index = values['text_vals'] || {}
|
|
217
|
+
html_text_index = values['html_text_vals'] || {}
|
|
218
|
+
src_index = values['img_vals'] || {}
|
|
219
|
+
img_src_prefix = values['img_src_prefix'] || ''
|
|
220
|
+
host_aliases = values['host_aliases'] || []
|
|
221
|
+
|
|
222
|
+
replacers = []
|
|
223
|
+
# add lang code to anchors href if not default lang
|
|
224
|
+
if @lang_code != store.settings['default_lang']
|
|
225
|
+
pattern = store.settings['url_pattern']
|
|
226
|
+
replacers << LinkReplacer.new(store, pattern, headers)
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
replacers << if html_text_index.empty?
|
|
230
|
+
TextReplacer.new(store, text_index)
|
|
231
|
+
else
|
|
232
|
+
UnifiedValues::TextReplacer.new(store, html_text_index)
|
|
233
|
+
end
|
|
234
|
+
replacers << MetaReplacer.new(store, text_index, pattern, headers)
|
|
235
|
+
replacers << InputReplacer.new(store, text_index)
|
|
236
|
+
replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
|
|
237
|
+
replacers << ScriptReplacer.new(store) if dom.html?
|
|
238
|
+
|
|
239
|
+
replacers.each do |replacer|
|
|
240
|
+
replacer.replace(dom, self)
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
def get_langs(values)
|
|
245
|
+
langs = Set.new
|
|
246
|
+
(values['text_vals'] || {}).merge(values['img_vals'] || {}).each_value do |index|
|
|
247
|
+
index.each_key do |l|
|
|
248
|
+
langs.add(l)
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
langs
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
def add_query_lang_code(href, lang_code, lang_param_name)
|
|
255
|
+
query_separator = href.include?('?') ? '&' : '?'
|
|
256
|
+
|
|
257
|
+
href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
|
|
258
|
+
end
|
|
259
|
+
end
|
|
260
|
+
end
|