wovnrb 3.10.3 → 3.11.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/wovnrb/lang.rb CHANGED
@@ -1,260 +1,260 @@
1
- require 'addressable'
2
-
3
- module Wovnrb
4
- class Lang
5
- LANG = {
6
- 'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
7
- 'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
8
- 'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
9
- 'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
10
- 'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
11
- 'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
12
- 'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
13
- 'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
14
- 'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
15
- 'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
16
- 'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
17
- 'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
18
- 'en' => { name: 'English', code: 'en', en: 'English' },
19
- 'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
20
- 'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
21
- 'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
22
- 'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
23
- 'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
24
- 'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
25
- 'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
26
- 'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
27
- 'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
28
- 'fr' => { name: 'Français', code: 'fr', en: 'French' },
29
- 'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
30
- 'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
31
- 'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
32
- 'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
33
- 'de' => { name: 'Deutsch', code: 'de', en: 'German' },
34
- 'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
35
- 'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
36
- 'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
37
- 'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
38
- 'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
39
- 'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
40
- 'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
41
- 'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
42
- 'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
43
- 'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
44
- 'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
45
- 'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
46
- 'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
47
- 'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
48
- 'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
49
- 'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
50
- 'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
51
- 'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
52
- 'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
53
- 'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
54
- 'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
55
- 'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
56
- 'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
57
- 'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
58
- 'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
59
- 'es' => { name: 'Español', code: 'es', en: 'Spanish' },
60
- 'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
61
- 'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
62
- 'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
63
- 'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
64
- 'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
65
- 'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
66
- 'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
67
- 'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
68
- 'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
69
- 'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
70
- 'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
71
- 'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
72
- 'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
73
- 'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
74
- 'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
75
- 'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
76
- 'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
77
- 'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
78
- 'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
79
- 'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
80
- 'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
81
- 'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
82
- 'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
83
- 'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
84
- 'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
85
- }.freeze
86
-
87
- # Provides the ISO639-1 code for a given lang code.
88
- # Source: https://support.google.com/webmasters/answer/189077?hl=en
89
- #
90
- # @param lang_code [String] Code of the language.
91
- #
92
- # @return [String] The ISO639-1 code of the language.
93
- def self.iso_639_1_normalization(lang_code)
94
- lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
95
- end
96
-
97
- def self.get_code(lang_name)
98
- return nil if lang_name.nil?
99
- return lang_name if LANG[lang_name]
100
-
101
- custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
102
- custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
103
- return custom_lang[:code] if custom_lang
104
-
105
- LANG.each do |_k, l|
106
- return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
107
- end
108
- nil
109
- end
110
-
111
- def self.get_lang(lang)
112
- lang_code = get_code(lang)
113
- LANG[lang_code]
114
- end
115
-
116
- def initialize(lang_name)
117
- @lang_code = Lang.get_code(lang_name)
118
- end
119
-
120
- attr_reader :lang_code
121
-
122
- # Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
123
- # When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
124
- # If you want to know more examples, see also test/lib/lang_test.rb.
125
- #
126
- # @param [String] href original URL.
127
- # @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
128
- # @param [Wovnrb::Header] headers instance of Wovn::Header. It generates new env variable for original request.
129
- # @return [String] URL with the language code added.
130
- def add_lang_code(href, pattern, headers)
131
- return href if /^(#.*)?$/.match?(href)
132
-
133
- settings = Store.instance.settings
134
- code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
135
- lang_param_name = settings['lang_param_name']
136
- # absolute links
137
- new_href = href
138
- if href && href =~ /^(https?:)?\/\//i
139
- # in the future, perhaps validate url rather than using begin rescue
140
- # "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
141
- begin
142
- uri = Addressable::URI.parse(href)
143
- rescue
144
- return new_href
145
- end
146
- # only add lang if it's an internal link
147
- # DNS names are case insensitive
148
- if uri.host.downcase === headers.host.downcase
149
- case pattern
150
- when 'subdomain'
151
- sub_d = href.match(/\/\/([^.]*)\./)[1]
152
- sub_code = Lang.get_code(sub_d)
153
- new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
154
- href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
155
- else
156
- href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
157
- end
158
- when 'query'
159
- new_href = add_query_lang_code(href, code_to_add, lang_param_name)
160
- else # path
161
- new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
162
- end
163
- end
164
- elsif href
165
- case pattern
166
- when 'subdomain'
167
- lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
168
- current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
169
- new_href = case href
170
- when /^\.\..*$/
171
- # ../path
172
- "#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
173
- when /^\..*$/
174
- # ./path
175
- "#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
176
- when /^\/.*$/
177
- # /path
178
- lang_url + href
179
- else
180
- # path
181
- "#{lang_url}#{current_dir}/#{href}"
182
- end
183
- when 'query'
184
- new_href = add_query_lang_code(href, code_to_add, lang_param_name)
185
- else # path
186
- if /^\//.match?(href)
187
- new_href = "/#{code_to_add}#{href}"
188
- else
189
- current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
190
- current_dir = '/' if current_dir == ''
191
- new_href = "/#{code_to_add}#{current_dir}#{href}"
192
- end
193
- end
194
- end
195
-
196
- new_href
197
- end
198
-
199
- private
200
-
201
- def index_href_for_encoding_and_decoding(dom)
202
- result = {}
203
- dom.xpath('//*[@href]').each do |a_tag|
204
- url = a_tag['href']
205
- begin
206
- encoded_url = Addressable::URI.parse(url).normalize.to_s
207
- result[encoded_url] = url if encoded_url != url
208
- rescue Addressable::URI::InvalidURIError => e
209
- WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
210
- end
211
- end
212
- result
213
- end
214
-
215
- def replace_dom_values(dom, values, store, url, headers)
216
- text_index = values['text_vals'] || {}
217
- html_text_index = values['html_text_vals'] || {}
218
- src_index = values['img_vals'] || {}
219
- img_src_prefix = values['img_src_prefix'] || ''
220
- host_aliases = values['host_aliases'] || []
221
-
222
- replacers = []
223
- # add lang code to anchors href if not default lang
224
- if @lang_code != store.settings['default_lang']
225
- pattern = store.settings['url_pattern']
226
- replacers << LinkReplacer.new(store, pattern, headers)
227
- end
228
-
229
- replacers << if html_text_index.empty?
230
- TextReplacer.new(store, text_index)
231
- else
232
- UnifiedValues::TextReplacer.new(store, html_text_index)
233
- end
234
- replacers << MetaReplacer.new(store, text_index, pattern, headers)
235
- replacers << InputReplacer.new(store, text_index)
236
- replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
237
- replacers << ScriptReplacer.new(store) if dom.html?
238
-
239
- replacers.each do |replacer|
240
- replacer.replace(dom, self)
241
- end
242
- end
243
-
244
- def get_langs(values)
245
- langs = Set.new
246
- (values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
247
- index.each do |l, _val|
248
- langs.add(l)
249
- end
250
- end
251
- langs
252
- end
253
-
254
- def add_query_lang_code(href, lang_code, lang_param_name)
255
- query_separator = href.include?('?') ? '&' : '?'
256
-
257
- href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
258
- end
259
- end
260
- end
1
+ require 'addressable'
2
+
3
+ module Wovnrb
4
+ class Lang
5
+ LANG = {
6
+ 'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
7
+ 'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
8
+ 'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
9
+ 'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
10
+ 'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
11
+ 'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
12
+ 'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
13
+ 'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
14
+ 'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
15
+ 'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
16
+ 'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
17
+ 'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
18
+ 'en' => { name: 'English', code: 'en', en: 'English' },
19
+ 'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
20
+ 'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
21
+ 'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
22
+ 'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
23
+ 'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
24
+ 'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
25
+ 'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
26
+ 'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
27
+ 'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
28
+ 'fr' => { name: 'Français', code: 'fr', en: 'French' },
29
+ 'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
30
+ 'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
31
+ 'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
32
+ 'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
33
+ 'de' => { name: 'Deutsch', code: 'de', en: 'German' },
34
+ 'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
35
+ 'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
36
+ 'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
37
+ 'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
38
+ 'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
39
+ 'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
40
+ 'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
41
+ 'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
42
+ 'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
43
+ 'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
44
+ 'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
45
+ 'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
46
+ 'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
47
+ 'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
48
+ 'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
49
+ 'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
50
+ 'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
51
+ 'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
52
+ 'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
53
+ 'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
54
+ 'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
55
+ 'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
56
+ 'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
57
+ 'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
58
+ 'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
59
+ 'es' => { name: 'Español', code: 'es', en: 'Spanish' },
60
+ 'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
61
+ 'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
62
+ 'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
63
+ 'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
64
+ 'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
65
+ 'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
66
+ 'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
67
+ 'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
68
+ 'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
69
+ 'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
70
+ 'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
71
+ 'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
72
+ 'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
73
+ 'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
74
+ 'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
75
+ 'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
76
+ 'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
77
+ 'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
78
+ 'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
79
+ 'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
80
+ 'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
81
+ 'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
82
+ 'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
83
+ 'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
84
+ 'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
85
+ }.freeze
86
+
87
+ # Provides the ISO639-1 code for a given lang code.
88
+ # Source: https://support.google.com/webmasters/answer/189077?hl=en
89
+ #
90
+ # @param lang_code [String] Code of the language.
91
+ #
92
+ # @return [String] The ISO639-1 code of the language.
93
+ def self.iso_639_1_normalization(lang_code)
94
+ lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
95
+ end
96
+
97
+ def self.get_code(lang_name)
98
+ return nil if lang_name.nil?
99
+ return lang_name if LANG[lang_name]
100
+
101
+ custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
102
+ custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
103
+ return custom_lang[:code] if custom_lang
104
+
105
+ LANG.each do |_k, l|
106
+ return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
107
+ end
108
+ nil
109
+ end
110
+
111
+ def self.get_lang(lang)
112
+ lang_code = get_code(lang)
113
+ LANG[lang_code]
114
+ end
115
+
116
+ def initialize(lang_name)
117
+ @lang_code = Lang.get_code(lang_name)
118
+ end
119
+
120
+ attr_reader :lang_code
121
+
122
+ # Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
123
+ # When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
124
+ # If you want to know more examples, see also test/lib/lang_test.rb.
125
+ #
126
+ # @param [String] href original URL.
127
+ # @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
128
+ # @param [Wovnrb::Header] headers instance of Wovn::Header. It generates new env variable for original request.
129
+ # @return [String] URL with the language code added.
130
+ def add_lang_code(href, pattern, headers)
131
+ return href if /^(#.*)?$/.match?(href)
132
+
133
+ settings = Store.instance.settings
134
+ code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
135
+ lang_param_name = settings['lang_param_name']
136
+ # absolute links
137
+ new_href = href
138
+ if href && href =~ /^(https?:)?\/\//i
139
+ # in the future, perhaps validate url rather than using begin rescue
140
+ # "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
141
+ begin
142
+ uri = Addressable::URI.parse(href)
143
+ rescue
144
+ return new_href
145
+ end
146
+ # only add lang if it's an internal link
147
+ # DNS names are case insensitive
148
+ if uri.host.downcase === headers.host.downcase
149
+ case pattern
150
+ when 'subdomain'
151
+ sub_d = href.match(/\/\/([^.]*)\./)[1]
152
+ sub_code = Lang.get_code(sub_d)
153
+ new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
154
+ href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
155
+ else
156
+ href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
157
+ end
158
+ when 'query'
159
+ new_href = add_query_lang_code(href, code_to_add, lang_param_name)
160
+ else # path
161
+ new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
162
+ end
163
+ end
164
+ elsif href
165
+ case pattern
166
+ when 'subdomain'
167
+ lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
168
+ current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
169
+ new_href = case href
170
+ when /^\.\..*$/
171
+ # ../path
172
+ "#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
173
+ when /^\..*$/
174
+ # ./path
175
+ "#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
176
+ when /^\/.*$/
177
+ # /path
178
+ lang_url + href
179
+ else
180
+ # path
181
+ "#{lang_url}#{current_dir}/#{href}"
182
+ end
183
+ when 'query'
184
+ new_href = add_query_lang_code(href, code_to_add, lang_param_name)
185
+ else # path
186
+ if /^\//.match?(href)
187
+ new_href = "/#{code_to_add}#{href}"
188
+ else
189
+ current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
190
+ current_dir = '/' if current_dir == ''
191
+ new_href = "/#{code_to_add}#{current_dir}#{href}"
192
+ end
193
+ end
194
+ end
195
+
196
+ new_href
197
+ end
198
+
199
+ private
200
+
201
+ def index_href_for_encoding_and_decoding(dom)
202
+ result = {}
203
+ dom.xpath('//*[@href]').each do |a_tag|
204
+ url = a_tag['href']
205
+ begin
206
+ encoded_url = Addressable::URI.parse(url).normalize.to_s
207
+ result[encoded_url] = url if encoded_url != url
208
+ rescue Addressable::URI::InvalidURIError => e
209
+ WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
210
+ end
211
+ end
212
+ result
213
+ end
214
+
215
+ def replace_dom_values(dom, values, store, url, headers)
216
+ text_index = values['text_vals'] || {}
217
+ html_text_index = values['html_text_vals'] || {}
218
+ src_index = values['img_vals'] || {}
219
+ img_src_prefix = values['img_src_prefix'] || ''
220
+ host_aliases = values['host_aliases'] || []
221
+
222
+ replacers = []
223
+ # add lang code to anchors href if not default lang
224
+ if @lang_code != store.settings['default_lang']
225
+ pattern = store.settings['url_pattern']
226
+ replacers << LinkReplacer.new(store, pattern, headers)
227
+ end
228
+
229
+ replacers << if html_text_index.empty?
230
+ TextReplacer.new(store, text_index)
231
+ else
232
+ UnifiedValues::TextReplacer.new(store, html_text_index)
233
+ end
234
+ replacers << MetaReplacer.new(store, text_index, pattern, headers)
235
+ replacers << InputReplacer.new(store, text_index)
236
+ replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
237
+ replacers << ScriptReplacer.new(store) if dom.html?
238
+
239
+ replacers.each do |replacer|
240
+ replacer.replace(dom, self)
241
+ end
242
+ end
243
+
244
+ def get_langs(values)
245
+ langs = Set.new
246
+ (values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
247
+ index.each do |l, _val|
248
+ langs.add(l)
249
+ end
250
+ end
251
+ langs
252
+ end
253
+
254
+ def add_query_lang_code(href, lang_code, lang_param_name)
255
+ query_separator = href.include?('?') ? '&' : '?'
256
+
257
+ href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
258
+ end
259
+ end
260
+ end