wovnrb 3.11.0 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/wovnrb/lang.rb CHANGED
@@ -1,260 +1,260 @@
1
- require 'addressable'
2
-
3
- module Wovnrb
4
- class Lang
5
- LANG = {
6
- 'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
7
- 'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
8
- 'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
9
- 'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
10
- 'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
11
- 'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
12
- 'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
13
- 'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
14
- 'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
15
- 'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
16
- 'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
17
- 'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
18
- 'en' => { name: 'English', code: 'en', en: 'English' },
19
- 'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
20
- 'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
21
- 'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
22
- 'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
23
- 'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
24
- 'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
25
- 'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
26
- 'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
27
- 'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
28
- 'fr' => { name: 'Français', code: 'fr', en: 'French' },
29
- 'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
30
- 'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
31
- 'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
32
- 'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
33
- 'de' => { name: 'Deutsch', code: 'de', en: 'German' },
34
- 'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
35
- 'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
36
- 'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
37
- 'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
38
- 'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
39
- 'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
40
- 'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
41
- 'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
42
- 'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
43
- 'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
44
- 'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
45
- 'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
46
- 'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
47
- 'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
48
- 'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
49
- 'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
50
- 'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
51
- 'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
52
- 'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
53
- 'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
54
- 'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
55
- 'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
56
- 'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
57
- 'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
58
- 'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
59
- 'es' => { name: 'Español', code: 'es', en: 'Spanish' },
60
- 'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
61
- 'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
62
- 'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
63
- 'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
64
- 'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
65
- 'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
66
- 'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
67
- 'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
68
- 'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
69
- 'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
70
- 'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
71
- 'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
72
- 'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
73
- 'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
74
- 'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
75
- 'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
76
- 'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
77
- 'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
78
- 'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
79
- 'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
80
- 'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
81
- 'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
82
- 'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
83
- 'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
84
- 'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
85
- }.freeze
86
-
87
- # Provides the ISO639-1 code for a given lang code.
88
- # Source: https://support.google.com/webmasters/answer/189077?hl=en
89
- #
90
- # @param lang_code [String] lang_code Code of the language.
91
- #
92
- # @return [String] The ISO639-1 code of the language.
93
- def self.iso_639_1_normalization(lang_code)
94
- lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
95
- end
96
-
97
- def self.get_code(lang_name)
98
- return nil if lang_name.nil?
99
- return lang_name if LANG[lang_name]
100
-
101
- custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
102
- custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
103
- return custom_lang[:code] if custom_lang
104
-
105
- LANG.each do |_k, l|
106
- return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
107
- end
108
- nil
109
- end
110
-
111
- def self.get_lang(lang)
112
- lang_code = get_code(lang)
113
- LANG[lang_code]
114
- end
115
-
116
- def initialize(lang_name)
117
- @lang_code = Lang.get_code(lang_name)
118
- end
119
-
120
- attr_reader :lang_code
121
-
122
- # Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
123
- # When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
124
- # If you want to know more examples, see also test/lib/lang_test.rb.
125
- #
126
- # @param [String] href original URL.
127
- # @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
128
- # @param [Wovnrb::Header] headers instance of Wovn::Header. It generates new env variable for original request.
129
- # @return [String] URL added langauge code.
130
- def add_lang_code(href, pattern, headers)
131
- return href if /^(#.*)?$/.match?(href)
132
-
133
- settings = Store.instance.settings
134
- code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
135
- lang_param_name = settings['lang_param_name']
136
- # absolute links
137
- new_href = href
138
- if href && href =~ /^(https?:)?\/\//i
139
- # in the future, perhaps validate url rather than using begin rescue
140
- # "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
141
- begin
142
- uri = Addressable::URI.parse(href)
143
- rescue
144
- return new_href
145
- end
146
- # only add lang if it's an internal link
147
- # DNS names are case insensitive
148
- if uri.host.downcase === headers.host.downcase
149
- case pattern
150
- when 'subdomain'
151
- sub_d = href.match(/\/\/([^.]*)\./)[1]
152
- sub_code = Lang.get_code(sub_d)
153
- new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
154
- href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
155
- else
156
- href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
157
- end
158
- when 'query'
159
- new_href = add_query_lang_code(href, code_to_add, lang_param_name)
160
- else # path
161
- new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
162
- end
163
- end
164
- elsif href
165
- case pattern
166
- when 'subdomain'
167
- lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
168
- current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
169
- new_href = case href
170
- when /^\.\..*$/
171
- # ../path
172
- "#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
173
- when /^\..*$/
174
- # ./path
175
- "#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
176
- when /^\/.*$/
177
- # /path
178
- lang_url + href
179
- else
180
- # path
181
- "#{lang_url}#{current_dir}/#{href}"
182
- end
183
- when 'query'
184
- new_href = add_query_lang_code(href, code_to_add, lang_param_name)
185
- else # path
186
- if /^\//.match?(href)
187
- new_href = "/#{code_to_add}#{href}"
188
- else
189
- current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
190
- current_dir = '/' if current_dir == ''
191
- new_href = "/#{code_to_add}#{current_dir}#{href}"
192
- end
193
- end
194
- end
195
-
196
- new_href
197
- end
198
-
199
- private
200
-
201
- def index_href_for_encoding_and_decoding(dom)
202
- result = {}
203
- dom.xpath('//*[@href]').each do |a_tag|
204
- url = a_tag['href']
205
- begin
206
- encoded_url = Addressable::URI.parse(url).normalize.to_s
207
- result[encoded_url] = url if encoded_url != url
208
- rescue Addressable::URI::InvalidURIError => e
209
- WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
210
- end
211
- end
212
- result
213
- end
214
-
215
- def replace_dom_values(dom, values, store, url, headers)
216
- text_index = values['text_vals'] || {}
217
- html_text_index = values['html_text_vals'] || {}
218
- src_index = values['img_vals'] || {}
219
- img_src_prefix = values['img_src_prefix'] || ''
220
- host_aliases = values['host_aliases'] || []
221
-
222
- replacers = []
223
- # add lang code to anchors href if not default lang
224
- if @lang_code != store.settings['default_lang']
225
- pattern = store.settings['url_pattern']
226
- replacers << LinkReplacer.new(store, pattern, headers)
227
- end
228
-
229
- replacers << if html_text_index.empty?
230
- TextReplacer.new(store, text_index)
231
- else
232
- UnifiedValues::TextReplacer.new(store, html_text_index)
233
- end
234
- replacers << MetaReplacer.new(store, text_index, pattern, headers)
235
- replacers << InputReplacer.new(store, text_index)
236
- replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
237
- replacers << ScriptReplacer.new(store) if dom.html?
238
-
239
- replacers.each do |replacer|
240
- replacer.replace(dom, self)
241
- end
242
- end
243
-
244
- def get_langs(values)
245
- langs = Set.new
246
- (values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
247
- index.each do |l, _val|
248
- langs.add(l)
249
- end
250
- end
251
- langs
252
- end
253
-
254
- def add_query_lang_code(href, lang_code, lang_param_name)
255
- query_separator = href.include?('?') ? '&' : '?'
256
-
257
- href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
258
- end
259
- end
260
- end
1
+ require 'addressable'
2
+
3
+ module Wovnrb
4
+ class Lang
5
+ LANG = {
6
+ 'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
7
+ 'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
8
+ 'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
9
+ 'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
10
+ 'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
11
+ 'zh-CN' => { name: '简体中文(中国)', code: 'zh-CN', en: 'Simp Chinese (China)' },
12
+ 'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
13
+ 'zh-Hant-HK' => { name: '繁體中文(香港)', code: 'zh-Hant-HK', en: 'Trad Chinese (Hong Kong)' },
14
+ 'zh-Hant-TW' => { name: '繁體中文(台湾)', code: 'zh-Hant-TW', en: 'Trad Chinese (Taiwan)' },
15
+ 'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
16
+ 'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
17
+ 'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
18
+ 'en' => { name: 'English', code: 'en', en: 'English' },
19
+ 'en-AU' => { name: 'English (Australia)', code: 'en-AU', en: 'English (Australia)' },
20
+ 'en-CA' => { name: 'English (Canada)', code: 'en-CA', en: 'English (Canada)' },
21
+ 'en-IN' => { name: 'English (India)', code: 'en-IN', en: 'English (India)' },
22
+ 'en-NZ' => { name: 'English (New Zealand)', code: 'en-NZ', en: 'English (New Zealand)' },
23
+ 'en-ZA' => { name: 'English (South Africa)', code: 'en-ZA', en: 'English (South Africa)' },
24
+ 'en-GB' => { name: 'English (United Kingdom)', code: 'en-GB', en: 'English (United Kingdom)' },
25
+ 'en-SG' => { name: 'English (Singapore)', code: 'en-SG', en: 'English (Singapore)' },
26
+ 'en-US' => { name: 'English (United States)', code: 'en-US', en: 'English (United States)' },
27
+ 'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
28
+ 'fr' => { name: 'Français', code: 'fr', en: 'French' },
29
+ 'fr-CA' => { name: 'Français (Canada)', code: 'fr-CA', en: 'French (Canada)' },
30
+ 'fr-FR' => { name: 'Français (France)', code: 'fr-FR', en: 'French (France)' },
31
+ 'fr-CH' => { name: 'Français (Suisse)', code: 'fr-CH', en: 'French (Switzerland)' },
32
+ 'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
33
+ 'de' => { name: 'Deutsch', code: 'de', en: 'German' },
34
+ 'de-AT' => { name: 'Deutsch (Österreich)', code: 'de-AT', en: 'German (Austria)' },
35
+ 'de-DE' => { name: 'Deutsch (Deutschland)', code: 'de-DE', en: 'German (Germany)' },
36
+ 'de-LI' => { name: 'Deutsch (Liechtenstien)', code: 'de-LI', en: 'German (Liechtenstien)' },
37
+ 'de-CH' => { name: 'Deutsch (Schweiz)', code: 'de-CH', en: 'German (Switzerland)' },
38
+ 'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
39
+ 'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
40
+ 'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
41
+ 'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
42
+ 'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
43
+ 'it-IT' => { name: 'Italiano (Italia)', code: 'it-IT', en: 'Italian (Italy)' },
44
+ 'it-CH' => { name: 'Italiano (Svizzera)', code: 'it-CH', en: 'Italian (Switzerland)' },
45
+ 'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
46
+ 'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
47
+ 'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
48
+ 'mn' => { name: 'монгол', code: 'mn', en: 'Mongolian' },
49
+ 'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
50
+ 'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
51
+ 'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
52
+ 'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
53
+ 'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
54
+ 'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
55
+ 'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
56
+ 'pt-BR' => { name: 'Português (Brasil)', code: 'pt-BR', en: 'Portuguese (Brazil)' },
57
+ 'pt-PT' => { name: 'Português (Portugal)', code: 'pt-PT', en: 'Portuguese (Portugal)' },
58
+ 'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
59
+ 'es' => { name: 'Español', code: 'es', en: 'Spanish' },
60
+ 'es-AR' => { name: 'Español (Argentina)', code: 'es-AR', en: 'Spanish (Argentina)' },
61
+ 'es-CL' => { name: 'Español (Chile)', code: 'es-CL', en: 'Spanish (Chile)' },
62
+ 'es-CO' => { name: 'Español (Colombia)', code: 'es-CO', en: 'Spanish (Colombia)' },
63
+ 'es-CR' => { name: 'Español (Costa Rica)', code: 'es-CR', en: 'Spanish (Costa Rica)' },
64
+ 'es-HN' => { name: 'Español (Honduras)', code: 'es-HN', en: 'Spanish (Honduras)' },
65
+ 'es-419' => { name: 'Español (Latinoamérica)', code: 'es-419', en: 'Spanish (Latin America)' },
66
+ 'es-MX' => { name: 'Español (México)', code: 'es-MX', en: 'Spanish (Mexico)' },
67
+ 'es-PE' => { name: 'Español (Perú)', code: 'es-PE', en: 'Spanish (Peru)' },
68
+ 'es-ES' => { name: 'Español (España)', code: 'es-ES', en: 'Spanish (Spain)' },
69
+ 'es-US' => { name: 'Español (Estados Unidos)', code: 'es-US', en: 'Spanish (United States)' },
70
+ 'es-UY' => { name: 'Español (Uruguay)', code: 'es-UY', en: 'Spanish (Uruguay)' },
71
+ 'es-VE' => { name: 'Español (Venezuela)', code: 'es-VE', en: 'Spanish (Venezuela)' },
72
+ 'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
73
+ 'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
74
+ 'tl' => { name: 'Tagalog', code: 'tl', en: 'Tagalog' },
75
+ 'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
76
+ 'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
77
+ 'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
78
+ 'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
79
+ 'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
80
+ 'uz' => { name: 'Oʻzbekcha', code: 'uz', en: 'Uzbek' },
81
+ 'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' },
82
+ 'km' => { name: 'ភាសាខ្មែរ', code: 'km', en: 'Khmer' },
83
+ 'ta' => { name: 'தமிழ்', code: 'ta', en: 'Tamil' },
84
+ 'si' => { name: 'සිංහල', code: 'si', en: 'Sinhala' }
85
+ }.freeze
86
+
87
+ # Provides the ISO639-1 code for a given lang code.
88
+ # Source: https://support.google.com/webmasters/answer/189077?hl=en
89
+ #
90
+ # @param lang_code [String] Code of the language.
91
+ #
92
+ # @return [String] The ISO639-1 code of the language.
93
+ def self.iso_639_1_normalization(lang_code)
94
+ lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
95
+ end
96
+
97
+ def self.get_code(lang_name)
98
+ return nil if lang_name.nil?
99
+ return lang_name if LANG[lang_name]
100
+
101
+ custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
102
+ custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
103
+ return custom_lang[:code] if custom_lang
104
+
105
+ LANG.each_value do |l|
106
+ return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
107
+ end
108
+ nil
109
+ end
110
+
111
+ def self.get_lang(lang)
112
+ lang_code = get_code(lang)
113
+ LANG[lang_code]
114
+ end
115
+
116
+ def initialize(lang_name)
117
+ @lang_code = Lang.get_code(lang_name)
118
+ end
119
+
120
+ attr_reader :lang_code
121
+
122
+ # Adds a language code to the URL in "href", following the "pattern" setting and using this instance's @lang_code.
123
+ # When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
124
+ # For more examples, see test/lib/lang_test.rb.
125
+ #
126
+ # @param [String] href original URL.
127
+ # @param [String] pattern url_pattern of the settings. ('path', 'subdomain' or 'query')
128
+ # @param [Wovnrb::Header] headers instance of Wovnrb::Header. It generates new env variable for original request.
129
+ # @return [String] URL with the language code added.
130
+ def add_lang_code(href, pattern, headers)
131
+ return href if /^(#.*)?$/.match?(href)
132
+
133
+ settings = Store.instance.settings
134
+ code_to_add = settings['custom_lang_aliases'][@lang_code] || @lang_code
135
+ lang_param_name = settings['lang_param_name']
136
+ # absolute links
137
+ new_href = href
138
+ if href && href =~ /^(https?:)?\/\//i
139
+ # in the future, perhaps validate url rather than using begin rescue
140
+ # "#{url =~ /\// ? 'http:' : ''}#{url}" =~ URI::regexp
141
+ begin
142
+ uri = Addressable::URI.parse(href)
143
+ rescue
144
+ return new_href
145
+ end
146
+ # only add lang if it's an internal link
147
+ # DNS names are case insensitive
148
+ if uri.host.downcase === headers.host.downcase
149
+ case pattern
150
+ when 'subdomain'
151
+ sub_d = href.match(/\/\/([^.]*)\./)[1]
152
+ sub_code = Lang.get_code(sub_d)
153
+ new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
154
+ href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
155
+ else
156
+ href.sub(/(\/\/)([^.]*)/, "\\1#{code_to_add.downcase}.\\2")
157
+ end
158
+ when 'query'
159
+ new_href = add_query_lang_code(href, code_to_add, lang_param_name)
160
+ else # path
161
+ new_href = href.sub(/([^.]*\.[^\/]*)(\/|$)/, "\\1/#{code_to_add}/")
162
+ end
163
+ end
164
+ elsif href
165
+ case pattern
166
+ when 'subdomain'
167
+ lang_url = "#{headers.protocol}://#{code_to_add.downcase}.#{headers.host}"
168
+ current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
169
+ new_href = case href
170
+ when /^\.\..*$/
171
+ # ../path
172
+ "#{lang_url}/#{href.gsub(/^\.\.\//, '')}"
173
+ when /^\..*$/
174
+ # ./path
175
+ "#{lang_url}#{current_dir}/#{href.gsub(/^\.\//, '')}"
176
+ when /^\/.*$/
177
+ # /path
178
+ lang_url + href
179
+ else
180
+ # path
181
+ "#{lang_url}#{current_dir}/#{href}"
182
+ end
183
+ when 'query'
184
+ new_href = add_query_lang_code(href, code_to_add, lang_param_name)
185
+ else # path
186
+ if /^\//.match?(href)
187
+ new_href = "/#{code_to_add}#{href}"
188
+ else
189
+ current_dir = headers.pathname.sub(/[^\/]*\.[^.]{2,6}$/, '')
190
+ current_dir = '/' if current_dir == ''
191
+ new_href = "/#{code_to_add}#{current_dir}#{href}"
192
+ end
193
+ end
194
+ end
195
+
196
+ new_href
197
+ end
198
+
199
+ private
200
+
201
+ def index_href_for_encoding_and_decoding(dom)
202
+ result = {}
203
+ dom.xpath('//*[@href]').each do |a_tag|
204
+ url = a_tag['href']
205
+ begin
206
+ encoded_url = Addressable::URI.parse(url).normalize.to_s
207
+ result[encoded_url] = url if encoded_url != url
208
+ rescue Addressable::URI::InvalidURIError => e
209
+ WovnLogger.instance.error("Failed parse url : #{url}#{e.message}")
210
+ end
211
+ end
212
+ result
213
+ end
214
+
215
+ def replace_dom_values(dom, values, store, url, headers)
216
+ text_index = values['text_vals'] || {}
217
+ html_text_index = values['html_text_vals'] || {}
218
+ src_index = values['img_vals'] || {}
219
+ img_src_prefix = values['img_src_prefix'] || ''
220
+ host_aliases = values['host_aliases'] || []
221
+
222
+ replacers = []
223
+ # add lang code to anchors href if not default lang
224
+ if @lang_code != store.settings['default_lang']
225
+ pattern = store.settings['url_pattern']
226
+ replacers << LinkReplacer.new(store, pattern, headers)
227
+ end
228
+
229
+ replacers << if html_text_index.empty?
230
+ TextReplacer.new(store, text_index)
231
+ else
232
+ UnifiedValues::TextReplacer.new(store, html_text_index)
233
+ end
234
+ replacers << MetaReplacer.new(store, text_index, pattern, headers)
235
+ replacers << InputReplacer.new(store, text_index)
236
+ replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
237
+ replacers << ScriptReplacer.new(store) if dom.html?
238
+
239
+ replacers.each do |replacer|
240
+ replacer.replace(dom, self)
241
+ end
242
+ end
243
+
244
+ def get_langs(values)
245
+ langs = Set.new
246
+ (values['text_vals'] || {}).merge(values['img_vals'] || {}).each_value do |index|
247
+ index.each_key do |l|
248
+ langs.add(l)
249
+ end
250
+ end
251
+ langs
252
+ end
253
+
254
+ def add_query_lang_code(href, lang_code, lang_param_name)
255
+ query_separator = href.include?('?') ? '&' : '?'
256
+
257
+ href.sub(/(#|$)/, "#{query_separator}#{lang_param_name}=#{lang_code}\\1")
258
+ end
259
+ end
260
+ end