wovnrb 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +308 -0
- data/Rakefile +13 -14
- data/lib/wovnrb.rb +43 -98
- data/lib/wovnrb/api_translator.rb +143 -0
- data/lib/wovnrb/headers.rb +78 -92
- data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
- data/lib/wovnrb/lang.rb +93 -125
- data/lib/wovnrb/railtie.rb +5 -7
- data/lib/wovnrb/services/glob.rb +3 -3
- data/lib/wovnrb/services/html_converter.rb +192 -0
- data/lib/wovnrb/services/html_replace_marker.rb +38 -0
- data/lib/wovnrb/services/wovn_logger.rb +8 -4
- data/lib/wovnrb/settings.rb +5 -3
- data/lib/wovnrb/store.rb +35 -26
- data/lib/wovnrb/text_caches/cache_base.rb +3 -2
- data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
- data/lib/wovnrb/version.rb +1 -1
- data/test/fixtures/html/test.html +8 -0
- data/test/fixtures/html/test_translated.html +8 -0
- data/test/lib/api_translator_test.rb +109 -0
- data/test/lib/headers_test.rb +84 -55
- data/test/lib/lang_test.rb +157 -357
- data/test/lib/services/glob_test.rb +1 -1
- data/test/lib/services/html_converter_test.rb +166 -0
- data/test/lib/services/html_replace_marker_test.rb +75 -0
- data/test/lib/services/wovn_logger_test.rb +6 -6
- data/test/lib/store_test.rb +25 -69
- data/test/lib/text_caches/cache_base_test.rb +1 -1
- data/test/lib/text_caches/memory_cache_test.rb +10 -11
- data/test/lib/wovnrb_test.rb +77 -310
- data/test/test_helper.rb +22 -32
- data/wovnrb.gemspec +35 -44
- metadata +86 -205
- data/ext/dom/Makefile +0 -239
- data/lib/wovnrb/api_data.rb +0 -59
- data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
- data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
- data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
- data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
- data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
- data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
- data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
- data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
- data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
- data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
- data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
- data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
- data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
- data/lib/wovnrb/services/url.rb +0 -12
- data/lib/wovnrb/services/value_agent.rb +0 -9
- data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
- data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
- data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
- data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
- data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
- data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
- data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
- data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
- data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
- data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
- data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
- data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
- data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
- data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
- data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
- data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
- data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
- data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
- data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
- data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
- data/test/lib/api_data_test.rb +0 -83
- data/test/lib/html_replacers/image_replacer_test.rb +0 -165
- data/test/lib/html_replacers/input_replacer_test.rb +0 -140
- data/test/lib/html_replacers/link_replacer_test.rb +0 -328
- data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
- data/test/lib/html_replacers/replacer_base_test.rb +0 -128
- data/test/lib/html_replacers/script_replacer_test.rb +0 -139
- data/test/lib/html_replacers/text_replacer_test.rb +0 -99
- data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
- data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
- data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
- data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
- data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
- data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
- data/test/lib/services/url_test.rb +0 -9
- data/test/lib/services/value_agent_test.rb +0 -32
- data/test/services/url_test.rb +0 -163
- data/values/values +0 -1
data/lib/wovnrb/lang.rb
CHANGED
@@ -1,51 +1,49 @@
|
|
1
|
-
# -*- encoding: UTF-8 -*-
|
2
|
-
|
3
1
|
require 'addressable'
|
4
2
|
|
5
3
|
module Wovnrb
|
6
4
|
class Lang
|
7
5
|
LANG = {
|
8
|
-
#http://msdn.microsoft.com/en-us/library/hh456380.aspx
|
9
|
-
'ar' => {name: 'العربية', code: 'ar', en: 'Arabic'},
|
10
|
-
'eu' => {name: 'Euskara', code: 'eu', en: 'Basque'},
|
11
|
-
'bn' => {name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali'},
|
12
|
-
'bg' => {name: 'Български', code: 'bg', en: 'Bulgarian'},
|
13
|
-
'ca' => {name: 'Català', code: 'ca', en: 'Catalan'},
|
14
|
-
'zh-CHS' => {name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese'},
|
15
|
-
'zh-CHT' => {name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese'},
|
16
|
-
'da' => {name: 'Dansk', code: 'da', en: 'Danish'},
|
17
|
-
'nl' => {name: 'Nederlands', code: 'nl', en: 'Dutch'},
|
18
|
-
'en' => {name: 'English', code: 'en', en: 'English'},
|
19
|
-
'fi' => {name: 'Suomi', code: 'fi', en: 'Finnish'},
|
20
|
-
'fr' => {name: 'Français', code: 'fr', en: 'French'},
|
21
|
-
'gl' => {name: 'Galego', code: 'gl', en: 'Galician'},
|
22
|
-
'de' => {name: 'Deutsch', code: 'de', en: 'German'},
|
23
|
-
'el' => {name: 'Ελληνικά', code: 'el', en: 'Greek'},
|
24
|
-
'he' => {name: 'עברית', code: 'he', en: 'Hebrew'},
|
25
|
-
'hu' => {name: 'Magyar', code: 'hu', en: 'Hungarian'},
|
26
|
-
'id' => {name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian'},
|
27
|
-
'it' => {name: 'Italiano', code: 'it', en: 'Italian'},
|
28
|
-
'ja' => {name: '日本語', code: 'ja', en: 'Japanese'},
|
29
|
-
'ko' => {name: '한국어', code: 'ko', en: 'Korean'},
|
30
|
-
'lv' =>{name: 'Latviešu', code: 'lv', en: 'Latvian'},
|
31
|
-
'ms' => {name: 'Bahasa Melayu', code: 'ms', en: 'Malay'},
|
6
|
+
# http://msdn.microsoft.com/en-us/library/hh456380.aspx
|
7
|
+
'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
|
8
|
+
'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
|
9
|
+
'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
|
10
|
+
'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
|
11
|
+
'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
|
12
|
+
'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
|
13
|
+
'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
|
14
|
+
'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
|
15
|
+
'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
|
16
|
+
'en' => { name: 'English', code: 'en', en: 'English' },
|
17
|
+
'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
|
18
|
+
'fr' => { name: 'Français', code: 'fr', en: 'French' },
|
19
|
+
'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
|
20
|
+
'de' => { name: 'Deutsch', code: 'de', en: 'German' },
|
21
|
+
'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
|
22
|
+
'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
|
23
|
+
'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
|
24
|
+
'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
|
25
|
+
'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
|
26
|
+
'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
|
27
|
+
'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
|
28
|
+
'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
|
29
|
+
'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
|
32
30
|
'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
|
33
|
-
'ne' => {name: 'नेपाली भाषा', code: 'ne', en: 'Nepali'},
|
34
|
-
'fa' => {name: 'زبان_فارسی', code: 'fa', en: 'Persian'},
|
35
|
-
'no' => {name: 'Norsk', code: 'no', en: 'Norwegian'},
|
36
|
-
'pl' => {name: 'Polski', code: 'pl', en: 'Polish'},
|
37
|
-
'pt' => {name: 'Português', code: 'pt', en: 'Portuguese'},
|
38
|
-
'ru' => {name: 'Русский', code: 'ru', en: 'Russian'},
|
39
|
-
'es' => {name: 'Español', code: 'es', en: 'Spanish'},
|
40
|
-
'sw' => {name: 'Kiswahili', code: 'sw', en: 'Swahili'},
|
41
|
-
'sv' => {name: 'Svensk', code: 'sv', en: 'Swedish'},
|
42
|
-
'th' => {name: 'ภาษาไทย', code: 'th', en: 'Thai'},
|
43
|
-
'hi' => {name: 'हिन्दी', code: 'hi', en: 'Hindi'},
|
44
|
-
'tr' => {name: 'Türkçe', code: 'tr', en: 'Turkish'},
|
45
|
-
'uk' => {name: 'Українська', code: 'uk', en: 'Ukrainian'},
|
46
|
-
'ur' => {name: 'اردو', code: 'ur', en: 'Urdu'},
|
47
|
-
'vi' => {name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese'}
|
48
|
-
}
|
31
|
+
'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
|
32
|
+
'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
|
33
|
+
'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
|
34
|
+
'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
|
35
|
+
'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
|
36
|
+
'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
|
37
|
+
'es' => { name: 'Español', code: 'es', en: 'Spanish' },
|
38
|
+
'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
|
39
|
+
'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
|
40
|
+
'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
|
41
|
+
'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
|
42
|
+
'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
|
43
|
+
'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
|
44
|
+
'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
|
45
|
+
'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' }
|
46
|
+
}.freeze
|
49
47
|
|
50
48
|
# Provides the ISO639-1 code for a given lang code.
|
51
49
|
# Source: https://support.google.com/webmasters/answer/189077?hl=en
|
@@ -54,35 +52,33 @@ module Wovnrb
|
|
54
52
|
#
|
55
53
|
# @return [String] The ISO639-1 code of the language.
|
56
54
|
def self.iso_639_1_normalization(lang_code)
|
57
|
-
|
55
|
+
lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
|
58
56
|
end
|
59
57
|
|
60
58
|
def self.get_code(lang_name)
|
61
59
|
return nil if lang_name.nil?
|
62
60
|
return lang_name if LANG[lang_name]
|
61
|
+
|
63
62
|
custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
|
64
63
|
custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
|
65
64
|
return custom_lang[:code] if custom_lang
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
65
|
+
|
66
|
+
LANG.each do |_k, l|
|
67
|
+
return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
|
70
68
|
end
|
71
|
-
|
69
|
+
nil
|
72
70
|
end
|
73
71
|
|
74
72
|
def self.get_lang(lang)
|
75
73
|
lang_code = get_code(lang)
|
76
|
-
|
74
|
+
LANG[lang_code]
|
77
75
|
end
|
78
76
|
|
79
77
|
def initialize(lang_name)
|
80
78
|
@lang_code = Lang.get_code(lang_name)
|
81
79
|
end
|
82
80
|
|
83
|
-
|
84
|
-
@lang_code
|
85
|
-
end
|
81
|
+
attr_reader :lang_code
|
86
82
|
|
87
83
|
# Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
|
88
84
|
# When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
|
@@ -94,6 +90,7 @@ module Wovnrb
|
|
94
90
|
# @return [String] URL added langauge code.
|
95
91
|
def add_lang_code(href, pattern, headers)
|
96
92
|
return href if href =~ /^(#.*)?$/
|
93
|
+
|
97
94
|
code_to_add = Store.instance.settings['custom_lang_aliases'][@lang_code] || @lang_code
|
98
95
|
# absolute links
|
99
96
|
new_href = href
|
@@ -109,83 +106,54 @@ module Wovnrb
|
|
109
106
|
# DNS names are case insensitive
|
110
107
|
if uri.host.downcase === headers.host.downcase
|
111
108
|
case pattern
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
109
|
+
when 'subdomain'
|
110
|
+
sub_d = href.match(/\/\/([^\.]*)\./)[1]
|
111
|
+
sub_code = Lang.get_code(sub_d)
|
112
|
+
new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
|
113
|
+
href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
|
114
|
+
else
|
115
|
+
href.sub(/(\/\/)([^\.]*)/, '\1' + code_to_add.downcase + '.' + '\2')
|
116
|
+
end
|
117
|
+
when 'query'
|
118
|
+
new_href = add_query_lang_code(href, code_to_add)
|
119
|
+
else # path
|
120
|
+
new_href = href.sub(/([^\.]*\.[^\/]*)(\/|$)/, '\1/' + code_to_add + '/')
|
124
121
|
end
|
125
122
|
end
|
126
123
|
elsif href
|
127
124
|
case pattern
|
128
|
-
|
129
|
-
|
125
|
+
when 'subdomain'
|
126
|
+
lang_url = headers.protocol + '://' + code_to_add.downcase + '.' + headers.host
|
127
|
+
current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
|
128
|
+
new_href = if href =~ /^\.\..*$/
|
129
|
+
# ../path
|
130
|
+
lang_url + '/' + href.gsub(/^\.\.\//, '')
|
131
|
+
elsif href =~ /^\..*$/
|
132
|
+
# ./path
|
133
|
+
lang_url + current_dir + '/' + href.gsub(/^\.\//, '')
|
134
|
+
elsif href =~ /^\/.*$/
|
135
|
+
# /path
|
136
|
+
lang_url + href
|
137
|
+
else
|
138
|
+
# path
|
139
|
+
lang_url + current_dir + '/' + href
|
140
|
+
end
|
141
|
+
when 'query'
|
142
|
+
new_href = add_query_lang_code(href, code_to_add)
|
143
|
+
else # path
|
144
|
+
if href =~ /^\//
|
145
|
+
new_href = '/' + code_to_add + href
|
146
|
+
else
|
130
147
|
current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
|
131
|
-
if
|
132
|
-
|
133
|
-
|
134
|
-
elsif href =~ /^\..*$/
|
135
|
-
# ./path
|
136
|
-
new_href = lang_url + current_dir + '/' + href.gsub(/^\.\//, '')
|
137
|
-
elsif href =~ /^\/.*$/
|
138
|
-
# /path
|
139
|
-
new_href = lang_url + href
|
140
|
-
else
|
141
|
-
# path
|
142
|
-
new_href = lang_url + current_dir + '/' + href
|
143
|
-
end
|
144
|
-
when 'query'
|
145
|
-
new_href = add_query_lang_code(href, code_to_add)
|
146
|
-
else # path
|
147
|
-
if href =~ /^\//
|
148
|
-
new_href = '/' + code_to_add + href
|
149
|
-
else
|
150
|
-
current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
|
151
|
-
current_dir = '/' if current_dir == ''
|
152
|
-
new_href = '/' + code_to_add + current_dir + href
|
153
|
-
end
|
148
|
+
current_dir = '/' if current_dir == ''
|
149
|
+
new_href = '/' + code_to_add + current_dir + href
|
150
|
+
end
|
154
151
|
end
|
155
152
|
end
|
156
153
|
|
157
154
|
new_href
|
158
155
|
end
|
159
156
|
|
160
|
-
def switch_dom_lang(dom, store, values, url, headers)
|
161
|
-
replace_dom_values(dom, values, store, url, headers)
|
162
|
-
|
163
|
-
if dom.html?
|
164
|
-
# INSERT LANGUAGE METALINKS
|
165
|
-
parent_node = dom.at_css('head') || dom.at_css('body') || dom.at_css('html')
|
166
|
-
published_langs = get_langs(values)
|
167
|
-
all_langs = published_langs.add(store.settings['default_lang'])
|
168
|
-
all_langs.each do |l|
|
169
|
-
insert_node = Nokogiri::XML::Node.new('link', dom)
|
170
|
-
insert_node['rel'] = 'alternate'
|
171
|
-
insert_node['hreflang'] = Lang::iso_639_1_normalization(l)
|
172
|
-
insert_node['href'] = headers.redirect_location(l)
|
173
|
-
parent_node.add_child(insert_node)
|
174
|
-
end
|
175
|
-
|
176
|
-
# set lang property on HTML tag
|
177
|
-
if dom.at_css('html') || dom.at_css('HTML')
|
178
|
-
(dom.at_css('html') || dom.at_css('HTML')).set_attribute('lang', Lang::iso_639_1_normalization(@lang_code))
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
index_href = index_href_for_encoding_and_decoding(dom)
|
183
|
-
# NOTE: when we use `#to_html` with nokogiri, nokogiri encode all href.
|
184
|
-
# but we want to keep original href as much as possible.
|
185
|
-
# That's why we replace href with original href which added lang info by wovnrb like this after we used `to_html`
|
186
|
-
dom.to_html(save_with: 0).gsub(/href="([^"]*)"/) { |m| "href=\"#{index_href[$1] || $1}\"" }
|
187
|
-
end
|
188
|
-
|
189
157
|
private
|
190
158
|
|
191
159
|
def index_href_for_encoding_and_decoding(dom)
|
@@ -216,11 +184,11 @@ module Wovnrb
|
|
216
184
|
replacers << LinkReplacer.new(store, pattern, headers)
|
217
185
|
end
|
218
186
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
187
|
+
replacers << if html_text_index.empty?
|
188
|
+
TextReplacer.new(store, text_index)
|
189
|
+
else
|
190
|
+
UnifiedValues::TextReplacer.new(store, html_text_index)
|
191
|
+
end
|
224
192
|
replacers << MetaReplacer.new(store, text_index, pattern, headers)
|
225
193
|
replacers << InputReplacer.new(store, text_index)
|
226
194
|
replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
|
@@ -233,8 +201,8 @@ module Wovnrb
|
|
233
201
|
|
234
202
|
def get_langs(values)
|
235
203
|
langs = Set.new
|
236
|
-
(values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |
|
237
|
-
index.each do |l,
|
204
|
+
(values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
|
205
|
+
index.each do |l, _val|
|
238
206
|
langs.add(l)
|
239
207
|
end
|
240
208
|
end
|
data/lib/wovnrb/railtie.rb
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
module Wovnrb
|
2
|
-
|
3
2
|
class Railtie < Rails::Railtie
|
4
3
|
initializer 'wovnrb.configure_rails_initialization' do |app|
|
5
4
|
app.middleware.insert_before(0, Wovnrb::Interceptor)
|
6
|
-
#begin
|
5
|
+
# begin
|
7
6
|
# app.middleware.insert_before(Rack::Runtime, Wovnrb::Interceptor)
|
8
|
-
#rescue
|
7
|
+
# rescue
|
9
8
|
# app.middleware.insert_before(0, Wovnrb::Interceptor)
|
10
|
-
#end
|
9
|
+
# end
|
11
10
|
|
12
|
-
#if Rails.env.development? && config.respond_to?(:wovnrb)
|
11
|
+
# if Rails.env.development? && config.respond_to?(:wovnrb)
|
13
12
|
# config.after_initialize do
|
14
13
|
# config.wovnrb[:project_token] = User.first.short_token
|
15
14
|
# end
|
16
|
-
#end
|
15
|
+
# end
|
17
16
|
end
|
18
17
|
end
|
19
|
-
|
20
18
|
end
|
data/lib/wovnrb/services/glob.rb
CHANGED
@@ -11,8 +11,8 @@ module Wovnrb
|
|
11
11
|
sub_directories = pattern.split('/**', -1)
|
12
12
|
regexp = sub_directories.map do |sub_dir|
|
13
13
|
sub_dir.split('*', -1)
|
14
|
-
|
15
|
-
|
14
|
+
.map { |p| Regexp.escape(p) }
|
15
|
+
.join('[^/]*')
|
16
16
|
end.join('(/[^/]*)*')
|
17
17
|
|
18
18
|
@regexp = Regexp.new("^#{regexp}$")
|
@@ -22,4 +22,4 @@ module Wovnrb
|
|
22
22
|
!@regexp.match(url).nil?
|
23
23
|
end
|
24
24
|
end
|
25
|
-
end
|
25
|
+
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
module Wovnrb
|
2
|
+
class HtmlConverter
|
3
|
+
def initialize(dom, store, headers)
|
4
|
+
@dom = dom
|
5
|
+
@headers = headers
|
6
|
+
@store = store
|
7
|
+
end
|
8
|
+
|
9
|
+
def build
|
10
|
+
transform_html
|
11
|
+
html
|
12
|
+
end
|
13
|
+
|
14
|
+
def build_api_compatible_html
|
15
|
+
marker = HtmlReplaceMarker.new
|
16
|
+
converted_html = replace_dom(marker)
|
17
|
+
|
18
|
+
[converted_html, marker]
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def html
|
24
|
+
@dom.to_html(save_with: 0).strip
|
25
|
+
end
|
26
|
+
|
27
|
+
def transform_html
|
28
|
+
replace_snippet
|
29
|
+
replace_hreflangs
|
30
|
+
inject_lang_html_tag
|
31
|
+
end
|
32
|
+
|
33
|
+
def replace_snippet
|
34
|
+
strip_snippet
|
35
|
+
insert_snippet
|
36
|
+
end
|
37
|
+
|
38
|
+
def replace_dom(marker)
|
39
|
+
strip_snippet
|
40
|
+
strip_hreflangs if add_hreflang
|
41
|
+
|
42
|
+
@dom.traverse { |node| transform_node(node, marker) }
|
43
|
+
|
44
|
+
insert_snippet(true)
|
45
|
+
insert_hreflang_tags
|
46
|
+
|
47
|
+
html
|
48
|
+
end
|
49
|
+
|
50
|
+
def transform_node(node, marker)
|
51
|
+
strip_wovn_ignore(node, marker)
|
52
|
+
strip_custom_ignore(node, marker)
|
53
|
+
strip_form(node, marker)
|
54
|
+
strip_script(node, marker)
|
55
|
+
end
|
56
|
+
|
57
|
+
def strip_script(node, marker)
|
58
|
+
put_replace_marker(node, marker) if node.name.casecmp('script').zero?
|
59
|
+
end
|
60
|
+
|
61
|
+
def strip_form(node, marker)
|
62
|
+
if node.name.casecmp('form').zero?
|
63
|
+
put_replace_marker(node, marker)
|
64
|
+
return
|
65
|
+
end
|
66
|
+
|
67
|
+
if node.name.casecmp('input').zero? && node.get_attribute('type') == 'hidden'
|
68
|
+
original_text = node.get_attribute('value')
|
69
|
+
return if original_text.include?(HtmlReplaceMarker::KEY_PREFIX)
|
70
|
+
|
71
|
+
node.set_attribute('value', marker.add_value(original_text))
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def strip_custom_ignore(node, marker)
|
76
|
+
classes = node.get_attribute('class')
|
77
|
+
return unless classes.present?
|
78
|
+
|
79
|
+
ignored_classes = @store.settings['ignore_class']
|
80
|
+
should_be_ignored = (ignored_classes & classes.split(' ')).present?
|
81
|
+
|
82
|
+
put_replace_marker(node, marker) if should_be_ignored
|
83
|
+
end
|
84
|
+
|
85
|
+
def strip_wovn_ignore(node, marker)
|
86
|
+
put_replace_marker(node, marker) if node && node.get_attribute('wovn-ignore')
|
87
|
+
end
|
88
|
+
|
89
|
+
def put_replace_marker(node, marker)
|
90
|
+
original_text = node.inner_text
|
91
|
+
return if original_text.include?(HtmlReplaceMarker::KEY_PREFIX)
|
92
|
+
|
93
|
+
node.inner_html = marker.add_comment_value(original_text)
|
94
|
+
end
|
95
|
+
|
96
|
+
def strip_hreflangs
|
97
|
+
supported_langs = @store.supported_langs
|
98
|
+
@dom.xpath('//link') do |node|
|
99
|
+
node.remove if node['hreflang'] && supported_langs.include?(Lang.iso_639_1_normalization(node['hreflang']))
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def add_hreflang
|
104
|
+
!!(@store && @headers)
|
105
|
+
end
|
106
|
+
|
107
|
+
def inject_lang_html_tag
|
108
|
+
root = @dom.at_css('html')
|
109
|
+
return unless root
|
110
|
+
|
111
|
+
current_lang = @headers.lang_code
|
112
|
+
default_lang = @store.default_lang
|
113
|
+
|
114
|
+
if current_lang != default_lang
|
115
|
+
root['lang'] = current_lang
|
116
|
+
else
|
117
|
+
root.delete('lang')
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def replace_hreflangs
|
122
|
+
strip_hreflang_tags
|
123
|
+
insert_hreflang_tags
|
124
|
+
end
|
125
|
+
|
126
|
+
def strip_hreflang_tags
|
127
|
+
@dom.xpath('//link').each do |node|
|
128
|
+
node.remove if node['hreflang'] && @store.supported_langs.include?(Lang.iso_639_1_normalization(node['hreflang']))
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
def insert_hreflang_tags
|
133
|
+
parent_node = @dom.at_css('head') || @dom.at_css('body') || @dom.at_css('html')
|
134
|
+
return unless parent_node
|
135
|
+
|
136
|
+
@store.supported_langs.each do |lang_code|
|
137
|
+
insert_node = Nokogiri::XML::Node.new('link', @dom)
|
138
|
+
insert_node['rel'] = 'alternate'
|
139
|
+
insert_node['hreflang'] = Lang.iso_639_1_normalization(lang_code)
|
140
|
+
insert_node['href'] = @headers.redirect_location(lang_code)
|
141
|
+
|
142
|
+
parent_node.add_child(insert_node.to_s)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
# Remove wovn snippet code from dom
|
147
|
+
def strip_snippet
|
148
|
+
@dom.xpath('//script').each do |script_node|
|
149
|
+
script_node.remove if script_node['src'] && script_node['src'] =~ /^\/\/j.(dev-)?wovn.io(:3000)?\//
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def insert_snippet(adds_backend_error_mark = true)
|
154
|
+
parent_node = @dom.at_css('head') || @dom.at_css('body') || @dom.at_css('html')
|
155
|
+
return unless parent_node
|
156
|
+
|
157
|
+
insert_node = Nokogiri::XML::Node.new('script', @dom)
|
158
|
+
insert_node['src'] = "//j.#{@store.wovn_host}/1"
|
159
|
+
insert_node['async'] = true
|
160
|
+
insert_node['data-wovnio'] = data_wovnio
|
161
|
+
insert_node['data-wovnio-type'] = 'fallback_snippet' if adds_backend_error_mark
|
162
|
+
# do this so that there will be a closing tag (better compatibility with browsers)
|
163
|
+
insert_node.content = ''
|
164
|
+
|
165
|
+
if !parent_node.children.empty?
|
166
|
+
parent_node.children.first.add_previous_sibling(insert_node)
|
167
|
+
else
|
168
|
+
parent_node.add_child(insert_node)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def data_wovnio
|
173
|
+
token = @store.settings['project_token']
|
174
|
+
current_lang = @headers.lang_code
|
175
|
+
default_lang = @store.settings['default_lang']
|
176
|
+
url_pattern = @store.settings['url_pattern']
|
177
|
+
lang_code_aliases_json = JSON.generate(@store.settings['custom_lang_aliases'])
|
178
|
+
|
179
|
+
CGI.escapeHTML(
|
180
|
+
[
|
181
|
+
"key=#{token}",
|
182
|
+
'backend=true',
|
183
|
+
"currentLang=#{current_lang}",
|
184
|
+
"defaultLang=#{default_lang}",
|
185
|
+
"urlPattern=#{url_pattern}",
|
186
|
+
"langCodeAliases=#{lang_code_aliases_json}",
|
187
|
+
"version=WOVN.rb_#{VERSION}"
|
188
|
+
].join('&')
|
189
|
+
)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|