wovnrb 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +1 -0
  5. data/.rubocop_todo.yml +308 -0
  6. data/Rakefile +13 -14
  7. data/lib/wovnrb.rb +43 -98
  8. data/lib/wovnrb/api_translator.rb +143 -0
  9. data/lib/wovnrb/headers.rb +78 -92
  10. data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
  11. data/lib/wovnrb/lang.rb +93 -125
  12. data/lib/wovnrb/railtie.rb +5 -7
  13. data/lib/wovnrb/services/glob.rb +3 -3
  14. data/lib/wovnrb/services/html_converter.rb +192 -0
  15. data/lib/wovnrb/services/html_replace_marker.rb +38 -0
  16. data/lib/wovnrb/services/wovn_logger.rb +8 -4
  17. data/lib/wovnrb/settings.rb +5 -3
  18. data/lib/wovnrb/store.rb +35 -26
  19. data/lib/wovnrb/text_caches/cache_base.rb +3 -2
  20. data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
  21. data/lib/wovnrb/version.rb +1 -1
  22. data/test/fixtures/html/test.html +8 -0
  23. data/test/fixtures/html/test_translated.html +8 -0
  24. data/test/lib/api_translator_test.rb +109 -0
  25. data/test/lib/headers_test.rb +84 -55
  26. data/test/lib/lang_test.rb +157 -357
  27. data/test/lib/services/glob_test.rb +1 -1
  28. data/test/lib/services/html_converter_test.rb +166 -0
  29. data/test/lib/services/html_replace_marker_test.rb +75 -0
  30. data/test/lib/services/wovn_logger_test.rb +6 -6
  31. data/test/lib/store_test.rb +25 -69
  32. data/test/lib/text_caches/cache_base_test.rb +1 -1
  33. data/test/lib/text_caches/memory_cache_test.rb +10 -11
  34. data/test/lib/wovnrb_test.rb +77 -310
  35. data/test/test_helper.rb +22 -32
  36. data/wovnrb.gemspec +35 -44
  37. metadata +86 -205
  38. data/ext/dom/Makefile +0 -239
  39. data/lib/wovnrb/api_data.rb +0 -59
  40. data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
  41. data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
  42. data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
  43. data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
  44. data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
  45. data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
  46. data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
  47. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
  48. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
  49. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
  50. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
  51. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
  52. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
  53. data/lib/wovnrb/services/url.rb +0 -12
  54. data/lib/wovnrb/services/value_agent.rb +0 -9
  55. data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
  56. data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
  57. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
  58. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
  59. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
  60. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
  61. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
  62. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
  63. data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
  64. data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
  65. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
  66. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
  67. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
  68. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
  69. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
  70. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
  71. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
  72. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
  73. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
  74. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
  75. data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
  76. data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
  77. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
  78. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
  79. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
  80. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
  81. data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
  82. data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
  83. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
  84. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
  85. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
  86. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
  87. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
  88. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
  89. data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
  90. data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
  91. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
  92. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
  93. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
  94. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
  95. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
  96. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
  97. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
  98. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
  99. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
  100. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
  101. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
  102. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
  103. data/test/lib/api_data_test.rb +0 -83
  104. data/test/lib/html_replacers/image_replacer_test.rb +0 -165
  105. data/test/lib/html_replacers/input_replacer_test.rb +0 -140
  106. data/test/lib/html_replacers/link_replacer_test.rb +0 -328
  107. data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
  108. data/test/lib/html_replacers/replacer_base_test.rb +0 -128
  109. data/test/lib/html_replacers/script_replacer_test.rb +0 -139
  110. data/test/lib/html_replacers/text_replacer_test.rb +0 -99
  111. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
  112. data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
  113. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
  114. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
  115. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
  116. data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
  117. data/test/lib/services/url_test.rb +0 -9
  118. data/test/lib/services/value_agent_test.rb +0 -32
  119. data/test/services/url_test.rb +0 -163
  120. data/values/values +0 -1
data/lib/wovnrb/lang.rb CHANGED
@@ -1,51 +1,49 @@
1
- # -*- encoding: UTF-8 -*-
2
-
3
1
  require 'addressable'
4
2
 
5
3
  module Wovnrb
6
4
  class Lang
7
5
  LANG = {
8
- #http://msdn.microsoft.com/en-us/library/hh456380.aspx
9
- 'ar' => {name: 'العربية', code: 'ar', en: 'Arabic'},
10
- 'eu' => {name: 'Euskara', code: 'eu', en: 'Basque'},
11
- 'bn' => {name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali'},
12
- 'bg' => {name: 'Български', code: 'bg', en: 'Bulgarian'},
13
- 'ca' => {name: 'Català', code: 'ca', en: 'Catalan'},
14
- 'zh-CHS' => {name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese'},
15
- 'zh-CHT' => {name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese'},
16
- 'da' => {name: 'Dansk', code: 'da', en: 'Danish'},
17
- 'nl' => {name: 'Nederlands', code: 'nl', en: 'Dutch'},
18
- 'en' => {name: 'English', code: 'en', en: 'English'},
19
- 'fi' => {name: 'Suomi', code: 'fi', en: 'Finnish'},
20
- 'fr' => {name: 'Français', code: 'fr', en: 'French'},
21
- 'gl' => {name: 'Galego', code: 'gl', en: 'Galician'},
22
- 'de' => {name: 'Deutsch', code: 'de', en: 'German'},
23
- 'el' => {name: 'Ελληνικά', code: 'el', en: 'Greek'},
24
- 'he' => {name: 'עברית', code: 'he', en: 'Hebrew'},
25
- 'hu' => {name: 'Magyar', code: 'hu', en: 'Hungarian'},
26
- 'id' => {name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian'},
27
- 'it' => {name: 'Italiano', code: 'it', en: 'Italian'},
28
- 'ja' => {name: '日本語', code: 'ja', en: 'Japanese'},
29
- 'ko' => {name: '한국어', code: 'ko', en: 'Korean'},
30
- 'lv' =>{name: 'Latviešu', code: 'lv', en: 'Latvian'},
31
- 'ms' => {name: 'Bahasa Melayu', code: 'ms', en: 'Malay'},
6
+ # http://msdn.microsoft.com/en-us/library/hh456380.aspx
7
+ 'ar' => { name: 'العربية', code: 'ar', en: 'Arabic' },
8
+ 'eu' => { name: 'Euskara', code: 'eu', en: 'Basque' },
9
+ 'bn' => { name: 'বাংলা ভাষা', code: 'bn', en: 'Bengali' },
10
+ 'bg' => { name: 'Български', code: 'bg', en: 'Bulgarian' },
11
+ 'ca' => { name: 'Català', code: 'ca', en: 'Catalan' },
12
+ 'zh-CHS' => { name: '简体中文', code: 'zh-CHS', en: 'Simp Chinese' },
13
+ 'zh-CHT' => { name: '繁體中文', code: 'zh-CHT', en: 'Trad Chinese' },
14
+ 'da' => { name: 'Dansk', code: 'da', en: 'Danish' },
15
+ 'nl' => { name: 'Nederlands', code: 'nl', en: 'Dutch' },
16
+ 'en' => { name: 'English', code: 'en', en: 'English' },
17
+ 'fi' => { name: 'Suomi', code: 'fi', en: 'Finnish' },
18
+ 'fr' => { name: 'Français', code: 'fr', en: 'French' },
19
+ 'gl' => { name: 'Galego', code: 'gl', en: 'Galician' },
20
+ 'de' => { name: 'Deutsch', code: 'de', en: 'German' },
21
+ 'el' => { name: 'Ελληνικά', code: 'el', en: 'Greek' },
22
+ 'he' => { name: 'עברית', code: 'he', en: 'Hebrew' },
23
+ 'hu' => { name: 'Magyar', code: 'hu', en: 'Hungarian' },
24
+ 'id' => { name: 'Bahasa Indonesia', code: 'id', en: 'Indonesian' },
25
+ 'it' => { name: 'Italiano', code: 'it', en: 'Italian' },
26
+ 'ja' => { name: '日本語', code: 'ja', en: 'Japanese' },
27
+ 'ko' => { name: '한국어', code: 'ko', en: 'Korean' },
28
+ 'lv' => { name: 'Latviešu', code: 'lv', en: 'Latvian' },
29
+ 'ms' => { name: 'Bahasa Melayu', code: 'ms', en: 'Malay' },
32
30
  'my' => { name: 'ဗမာစာ', code: 'my', en: 'Burmese' },
33
- 'ne' => {name: 'नेपाली भाषा', code: 'ne', en: 'Nepali'},
34
- 'fa' => {name: 'زبان_فارسی', code: 'fa', en: 'Persian'},
35
- 'no' => {name: 'Norsk', code: 'no', en: 'Norwegian'},
36
- 'pl' => {name: 'Polski', code: 'pl', en: 'Polish'},
37
- 'pt' => {name: 'Português', code: 'pt', en: 'Portuguese'},
38
- 'ru' => {name: 'Русский', code: 'ru', en: 'Russian'},
39
- 'es' => {name: 'Español', code: 'es', en: 'Spanish'},
40
- 'sw' => {name: 'Kiswahili', code: 'sw', en: 'Swahili'},
41
- 'sv' => {name: 'Svensk', code: 'sv', en: 'Swedish'},
42
- 'th' => {name: 'ภาษาไทย', code: 'th', en: 'Thai'},
43
- 'hi' => {name: 'हिन्दी', code: 'hi', en: 'Hindi'},
44
- 'tr' => {name: 'Türkçe', code: 'tr', en: 'Turkish'},
45
- 'uk' => {name: 'Українська', code: 'uk', en: 'Ukrainian'},
46
- 'ur' => {name: 'اردو', code: 'ur', en: 'Urdu'},
47
- 'vi' => {name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese'},
48
- }
31
+ 'ne' => { name: 'नेपाली भाषा', code: 'ne', en: 'Nepali' },
32
+ 'fa' => { name: 'زبان_فارسی', code: 'fa', en: 'Persian' },
33
+ 'no' => { name: 'Norsk', code: 'no', en: 'Norwegian' },
34
+ 'pl' => { name: 'Polski', code: 'pl', en: 'Polish' },
35
+ 'pt' => { name: 'Português', code: 'pt', en: 'Portuguese' },
36
+ 'ru' => { name: 'Русский', code: 'ru', en: 'Russian' },
37
+ 'es' => { name: 'Español', code: 'es', en: 'Spanish' },
38
+ 'sw' => { name: 'Kiswahili', code: 'sw', en: 'Swahili' },
39
+ 'sv' => { name: 'Svensk', code: 'sv', en: 'Swedish' },
40
+ 'th' => { name: 'ภาษาไทย', code: 'th', en: 'Thai' },
41
+ 'hi' => { name: 'हिन्दी', code: 'hi', en: 'Hindi' },
42
+ 'tr' => { name: 'Türkçe', code: 'tr', en: 'Turkish' },
43
+ 'uk' => { name: 'Українська', code: 'uk', en: 'Ukrainian' },
44
+ 'ur' => { name: 'اردو', code: 'ur', en: 'Urdu' },
45
+ 'vi' => { name: 'Tiếng Việt', code: 'vi', en: 'Vietnamese' }
46
+ }.freeze
49
47
 
50
48
  # Provides the ISO639-1 code for a given lang code.
51
49
  # Source: https://support.google.com/webmasters/answer/189077?hl=en
@@ -54,35 +52,33 @@ module Wovnrb
54
52
  #
55
53
  # @return [String] The ISO639-1 code of the language.
56
54
  def self.iso_639_1_normalization(lang_code)
57
- return lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
55
+ lang_code.sub(/zh-CHT/i, 'zh-Hant').sub(/zh-CHS/i, 'zh-Hans')
58
56
  end
59
57
 
60
58
  def self.get_code(lang_name)
61
59
  return nil if lang_name.nil?
62
60
  return lang_name if LANG[lang_name]
61
+
63
62
  custom_lang_aliases = Store.instance.settings['custom_lang_aliases']
64
63
  custom_lang = LANG[custom_lang_aliases.invert[lang_name]]
65
64
  return custom_lang[:code] if custom_lang
66
- LANG.each do |k, l|
67
- if lang_name.downcase == l[:name].downcase || lang_name.downcase == l[:en].downcase || lang_name.downcase == l[:code].downcase
68
- return l[:code]
69
- end
65
+
66
+ LANG.each do |_k, l|
67
+ return l[:code] if lang_name.casecmp(l[:name]).zero? || lang_name.casecmp(l[:en]).zero? || lang_name.casecmp(l[:code]).zero?
70
68
  end
71
- return nil
69
+ nil
72
70
  end
73
71
 
74
72
  def self.get_lang(lang)
75
73
  lang_code = get_code(lang)
76
- return LANG[lang_code]
74
+ LANG[lang_code]
77
75
  end
78
76
 
79
77
  def initialize(lang_name)
80
78
  @lang_code = Lang.get_code(lang_name)
81
79
  end
82
80
 
83
- def lang_code
84
- @lang_code
85
- end
81
+ attr_reader :lang_code
86
82
 
87
83
  # Adds language code to URL in "href" variable by "pattern" variable and own @lang_code.
88
84
  # When @lang_code is 'ja', add_lang_code('https://wovn.io', 'path', headers) returns 'https://wovn.io/ja/'.
@@ -94,6 +90,7 @@ module Wovnrb
94
90
  # @return [String] URL added langauge code.
95
91
  def add_lang_code(href, pattern, headers)
96
92
  return href if href =~ /^(#.*)?$/
93
+
97
94
  code_to_add = Store.instance.settings['custom_lang_aliases'][@lang_code] || @lang_code
98
95
  # absolute links
99
96
  new_href = href
@@ -109,83 +106,54 @@ module Wovnrb
109
106
  # DNS names are case insensitive
110
107
  if uri.host.downcase === headers.host.downcase
111
108
  case pattern
112
- when 'subdomain'
113
- sub_d = href.match(/\/\/([^\.]*)\./)[1]
114
- sub_code = Lang.get_code(sub_d)
115
- if sub_code && sub_code.downcase == code_to_add.downcase
116
- new_href = href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
117
- else
118
- new_href = href.sub(/(\/\/)([^\.]*)/, '\1' + code_to_add.downcase + '.' + '\2')
119
- end
120
- when 'query'
121
- new_href = add_query_lang_code(href, code_to_add)
122
- else # path
123
- new_href = href.sub(/([^\.]*\.[^\/]*)(\/|$)/, '\1/' + code_to_add + '/')
109
+ when 'subdomain'
110
+ sub_d = href.match(/\/\/([^\.]*)\./)[1]
111
+ sub_code = Lang.get_code(sub_d)
112
+ new_href = if sub_code && sub_code.casecmp(code_to_add).zero?
113
+ href.sub(Regexp.new(code_to_add, 'i'), code_to_add.downcase)
114
+ else
115
+ href.sub(/(\/\/)([^\.]*)/, '\1' + code_to_add.downcase + '.' + '\2')
116
+ end
117
+ when 'query'
118
+ new_href = add_query_lang_code(href, code_to_add)
119
+ else # path
120
+ new_href = href.sub(/([^\.]*\.[^\/]*)(\/|$)/, '\1/' + code_to_add + '/')
124
121
  end
125
122
  end
126
123
  elsif href
127
124
  case pattern
128
- when 'subdomain'
129
- lang_url = headers.protocol + '://' + code_to_add.downcase + '.' + headers.host
125
+ when 'subdomain'
126
+ lang_url = headers.protocol + '://' + code_to_add.downcase + '.' + headers.host
127
+ current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
128
+ new_href = if href =~ /^\.\..*$/
129
+ # ../path
130
+ lang_url + '/' + href.gsub(/^\.\.\//, '')
131
+ elsif href =~ /^\..*$/
132
+ # ./path
133
+ lang_url + current_dir + '/' + href.gsub(/^\.\//, '')
134
+ elsif href =~ /^\/.*$/
135
+ # /path
136
+ lang_url + href
137
+ else
138
+ # path
139
+ lang_url + current_dir + '/' + href
140
+ end
141
+ when 'query'
142
+ new_href = add_query_lang_code(href, code_to_add)
143
+ else # path
144
+ if href =~ /^\//
145
+ new_href = '/' + code_to_add + href
146
+ else
130
147
  current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
131
- if href =~ /^\.\..*$/
132
- # ../path
133
- new_href = lang_url + '/' + href.gsub(/^\.\.\//, '')
134
- elsif href =~ /^\..*$/
135
- # ./path
136
- new_href = lang_url + current_dir + '/' + href.gsub(/^\.\//, '')
137
- elsif href =~ /^\/.*$/
138
- # /path
139
- new_href = lang_url + href
140
- else
141
- # path
142
- new_href = lang_url + current_dir + '/' + href
143
- end
144
- when 'query'
145
- new_href = add_query_lang_code(href, code_to_add)
146
- else # path
147
- if href =~ /^\//
148
- new_href = '/' + code_to_add + href
149
- else
150
- current_dir = headers.pathname.sub(/[^\/]*\.[^\.]{2,6}$/, '')
151
- current_dir = '/' if current_dir == ''
152
- new_href = '/' + code_to_add + current_dir + href
153
- end
148
+ current_dir = '/' if current_dir == ''
149
+ new_href = '/' + code_to_add + current_dir + href
150
+ end
154
151
  end
155
152
  end
156
153
 
157
154
  new_href
158
155
  end
159
156
 
160
- def switch_dom_lang(dom, store, values, url, headers)
161
- replace_dom_values(dom, values, store, url, headers)
162
-
163
- if dom.html?
164
- # INSERT LANGUAGE METALINKS
165
- parent_node = dom.at_css('head') || dom.at_css('body') || dom.at_css('html')
166
- published_langs = get_langs(values)
167
- all_langs = published_langs.add(store.settings['default_lang'])
168
- all_langs.each do |l|
169
- insert_node = Nokogiri::XML::Node.new('link', dom)
170
- insert_node['rel'] = 'alternate'
171
- insert_node['hreflang'] = Lang::iso_639_1_normalization(l)
172
- insert_node['href'] = headers.redirect_location(l)
173
- parent_node.add_child(insert_node)
174
- end
175
-
176
- # set lang property on HTML tag
177
- if dom.at_css('html') || dom.at_css('HTML')
178
- (dom.at_css('html') || dom.at_css('HTML')).set_attribute('lang', Lang::iso_639_1_normalization(@lang_code))
179
- end
180
- end
181
-
182
- index_href = index_href_for_encoding_and_decoding(dom)
183
- # NOTE: when we use `#to_html` with nokogiri, nokogiri encode all href.
184
- # but we want to keep original href as much as possible.
185
- # That's why we replace href with original href which added lang info by wovnrb like this after we used `to_html`
186
- dom.to_html(save_with: 0).gsub(/href="([^"]*)"/) { |m| "href=\"#{index_href[$1] || $1}\"" }
187
- end
188
-
189
157
  private
190
158
 
191
159
  def index_href_for_encoding_and_decoding(dom)
@@ -216,11 +184,11 @@ module Wovnrb
216
184
  replacers << LinkReplacer.new(store, pattern, headers)
217
185
  end
218
186
 
219
- unless html_text_index.empty?
220
- replacers << UnifiedValues::TextReplacer.new(store, html_text_index)
221
- else
222
- replacers << TextReplacer.new(store, text_index)
223
- end
187
+ replacers << if html_text_index.empty?
188
+ TextReplacer.new(store, text_index)
189
+ else
190
+ UnifiedValues::TextReplacer.new(store, html_text_index)
191
+ end
224
192
  replacers << MetaReplacer.new(store, text_index, pattern, headers)
225
193
  replacers << InputReplacer.new(store, text_index)
226
194
  replacers << ImageReplacer.new(store, url, text_index, src_index, img_src_prefix, host_aliases)
@@ -233,8 +201,8 @@ module Wovnrb
233
201
 
234
202
  def get_langs(values)
235
203
  langs = Set.new
236
- (values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |key, index|
237
- index.each do |l, val|
204
+ (values['text_vals'] || {}).merge(values['img_vals'] || {}).each do |_key, index|
205
+ index.each do |l, _val|
238
206
  langs.add(l)
239
207
  end
240
208
  end
data/lib/wovnrb/railtie.rb CHANGED
@@ -1,20 +1,18 @@
1
1
  module Wovnrb
2
-
3
2
  class Railtie < Rails::Railtie
4
3
  initializer 'wovnrb.configure_rails_initialization' do |app|
5
4
  app.middleware.insert_before(0, Wovnrb::Interceptor)
6
- #begin
5
+ # begin
7
6
  # app.middleware.insert_before(Rack::Runtime, Wovnrb::Interceptor)
8
- #rescue
7
+ # rescue
9
8
  # app.middleware.insert_before(0, Wovnrb::Interceptor)
10
- #end
9
+ # end
11
10
 
12
- #if Rails.env.development? && config.respond_to?(:wovnrb)
11
+ # if Rails.env.development? && config.respond_to?(:wovnrb)
13
12
  # config.after_initialize do
14
13
  # config.wovnrb[:project_token] = User.first.short_token
15
14
  # end
16
- #end
15
+ # end
17
16
  end
18
17
  end
19
-
20
18
  end
data/lib/wovnrb/services/glob.rb CHANGED
@@ -11,8 +11,8 @@ module Wovnrb
11
11
  sub_directories = pattern.split('/**', -1)
12
12
  regexp = sub_directories.map do |sub_dir|
13
13
  sub_dir.split('*', -1)
14
- .map {|p| Regexp.escape(p)}
15
- .join('[^/]*')
14
+ .map { |p| Regexp.escape(p) }
15
+ .join('[^/]*')
16
16
  end.join('(/[^/]*)*')
17
17
 
18
18
  @regexp = Regexp.new("^#{regexp}$")
@@ -22,4 +22,4 @@ module Wovnrb
22
22
  !@regexp.match(url).nil?
23
23
  end
24
24
  end
25
- end
25
+ end
data/lib/wovnrb/services/html_converter.rb ADDED
@@ -0,0 +1,192 @@
1
+ module Wovnrb
2
+ class HtmlConverter
3
+ def initialize(dom, store, headers)
4
+ @dom = dom
5
+ @headers = headers
6
+ @store = store
7
+ end
8
+
9
+ def build
10
+ transform_html
11
+ html
12
+ end
13
+
14
+ def build_api_compatible_html
15
+ marker = HtmlReplaceMarker.new
16
+ converted_html = replace_dom(marker)
17
+
18
+ [converted_html, marker]
19
+ end
20
+
21
+ private
22
+
23
+ def html
24
+ @dom.to_html(save_with: 0).strip
25
+ end
26
+
27
+ def transform_html
28
+ replace_snippet
29
+ replace_hreflangs
30
+ inject_lang_html_tag
31
+ end
32
+
33
+ def replace_snippet
34
+ strip_snippet
35
+ insert_snippet
36
+ end
37
+
38
+ def replace_dom(marker)
39
+ strip_snippet
40
+ strip_hreflangs if add_hreflang
41
+
42
+ @dom.traverse { |node| transform_node(node, marker) }
43
+
44
+ insert_snippet(true)
45
+ insert_hreflang_tags
46
+
47
+ html
48
+ end
49
+
50
+ def transform_node(node, marker)
51
+ strip_wovn_ignore(node, marker)
52
+ strip_custom_ignore(node, marker)
53
+ strip_form(node, marker)
54
+ strip_script(node, marker)
55
+ end
56
+
57
+ def strip_script(node, marker)
58
+ put_replace_marker(node, marker) if node.name.casecmp('script').zero?
59
+ end
60
+
61
+ def strip_form(node, marker)
62
+ if node.name.casecmp('form').zero?
63
+ put_replace_marker(node, marker)
64
+ return
65
+ end
66
+
67
+ if node.name.casecmp('input').zero? && node.get_attribute('type') == 'hidden'
68
+ original_text = node.get_attribute('value')
69
+ return if original_text.include?(HtmlReplaceMarker::KEY_PREFIX)
70
+
71
+ node.set_attribute('value', marker.add_value(original_text))
72
+ end
73
+ end
74
+
75
+ def strip_custom_ignore(node, marker)
76
+ classes = node.get_attribute('class')
77
+ return unless classes.present?
78
+
79
+ ignored_classes = @store.settings['ignore_class']
80
+ should_be_ignored = (ignored_classes & classes.split(' ')).present?
81
+
82
+ put_replace_marker(node, marker) if should_be_ignored
83
+ end
84
+
85
+ def strip_wovn_ignore(node, marker)
86
+ put_replace_marker(node, marker) if node && node.get_attribute('wovn-ignore')
87
+ end
88
+
89
+ def put_replace_marker(node, marker)
90
+ original_text = node.inner_text
91
+ return if original_text.include?(HtmlReplaceMarker::KEY_PREFIX)
92
+
93
+ node.inner_html = marker.add_comment_value(original_text)
94
+ end
95
+
96
+ def strip_hreflangs
97
+ supported_langs = @store.supported_langs
98
+ @dom.xpath('//link') do |node|
99
+ node.remove if node['hreflang'] && supported_langs.include?(Lang.iso_639_1_normalization(node['hreflang']))
100
+ end
101
+ end
102
+
103
+ def add_hreflang
104
+ !!(@store && @headers)
105
+ end
106
+
107
+ def inject_lang_html_tag
108
+ root = @dom.at_css('html')
109
+ return unless root
110
+
111
+ current_lang = @headers.lang_code
112
+ default_lang = @store.default_lang
113
+
114
+ if current_lang != default_lang
115
+ root['lang'] = current_lang
116
+ else
117
+ root.delete('lang')
118
+ end
119
+ end
120
+
121
+ def replace_hreflangs
122
+ strip_hreflang_tags
123
+ insert_hreflang_tags
124
+ end
125
+
126
+ def strip_hreflang_tags
127
+ @dom.xpath('//link').each do |node|
128
+ node.remove if node['hreflang'] && @store.supported_langs.include?(Lang.iso_639_1_normalization(node['hreflang']))
129
+ end
130
+ end
131
+
132
+ def insert_hreflang_tags
133
+ parent_node = @dom.at_css('head') || @dom.at_css('body') || @dom.at_css('html')
134
+ return unless parent_node
135
+
136
+ @store.supported_langs.each do |lang_code|
137
+ insert_node = Nokogiri::XML::Node.new('link', @dom)
138
+ insert_node['rel'] = 'alternate'
139
+ insert_node['hreflang'] = Lang.iso_639_1_normalization(lang_code)
140
+ insert_node['href'] = @headers.redirect_location(lang_code)
141
+
142
+ parent_node.add_child(insert_node.to_s)
143
+ end
144
+ end
145
+
146
+ # Remove wovn snippet code from dom
147
+ def strip_snippet
148
+ @dom.xpath('//script').each do |script_node|
149
+ script_node.remove if script_node['src'] && script_node['src'] =~ /^\/\/j.(dev-)?wovn.io(:3000)?\//
150
+ end
151
+ end
152
+
153
+ def insert_snippet(adds_backend_error_mark = true)
154
+ parent_node = @dom.at_css('head') || @dom.at_css('body') || @dom.at_css('html')
155
+ return unless parent_node
156
+
157
+ insert_node = Nokogiri::XML::Node.new('script', @dom)
158
+ insert_node['src'] = "//j.#{@store.wovn_host}/1"
159
+ insert_node['async'] = true
160
+ insert_node['data-wovnio'] = data_wovnio
161
+ insert_node['data-wovnio-type'] = 'fallback_snippet' if adds_backend_error_mark
162
+ # do this so that there will be a closing tag (better compatibility with browsers)
163
+ insert_node.content = ''
164
+
165
+ if !parent_node.children.empty?
166
+ parent_node.children.first.add_previous_sibling(insert_node)
167
+ else
168
+ parent_node.add_child(insert_node)
169
+ end
170
+ end
171
+
172
+ def data_wovnio
173
+ token = @store.settings['project_token']
174
+ current_lang = @headers.lang_code
175
+ default_lang = @store.settings['default_lang']
176
+ url_pattern = @store.settings['url_pattern']
177
+ lang_code_aliases_json = JSON.generate(@store.settings['custom_lang_aliases'])
178
+
179
+ CGI.escapeHTML(
180
+ [
181
+ "key=#{token}",
182
+ 'backend=true',
183
+ "currentLang=#{current_lang}",
184
+ "defaultLang=#{default_lang}",
185
+ "urlPattern=#{url_pattern}",
186
+ "langCodeAliases=#{lang_code_aliases_json}",
187
+ "version=WOVN.rb_#{VERSION}"
188
+ ].join('&')
189
+ )
190
+ end
191
+ end
192
+ end