wovnrb 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -0
  3. data/.gitignore +2 -0
  4. data/.rubocop.yml +1 -0
  5. data/.rubocop_todo.yml +308 -0
  6. data/Rakefile +13 -14
  7. data/lib/wovnrb.rb +43 -98
  8. data/lib/wovnrb/api_translator.rb +143 -0
  9. data/lib/wovnrb/headers.rb +78 -92
  10. data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
  11. data/lib/wovnrb/lang.rb +93 -125
  12. data/lib/wovnrb/railtie.rb +5 -7
  13. data/lib/wovnrb/services/glob.rb +3 -3
  14. data/lib/wovnrb/services/html_converter.rb +192 -0
  15. data/lib/wovnrb/services/html_replace_marker.rb +38 -0
  16. data/lib/wovnrb/services/wovn_logger.rb +8 -4
  17. data/lib/wovnrb/settings.rb +5 -3
  18. data/lib/wovnrb/store.rb +35 -26
  19. data/lib/wovnrb/text_caches/cache_base.rb +3 -2
  20. data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
  21. data/lib/wovnrb/version.rb +1 -1
  22. data/test/fixtures/html/test.html +8 -0
  23. data/test/fixtures/html/test_translated.html +8 -0
  24. data/test/lib/api_translator_test.rb +109 -0
  25. data/test/lib/headers_test.rb +84 -55
  26. data/test/lib/lang_test.rb +157 -357
  27. data/test/lib/services/glob_test.rb +1 -1
  28. data/test/lib/services/html_converter_test.rb +166 -0
  29. data/test/lib/services/html_replace_marker_test.rb +75 -0
  30. data/test/lib/services/wovn_logger_test.rb +6 -6
  31. data/test/lib/store_test.rb +25 -69
  32. data/test/lib/text_caches/cache_base_test.rb +1 -1
  33. data/test/lib/text_caches/memory_cache_test.rb +10 -11
  34. data/test/lib/wovnrb_test.rb +77 -310
  35. data/test/test_helper.rb +22 -32
  36. data/wovnrb.gemspec +35 -44
  37. metadata +86 -205
  38. data/ext/dom/Makefile +0 -239
  39. data/lib/wovnrb/api_data.rb +0 -59
  40. data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
  41. data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
  42. data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
  43. data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
  44. data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
  45. data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
  46. data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
  47. data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
  48. data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
  49. data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
  50. data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
  51. data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
  52. data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
  53. data/lib/wovnrb/services/url.rb +0 -12
  54. data/lib/wovnrb/services/value_agent.rb +0 -9
  55. data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
  56. data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
  57. data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
  58. data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
  59. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
  60. data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
  61. data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
  62. data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
  63. data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
  64. data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
  65. data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
  66. data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
  67. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
  68. data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
  69. data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
  70. data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
  71. data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
  72. data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
  73. data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
  74. data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
  75. data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
  76. data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
  77. data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
  78. data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
  79. data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
  80. data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
  81. data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
  82. data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
  83. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
  84. data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
  85. data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
  86. data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
  87. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
  88. data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
  89. data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
  90. data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
  91. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
  92. data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
  93. data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
  94. data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
  95. data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
  96. data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
  97. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
  98. data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
  99. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
  100. data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
  101. data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
  102. data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
  103. data/test/lib/api_data_test.rb +0 -83
  104. data/test/lib/html_replacers/image_replacer_test.rb +0 -165
  105. data/test/lib/html_replacers/input_replacer_test.rb +0 -140
  106. data/test/lib/html_replacers/link_replacer_test.rb +0 -328
  107. data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
  108. data/test/lib/html_replacers/replacer_base_test.rb +0 -128
  109. data/test/lib/html_replacers/script_replacer_test.rb +0 -139
  110. data/test/lib/html_replacers/text_replacer_test.rb +0 -99
  111. data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
  112. data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
  113. data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
  114. data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
  115. data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
  116. data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
  117. data/test/lib/services/url_test.rb +0 -9
  118. data/test/lib/services/value_agent_test.rb +0 -32
  119. data/test/services/url_test.rb +0 -163
  120. data/values/values +0 -1
@@ -1,28 +0,0 @@
1
- module Wovnrb
2
- class MetaReplacer < ReplacerBase
3
- def initialize(store, text_index, pattern = nil, headers = nil)
4
- super(store)
5
- @text_index = text_index
6
- @pattern = pattern
7
- @headers = headers
8
- end
9
-
10
- def replace(dom, lang)
11
- dom.xpath('.//meta').select { |node|
12
- next if wovn_ignore?(node)
13
- (node.get_attribute('name') || node.get_attribute('property') || '') =~ /^(description|title|og:title|og:description|og:url|twitter:title|twitter:description)$/
14
- }.each do |node|
15
- node_content = node.get_attribute('content').strip
16
- if @headers && @pattern && node.get_attribute('property') && node.get_attribute('property') === 'og:url'
17
- new_url = lang.add_lang_code(node_content, @pattern, @headers)
18
- node.set_attribute('content', new_url)
19
- next
20
- end
21
- # shouldn't need size check, but for now...
22
- if @text_index[node_content] && @text_index[node_content][lang.lang_code] && @text_index[node_content][lang.lang_code].size > 0
23
- node.set_attribute('content', replace_text(node.get_attribute('content'), @text_index[node_content][lang.lang_code][0]['data']))
24
- end
25
- end
26
- end
27
- end
28
- end
@@ -1,49 +0,0 @@
1
- module Wovnrb
2
- class ReplacerBase
3
- def initialize(store, ignored_class_set = [])
4
- @store = store
5
- @ignored_class_set = Set.new(ignored_class_set)
6
- end
7
-
8
- def replace(dom, lang)
9
- raise NotImplementedError.new('replace is not defined')
10
- end
11
-
12
- protected
13
- def wovn_ignore?(node)
14
- if !node.get_attribute('wovn-ignore').nil?
15
- return true
16
- elsif node.name === 'html' || node.parent.nil?
17
- return false
18
- end
19
-
20
- node_class = node.get_attribute('class')
21
- if node_class
22
- classes = node_class.split
23
- @store.settings['ignore_class'].each do |ignore_class|
24
- if classes.include?(ignore_class)
25
- return true
26
- end
27
- end
28
- end
29
-
30
- wovn_ignore?(node.parent)
31
- end
32
-
33
- # Add comment-node node to remember original src
34
- # <title> may not contain other markup, so add comment-node to node's previous
35
- # @see https://www.w3.org/TR/html401/struct/global.html#h-7.4.2
36
- def add_comment_node(node, text)
37
- comment_node = Nokogiri::XML::Comment.new(node.document, "wovn-src:#{text}")
38
- if node.parent.name == 'title'
39
- node.parent.add_previous_sibling(comment_node)
40
- else
41
- node.add_previous_sibling(comment_node)
42
- end
43
- end
44
-
45
- def replace_text(from, to)
46
- from.gsub(/\A(\s*)[\S\s]*?(\s*)\Z/, '\1' + to + '\2')
47
- end
48
- end
49
- end
@@ -1,39 +0,0 @@
1
- module Wovnrb
2
- class ScriptReplacer < ReplacerBase
3
- def initialize(store)
4
- super(store)
5
- end
6
-
7
- def replace(dom, lang)
8
- remove_embed_wovn_script(dom)
9
- add_wovn_script(dom, lang)
10
- end
11
-
12
- private
13
- def remove_embed_wovn_script(dom)
14
- dom.xpath('//script').each do |script_node|
15
- if script_node['src'] && script_node['src'] =~ /^\/\/j.(dev-)?wovn.io(:3000)?\//
16
- script_node.remove
17
- end
18
- end
19
- end
20
-
21
- def add_wovn_script(dom, lang)
22
- parent_node = dom.at_css('head') || dom.at_css('body') || dom.at_css('html')
23
-
24
- # INSERT BACKEND WIDGET
25
- insert_node = Nokogiri::XML::Node.new('script', dom)
26
- insert_node['src'] = "//j.#{@store.wovn_host}/1"
27
- insert_node['async'] = true
28
- version = defined?(VERSION) ? VERSION : ''
29
- insert_node['data-wovnio'] = "key=#{@store.settings['project_token']}&backend=true&currentLang=#{lang.lang_code}&defaultLang=#{@store.settings['default_lang']}&urlPattern=#{@store.settings['url_pattern']}&langCodeAliases=#{JSON.dump(@store.settings['custom_lang_aliases'])}&version=#{version}"
30
- # do this so that there will be a closing tag (better compatibility with browsers)
31
- insert_node.content = ' '
32
- if parent_node.children.size > 0
33
- parent_node.children.first.add_previous_sibling(insert_node)
34
- else
35
- parent_node.add_child(insert_node)
36
- end
37
- end
38
- end
39
- end
@@ -1,21 +0,0 @@
1
- module Wovnrb
2
- class TextReplacer < ReplacerBase
3
- def initialize(store, text_index)
4
- super(store)
5
- @text_index = text_index
6
- end
7
-
8
- def replace(dom, lang)
9
- dom.xpath('.//text()').each do |node|
10
- next if wovn_ignore?(node)
11
-
12
- node_text = node.content.strip
13
- # shouldn't need size check, but for now...
14
- if @text_index[node_text] && @text_index[node_text][lang.lang_code] && @text_index[node_text][lang.lang_code].size > 0
15
- add_comment_node(node, node_text)
16
- node.content = replace_text(node.content, @text_index[node_text][lang.lang_code][0]['data'])
17
- end
18
- end
19
- end
20
- end
21
- end
@@ -1,76 +0,0 @@
1
- module Wovnrb
2
- module UnifiedValues
3
- class DstSwappingTargetsCreator
4
- # NOTE: `text_index` is the format like below
5
- #
6
- # {
7
- # "<span>apple is a good</span>foods"=>
8
- # {"ja" =>
9
- # [{"xpath"=>"/html/body/div", "data"=>"りんごは<span>おいしい</span>たべものです"}]
10
- # },
11
- # "click<a>here</a>"=>
12
- # {"ja" =>
13
- # [{"xpath"=>"/html/body/div", "data"=>"<a>こちら</a>をクリックしてください"}]
14
- # }
15
- # }
16
-
17
- def initialize(text_index)
18
- @text_index = text_index
19
- end
20
-
21
- # NOTE: `run` make a swapping target like below
22
- #
23
- # {
24
- # "<span>apple is a good</span>foods"=>
25
- # {"ja" =>
26
- # [{"xpath"=>"/html/body/div", "data"=>"りんごは<span>おいしい</span>たべものです", 'swapping_targets'=>["りんごは", "おいしい", "たべものです"]}]
27
- # },
28
- # "click<a>here</a>"=>
29
- # {"ja" =>
30
- # [{"xpath"=>"/html/body/div", "data"=>" <a>こちら</a>をクリックしてください"}, 'swapping_targets'=>["", "こちら", "をクリックしてください"]]
31
- # }
32
- # }
33
-
34
- def run!
35
- @text_index.each do |_, v|
36
- mold = []
37
- v.values.each do |values|
38
- values.each do |value|
39
- value['data'].split(/(<.+?>)/).each_with_index do |data, _index|
40
- mold_size = mold.size
41
- mold.push('') if mold_size.even? && data.start_with?('<')
42
- mold.push(data)
43
- end
44
-
45
- mold.push('') if /\A<.+?>\z/ =~ mold.last
46
-
47
- value['swapping_targets'] = remove_tag_element(mold)
48
- end
49
- end
50
- end
51
- end
52
-
53
- private
54
-
55
- def remove_tag_element(mold)
56
- end_tag_of_wovn_ignore = nil
57
- swapping_targets = []
58
-
59
- mold.each do |value|
60
- if end_tag_of_wovn_ignore.nil? && value =~ /\A<.*wovn-ignore>\z/
61
- end_tag_of_wovn_ignore = "</#{value.gsub(' wovn-ignore', '')[1..-1]}"
62
- next
63
- end
64
-
65
- end_tag_of_wovn_ignore = nil if value == end_tag_of_wovn_ignore
66
-
67
- if end_tag_of_wovn_ignore.nil? && /\A<.+?>\z/ !~ value
68
- swapping_targets << value
69
- end
70
- end
71
-
72
- swapping_targets
73
- end
74
- end
75
- end
76
- end
@@ -1,242 +0,0 @@
1
- module Wovnrb
2
- module UnifiedValues
3
- module ElementCategory
4
- FLOW_CONTENT = 1
5
- PALPABLE_CONTENT = 2
6
- PHRASING_CONTENT = 3
7
- EMBEDDED_CONTENT = 4
8
- INTERACTIVE_CONTENT = 5
9
- METADATA_CONTENT = 6
10
- HEADING_CONTENT = 7
11
- SECTIONING_CONTENT = 8
12
- SECTIONING_ROOT = 9
13
- FORM_ASSOCIATED_ELEMENT = 10
14
- SCRIPT_SUPPORTING_ELEMENT = 11
15
- SVG_ELEMENT = 12
16
-
17
- DOCUMENT_ELEMENT = 100
18
- DOCUMENT_METADATA = 101
19
- SECTION = 1010
20
- GROUPING_CONTENT = 103
21
- TEXT_LEVEL_SEMANTICS = 104
22
- EDITS = 105
23
- EMBEDDED_CONTENTS = 106
24
- LINKS = 107
25
- TABULAR_DATA = 108
26
- FORMS = 109
27
- INTERACTIVE_ELEMENTS = 110
28
- SCRIPTING = 111
29
- ESCAPABLE_RAW_TEXT_ELEMENT = 112
30
-
31
- RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS = 200
32
- RESTRICTED_TO_CHILD_OF_GROUPING = 201
33
- RESTRICTED_TO_CHILD_OF_EMBEDDED = 202
34
- RESTRICTED_TO_CHILD_OF_TABULAR = 203
35
- RESTRICTED_TO_CHILD_OF_FORM = 204
36
- RESTRICTED_TO_CHILD_OF_INTERACTIVE = 205
37
-
38
- RESTRICTION_PARENT_ELEMENT = 300
39
- FORM_OWNER = 301
40
-
41
- # Below will help to know every elements.
42
- # https://www.w3.org/TR/html5/
43
- # https://dev.w3.org/html5/html-author/#the-elements
44
- # https://html.spec.whatwg.org/#elements-2
45
- #
46
- # Below will help to know default display of elements.
47
- # https://html.spec.whatwg.org/#rendering
48
-
49
- # This constant is for internal. SHOULD NOT be used outside this class.
50
- CONTENT_TYPES = {
51
- 'a' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
52
- 'abbr' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
53
- 'address' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
54
- 'area' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT],
55
- 'article' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
56
- 'aside' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
57
- 'audio' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
58
- 'b' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
59
- 'base' => [DOCUMENT_METADATA, METADATA_CONTENT],
60
- 'bb' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT],
61
- 'bdi' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
62
- 'bdo' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
63
- 'blockquote' => [GROUPING_CONTENT, FLOW_CONTENT, SECTIONING_ROOT, PALPABLE_CONTENT],
64
- 'body' => [SECTION, SECTIONING_ROOT],
65
- 'br' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT],
66
- 'button' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
67
- 'canvas' => [SCRIPTING, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENTS, PALPABLE_CONTENT],
68
- 'caption' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
69
- 'cite' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
70
- 'code' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
71
- 'col' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
72
- 'colgroup' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
73
- 'data' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
74
- 'datalist' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, RESTRICTION_PARENT_ELEMENT],
75
- 'dd' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
76
- 'del' => [EDITS, FLOW_CONTENT, PHRASING_CONTENT],
77
- 'details' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, SECTIONING_ROOT, INTERACTIVE_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
78
- 'dfn' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
79
- 'dialog' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, SECTIONING_ROOT],
80
- 'div' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
81
- 'dl' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
82
- 'dt' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
83
- 'em' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
84
- 'embed' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
85
- 'fieldset' => [FORMS, FLOW_CONTENT, SECTIONING_ROOT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
86
- 'figcaption' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
87
- 'figure' => [GROUPING_CONTENT, FLOW_CONTENT, SECTIONING_ROOT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
88
- 'footer' => [SECTION, FLOW_CONTENT, PALPABLE_CONTENT],
89
- 'form' => [FORMS, FLOW_CONTENT, PALPABLE_CONTENT, FORM_OWNER],
90
- 'h1' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
91
- 'h2' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
92
- 'h3' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
93
- 'h4' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
94
- 'h5' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
95
- 'h6' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
96
- 'head' => [DOCUMENT_METADATA],
97
- 'header' => [SECTIONING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
98
- 'hgroup' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
99
- 'hr' => [GROUPING_CONTENT, FLOW_CONTENT],
100
- 'html' => [DOCUMENT_ELEMENT],
101
- 'i' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
102
- 'iframe' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
103
- 'img' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, FORM_ASSOCIATED_ELEMENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
104
- 'input' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
105
- 'ins' => [EDITS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
106
- 'kbd' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
107
- 'label' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
108
- 'legend' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM],
109
- 'li' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
110
- 'link' => [DOCUMENT_METADATA, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT],
111
- 'main' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
112
- 'map' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
113
- 'mark' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
114
- 'menu' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, INTERACTIVE_CONTENT],
115
- 'meta' => [DOCUMENT_METADATA, METADATA_CONTENT],
116
- 'meter' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
117
- 'nav' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
118
- 'noscript' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT],
119
- 'object' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
120
- 'ol' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
121
- 'optgroup' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM, RESTRICTION_PARENT_ELEMENT],
122
- 'option' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM],
123
- 'output' => [FLOW_CONTENT, PHRASING_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
124
- 'p' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
125
- 'param' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
126
- 'picture' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT],
127
- 'pre' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
128
- 'progress' => [FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
129
- 'q' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
130
- 'rb' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
131
- 'rp' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
132
- 'rt' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
133
- 'rtc' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
134
- 'ruby' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
135
- 's' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
136
- 'samp' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
137
- 'script' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT, SCRIPT_SUPPORTING_ELEMENT],
138
- 'section' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
139
- 'select' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
140
- 'slot' => [SCRIPTING, FLOW_CONTENT, PHRASING_CONTENT],
141
- 'small' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
142
- 'source' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
143
- 'span' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
144
- 'strong' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
145
- 'style' => [DOCUMENT_METADATA, METADATA_CONTENT, FLOW_CONTENT],
146
- 'sub' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
147
- 'summary' => [INTERACTIVE_ELEMENTS, RESTRICTED_TO_CHILD_OF_INTERACTIVE],
148
- 'sup' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
149
- 'svg' => [SVG_ELEMENT],
150
- 'table' => [TABULAR_DATA, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
151
- 'tbody' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
152
- 'td' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
153
- 'template' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT, SCRIPT_SUPPORTING_ELEMENT],
154
- 'textarea' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, ESCAPABLE_RAW_TEXT_ELEMENT],
155
- 'tfoot' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
156
- 'th' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
157
- 'thead' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
158
- 'time' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
159
- 'title' => [DOCUMENT_METADATA, METADATA_CONTENT, ESCAPABLE_RAW_TEXT_ELEMENT],
160
- 'tr' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
161
- 'track' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
162
- 'u' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
163
- 'ul' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
164
- 'var' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
165
- 'video' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
166
- 'wbr' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT]
167
- }.freeze
168
-
169
- # NOTE: Autonomous "custom element" belongs to
170
- # [FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT]
171
-
172
- VOID_ELEMENTS = Set.new(%w[area base br col embed hr img input link meta param source track wbr]).freeze
173
-
174
- INLINE_ELEMENTS_INCLUDING_VOID = Set.new(CONTENT_TYPES.select do |_name, types|
175
- # Split all table related elements.
176
- next false if types.include?(TABULAR_DATA)
177
-
178
- # RESTRICTION_PARENT_ELEMENT creates group for specific purpose.
179
- next false if types.include?(RESTRICTION_PARENT_ELEMENT)
180
-
181
- # Ignore meta elements
182
- next false if types.include?(METADATA_CONTENT)
183
-
184
- # Text-level contents are for decorating text like span
185
- next true if types.include?(TEXT_LEVEL_SEMANTICS)
186
-
187
- # Divide media to block
188
- if types.include?(EMBEDDED_CONTENTS)
189
- # Not divide content inside media
190
- next true if types.include?(RESTRICTED_TO_CHILD_OF_EMBEDDED)
191
- next false
192
- end
193
-
194
- if types.include?(FORMS)
195
- # Divide form element creating section
196
- next false if types.include?(SECTIONING_ROOT)
197
-
198
- # Divide "FORM"
199
- next false if types.include?(FORM_OWNER)
200
-
201
- # Ignore contents inputted by user
202
- next false if types.include?(ESCAPABLE_RAW_TEXT_ELEMENT)
203
-
204
- # Otherwise are grouped as FORM
205
- next true
206
- end
207
-
208
- false
209
- end.keys).freeze
210
-
211
- INLINE_ELEMENTS = (INLINE_ELEMENTS_INCLUDING_VOID - VOID_ELEMENTS).freeze
212
- EMPTY_ELEMENTS = (INLINE_ELEMENTS_INCLUDING_VOID & VOID_ELEMENTS).freeze
213
-
214
- # IGNORE_ELEMENTS doesn't scrape contents inside the tags
215
- IGNORE_ELEMENTS = Set.new(CONTENT_TYPES.select do |name, types|
216
- # meta should be ignored but require swapping.
217
- next true if name == 'meta'
218
- next true if types.include?(EMBEDDED_CONTENTS) && !types.include?(RESTRICTED_TO_CHILD_OF_EMBEDDED)
219
- end.keys).freeze
220
-
221
- # SKIP_ELEMENTS doesn't scrape and swap contents
222
- SKIP_ELEMENTS = Set.new(CONTENT_TYPES.select do |name, types|
223
- # iterate title to get content
224
- next false if name == 'title'
225
- next false if name == 'meta'
226
-
227
- next true if types.include?(METADATA_CONTENT)
228
- end.keys).freeze
229
-
230
- # SKIP_ELEMENTS doesn't scrape and swap contents. But it scrape and swap tag's attributes like `placeholder`
231
- SKIP_ELEMENTS_WITHOUT_ATTRIBUTES = Set.new(CONTENT_TYPES.select do |name, types|
232
- # iterate title to get content
233
- next false if name == 'title'
234
-
235
- # Ignore contents inputted by user
236
- next true if types.include?(ESCAPABLE_RAW_TEXT_ELEMENT)
237
- end.keys).freeze
238
-
239
- BLOCK_ELEMENTS = (Set.new(CONTENT_TYPES.keys) - INLINE_ELEMENTS - EMPTY_ELEMENTS - IGNORE_ELEMENTS - SKIP_ELEMENTS - SKIP_ELEMENTS_WITHOUT_ATTRIBUTES).freeze
240
- end
241
- end
242
- end