wovnrb 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +308 -0
- data/Rakefile +13 -14
- data/lib/wovnrb.rb +43 -98
- data/lib/wovnrb/api_translator.rb +143 -0
- data/lib/wovnrb/headers.rb +78 -92
- data/lib/wovnrb/helpers/nokogumbo_helper.rb +1 -1
- data/lib/wovnrb/lang.rb +93 -125
- data/lib/wovnrb/railtie.rb +5 -7
- data/lib/wovnrb/services/glob.rb +3 -3
- data/lib/wovnrb/services/html_converter.rb +192 -0
- data/lib/wovnrb/services/html_replace_marker.rb +38 -0
- data/lib/wovnrb/services/wovn_logger.rb +8 -4
- data/lib/wovnrb/settings.rb +5 -3
- data/lib/wovnrb/store.rb +35 -26
- data/lib/wovnrb/text_caches/cache_base.rb +3 -2
- data/lib/wovnrb/text_caches/memory_cache.rb +2 -2
- data/lib/wovnrb/version.rb +1 -1
- data/test/fixtures/html/test.html +8 -0
- data/test/fixtures/html/test_translated.html +8 -0
- data/test/lib/api_translator_test.rb +109 -0
- data/test/lib/headers_test.rb +84 -55
- data/test/lib/lang_test.rb +157 -357
- data/test/lib/services/glob_test.rb +1 -1
- data/test/lib/services/html_converter_test.rb +166 -0
- data/test/lib/services/html_replace_marker_test.rb +75 -0
- data/test/lib/services/wovn_logger_test.rb +6 -6
- data/test/lib/store_test.rb +25 -69
- data/test/lib/text_caches/cache_base_test.rb +1 -1
- data/test/lib/text_caches/memory_cache_test.rb +10 -11
- data/test/lib/wovnrb_test.rb +77 -310
- data/test/test_helper.rb +22 -32
- data/wovnrb.gemspec +35 -44
- metadata +86 -205
- data/ext/dom/Makefile +0 -239
- data/lib/wovnrb/api_data.rb +0 -59
- data/lib/wovnrb/html_replacers/image_replacer.rb +0 -69
- data/lib/wovnrb/html_replacers/input_replacer.rb +0 -38
- data/lib/wovnrb/html_replacers/link_replacer.rb +0 -78
- data/lib/wovnrb/html_replacers/meta_replacer.rb +0 -28
- data/lib/wovnrb/html_replacers/replacer_base.rb +0 -49
- data/lib/wovnrb/html_replacers/script_replacer.rb +0 -39
- data/lib/wovnrb/html_replacers/text_replacer.rb +0 -21
- data/lib/wovnrb/html_replacers/unified_values/dst_swapping_targets_creator.rb +0 -76
- data/lib/wovnrb/html_replacers/unified_values/element_category.rb +0 -242
- data/lib/wovnrb/html_replacers/unified_values/node_swapping_targets_creator.rb +0 -134
- data/lib/wovnrb/html_replacers/unified_values/text_replacer.rb +0 -35
- data/lib/wovnrb/html_replacers/unified_values/text_scraper.rb +0 -152
- data/lib/wovnrb/html_replacers/unified_values/values_stack.rb +0 -65
- data/lib/wovnrb/services/url.rb +0 -12
- data/lib/wovnrb/services/value_agent.rb +0 -9
- data/test/fixtures/unified_values/site_html/simple_actual.html +0 -96
- data/test/fixtures/unified_values/site_html/simple_expected.json +0 -251
- data/test/fixtures/unified_values/site_html/wovn.io_actual.html +0 -686
- data/test/fixtures/unified_values/site_html/wovn.io_expected.json +0 -543
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_actual.html +0 -1024
- data/test/fixtures/unified_values/site_html/www.yahoo.co.jp_expected.json +0 -3345
- data/test/fixtures/unified_values/small_html/block_inside_inline_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/block_inside_inline_expected.json +0 -22
- data/test/fixtures/unified_values/small_html/br_tag_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/br_tag_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/comment_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/comment_tag_expected.json +0 -10
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_actual.html +0 -7
- data/test/fixtures/unified_values/small_html/complex_text_with_html_entity_expected.json +0 -11
- data/test/fixtures/unified_values/small_html/deep_nested_block_actual.html +0 -14
- data/test/fixtures/unified_values/small_html/deep_nested_block_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/deep_nested_inline_actual.html +0 -20
- data/test/fixtures/unified_values/small_html/deep_nested_inline_expected.json +0 -20
- data/test/fixtures/unified_values/small_html/empty_tag_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/empty_tag_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/empty_text_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/empty_text_expected.json +0 -1
- data/test/fixtures/unified_values/small_html/ignore_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/ignore_tag_expected.json +0 -16
- data/test/fixtures/unified_values/small_html/ignored_class_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/ignored_class_expected.json +0 -13
- data/test/fixtures/unified_values/small_html/img_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/img_expected.json +0 -23
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_and_complex_wovn_ignore_expected.json +0 -16
- data/test/fixtures/unified_values/small_html/nested_text_value_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_text_value_expected.json +0 -12
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/nested_text_value_mixed_plan_text_expected.json +0 -14
- data/test/fixtures/unified_values/small_html/option_tag_actual.html +0 -9
- data/test/fixtures/unified_values/small_html/option_tag_expected.json +0 -13
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/text_different_inline_each_other_expected.json +0 -22
- data/test/fixtures/unified_values/small_html/text_in_svg_actual.html +0 -9
- data/test/fixtures/unified_values/small_html/text_in_svg_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/text_with_html_entity_actual.html +0 -6
- data/test/fixtures/unified_values/small_html/text_with_html_entity_expected.json +0 -8
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/unknown_or_custom_tag_expected.json +0 -24
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_actual.html +0 -12
- data/test/fixtures/unified_values/small_html/unnecessay_top_end_tag_expected.json +0 -14
- data/test/fixtures/unified_values/small_html/wovn_ignore_actual.html +0 -10
- data/test/fixtures/unified_values/small_html/wovn_ignore_expected.json +0 -13
- data/test/lib/api_data_test.rb +0 -83
- data/test/lib/html_replacers/image_replacer_test.rb +0 -165
- data/test/lib/html_replacers/input_replacer_test.rb +0 -140
- data/test/lib/html_replacers/link_replacer_test.rb +0 -328
- data/test/lib/html_replacers/meta_replacer_test.rb +0 -157
- data/test/lib/html_replacers/replacer_base_test.rb +0 -128
- data/test/lib/html_replacers/script_replacer_test.rb +0 -139
- data/test/lib/html_replacers/text_replacer_test.rb +0 -99
- data/test/lib/html_replacers/unified_values/dst_swapping_targets_creator_test.rb +0 -137
- data/test/lib/html_replacers/unified_values/element_category_test.rb +0 -49
- data/test/lib/html_replacers/unified_values/node_swapping_targets_creator_test.rb +0 -137
- data/test/lib/html_replacers/unified_values/text_replacer_test.rb +0 -270
- data/test/lib/html_replacers/unified_values/text_scraper_test.rb +0 -121
- data/test/lib/html_replacers/unified_values/values_stack_test.rb +0 -122
- data/test/lib/services/url_test.rb +0 -9
- data/test/lib/services/value_agent_test.rb +0 -32
- data/test/services/url_test.rb +0 -163
- data/values/values +0 -1
@@ -1,28 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
class MetaReplacer < ReplacerBase
|
3
|
-
def initialize(store, text_index, pattern = nil, headers = nil)
|
4
|
-
super(store)
|
5
|
-
@text_index = text_index
|
6
|
-
@pattern = pattern
|
7
|
-
@headers = headers
|
8
|
-
end
|
9
|
-
|
10
|
-
def replace(dom, lang)
|
11
|
-
dom.xpath('.//meta').select { |node|
|
12
|
-
next if wovn_ignore?(node)
|
13
|
-
(node.get_attribute('name') || node.get_attribute('property') || '') =~ /^(description|title|og:title|og:description|og:url|twitter:title|twitter:description)$/
|
14
|
-
}.each do |node|
|
15
|
-
node_content = node.get_attribute('content').strip
|
16
|
-
if @headers && @pattern && node.get_attribute('property') && node.get_attribute('property') === 'og:url'
|
17
|
-
new_url = lang.add_lang_code(node_content, @pattern, @headers)
|
18
|
-
node.set_attribute('content', new_url)
|
19
|
-
next
|
20
|
-
end
|
21
|
-
# shouldn't need size check, but for now...
|
22
|
-
if @text_index[node_content] && @text_index[node_content][lang.lang_code] && @text_index[node_content][lang.lang_code].size > 0
|
23
|
-
node.set_attribute('content', replace_text(node.get_attribute('content'), @text_index[node_content][lang.lang_code][0]['data']))
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
class ReplacerBase
|
3
|
-
def initialize(store, ignored_class_set = [])
|
4
|
-
@store = store
|
5
|
-
@ignored_class_set = Set.new(ignored_class_set)
|
6
|
-
end
|
7
|
-
|
8
|
-
def replace(dom, lang)
|
9
|
-
raise NotImplementedError.new('replace is not defined')
|
10
|
-
end
|
11
|
-
|
12
|
-
protected
|
13
|
-
def wovn_ignore?(node)
|
14
|
-
if !node.get_attribute('wovn-ignore').nil?
|
15
|
-
return true
|
16
|
-
elsif node.name === 'html' || node.parent.nil?
|
17
|
-
return false
|
18
|
-
end
|
19
|
-
|
20
|
-
node_class = node.get_attribute('class')
|
21
|
-
if node_class
|
22
|
-
classes = node_class.split
|
23
|
-
@store.settings['ignore_class'].each do |ignore_class|
|
24
|
-
if classes.include?(ignore_class)
|
25
|
-
return true
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
wovn_ignore?(node.parent)
|
31
|
-
end
|
32
|
-
|
33
|
-
# Add a comment node to remember the original src
|
34
|
-
# <title> may not contain other markup, so for text inside <title> the comment node is added before the <title> element instead
|
35
|
-
# @see https://www.w3.org/TR/html401/struct/global.html#h-7.4.2
|
36
|
-
def add_comment_node(node, text)
|
37
|
-
comment_node = Nokogiri::XML::Comment.new(node.document, "wovn-src:#{text}")
|
38
|
-
if node.parent.name == 'title'
|
39
|
-
node.parent.add_previous_sibling(comment_node)
|
40
|
-
else
|
41
|
-
node.add_previous_sibling(comment_node)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def replace_text(from, to)
|
46
|
-
from.gsub(/\A(\s*)[\S\s]*?(\s*)\Z/, '\1' + to + '\2')
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
class ScriptReplacer < ReplacerBase
|
3
|
-
def initialize(store)
|
4
|
-
super(store)
|
5
|
-
end
|
6
|
-
|
7
|
-
def replace(dom, lang)
|
8
|
-
remove_embed_wovn_script(dom)
|
9
|
-
add_wovn_script(dom, lang)
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
13
|
-
def remove_embed_wovn_script(dom)
|
14
|
-
dom.xpath('//script').each do |script_node|
|
15
|
-
if script_node['src'] && script_node['src'] =~ /^\/\/j.(dev-)?wovn.io(:3000)?\//
|
16
|
-
script_node.remove
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def add_wovn_script(dom, lang)
|
22
|
-
parent_node = dom.at_css('head') || dom.at_css('body') || dom.at_css('html')
|
23
|
-
|
24
|
-
# INSERT BACKEND WIDGET
|
25
|
-
insert_node = Nokogiri::XML::Node.new('script', dom)
|
26
|
-
insert_node['src'] = "//j.#{@store.wovn_host}/1"
|
27
|
-
insert_node['async'] = true
|
28
|
-
version = defined?(VERSION) ? VERSION : ''
|
29
|
-
insert_node['data-wovnio'] = "key=#{@store.settings['project_token']}&backend=true&currentLang=#{lang.lang_code}&defaultLang=#{@store.settings['default_lang']}&urlPattern=#{@store.settings['url_pattern']}&langCodeAliases=#{JSON.dump(@store.settings['custom_lang_aliases'])}&version=#{version}"
|
30
|
-
# do this so that there will be a closing tag (better compatibility with browsers)
|
31
|
-
insert_node.content = ' '
|
32
|
-
if parent_node.children.size > 0
|
33
|
-
parent_node.children.first.add_previous_sibling(insert_node)
|
34
|
-
else
|
35
|
-
parent_node.add_child(insert_node)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
class TextReplacer < ReplacerBase
|
3
|
-
def initialize(store, text_index)
|
4
|
-
super(store)
|
5
|
-
@text_index = text_index
|
6
|
-
end
|
7
|
-
|
8
|
-
def replace(dom, lang)
|
9
|
-
dom.xpath('.//text()').each do |node|
|
10
|
-
next if wovn_ignore?(node)
|
11
|
-
|
12
|
-
node_text = node.content.strip
|
13
|
-
# shouldn't need size check, but for now...
|
14
|
-
if @text_index[node_text] && @text_index[node_text][lang.lang_code] && @text_index[node_text][lang.lang_code].size > 0
|
15
|
-
add_comment_node(node, node_text)
|
16
|
-
node.content = replace_text(node.content, @text_index[node_text][lang.lang_code][0]['data'])
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
module UnifiedValues
|
3
|
-
class DstSwappingTargetsCreator
|
4
|
-
# NOTE: `text_index` is the format like below
|
5
|
-
#
|
6
|
-
# {
|
7
|
-
# "<span>apple is a good</span>foods"=>
|
8
|
-
# {"ja" =>
|
9
|
-
# [{"xpath"=>"/html/body/div", "data"=>"りんごは<span>おいしい</span>たべものです"}]
|
10
|
-
# },
|
11
|
-
# "click<a>here</a>"=>
|
12
|
-
# {"ja" =>
|
13
|
-
# [{"xpath"=>"/html/body/div", "data"=>"<a>こちら</a>をクリックしてください"}]
|
14
|
-
# }
|
15
|
-
# }
|
16
|
-
|
17
|
-
def initialize(text_index)
|
18
|
-
@text_index = text_index
|
19
|
-
end
|
20
|
-
|
21
|
-
# NOTE: `run!` adds a 'swapping_targets' entry to each value, like below
|
22
|
-
#
|
23
|
-
# {
|
24
|
-
# "<span>apple is a good</span>foods"=>
|
25
|
-
# {"ja" =>
|
26
|
-
# [{"xpath"=>"/html/body/div", "data"=>"りんごは<span>おいしい</span>たべものです", 'swapping_targets'=>["りんごは", "おいしい", "たべものです"]}]
|
27
|
-
# },
|
28
|
-
# "click<a>here</a>"=>
|
29
|
-
# {"ja" =>
|
30
|
-
# [{"xpath"=>"/html/body/div", "data"=>"<a>こちら</a>をクリックしてください", 'swapping_targets'=>["", "こちら", "をクリックしてください"]}]
|
31
|
-
# }
|
32
|
-
# }
|
33
|
-
|
34
|
-
def run!
|
35
|
-
@text_index.each do |_, v|
|
36
|
-
mold = []
|
37
|
-
v.values.each do |values|
|
38
|
-
values.each do |value|
|
39
|
-
value['data'].split(/(<.+?>)/).each_with_index do |data, _index|
|
40
|
-
mold_size = mold.size
|
41
|
-
mold.push('') if mold_size.even? && data.start_with?('<')
|
42
|
-
mold.push(data)
|
43
|
-
end
|
44
|
-
|
45
|
-
mold.push('') if /\A<.+?>\z/ =~ mold.last
|
46
|
-
|
47
|
-
value['swapping_targets'] = remove_tag_element(mold)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def remove_tag_element(mold)
|
56
|
-
end_tag_of_wovn_ignore = nil
|
57
|
-
swapping_targets = []
|
58
|
-
|
59
|
-
mold.each do |value|
|
60
|
-
if end_tag_of_wovn_ignore.nil? && value =~ /\A<.*wovn-ignore>\z/
|
61
|
-
end_tag_of_wovn_ignore = "</#{value.gsub(' wovn-ignore', '')[1..-1]}"
|
62
|
-
next
|
63
|
-
end
|
64
|
-
|
65
|
-
end_tag_of_wovn_ignore = nil if value == end_tag_of_wovn_ignore
|
66
|
-
|
67
|
-
if end_tag_of_wovn_ignore.nil? && /\A<.+?>\z/ !~ value
|
68
|
-
swapping_targets << value
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
swapping_targets
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
@@ -1,242 +0,0 @@
|
|
1
|
-
module Wovnrb
|
2
|
-
module UnifiedValues
|
3
|
-
module ElementCategory
|
4
|
-
FLOW_CONTENT = 1
|
5
|
-
PALPABLE_CONTENT = 2
|
6
|
-
PHRASING_CONTENT = 3
|
7
|
-
EMBEDDED_CONTENT = 4
|
8
|
-
INTERACTIVE_CONTENT = 5
|
9
|
-
METADATA_CONTENT = 6
|
10
|
-
HEADING_CONTENT = 7
|
11
|
-
SECTIONING_CONTENT = 8
|
12
|
-
SECTIONING_ROOT = 9
|
13
|
-
FORM_ASSOCIATED_ELEMENT = 10
|
14
|
-
SCRIPT_SUPPORTING_ELEMENT = 11
|
15
|
-
SVG_ELEMENT = 12
|
16
|
-
|
17
|
-
DOCUMENT_ELEMENT = 100
|
18
|
-
DOCUMENT_METADATA = 101
|
19
|
-
SECTION = 1010
|
20
|
-
GROUPING_CONTENT = 103
|
21
|
-
TEXT_LEVEL_SEMANTICS = 104
|
22
|
-
EDITS = 105
|
23
|
-
EMBEDDED_CONTENTS = 106
|
24
|
-
LINKS = 107
|
25
|
-
TABULAR_DATA = 108
|
26
|
-
FORMS = 109
|
27
|
-
INTERACTIVE_ELEMENTS = 110
|
28
|
-
SCRIPTING = 111
|
29
|
-
ESCAPABLE_RAW_TEXT_ELEMENT = 112
|
30
|
-
|
31
|
-
RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS = 200
|
32
|
-
RESTRICTED_TO_CHILD_OF_GROUPING = 201
|
33
|
-
RESTRICTED_TO_CHILD_OF_EMBEDDED = 202
|
34
|
-
RESTRICTED_TO_CHILD_OF_TABULAR = 203
|
35
|
-
RESTRICTED_TO_CHILD_OF_FORM = 204
|
36
|
-
RESTRICTED_TO_CHILD_OF_INTERACTIVE = 205
|
37
|
-
|
38
|
-
RESTRICTION_PARENT_ELEMENT = 300
|
39
|
-
FORM_OWNER = 301
|
40
|
-
|
41
|
-
# The references below describe every element.
|
42
|
-
# https://www.w3.org/TR/html5/
|
43
|
-
# https://dev.w3.org/html5/html-author/#the-elements
|
44
|
-
# https://html.spec.whatwg.org/#elements-2
|
45
|
-
#
|
46
|
-
# The reference below describes the default display of elements.
|
47
|
-
# https://html.spec.whatwg.org/#rendering
|
48
|
-
|
49
|
-
# This constant is for internal use only. It SHOULD NOT be used outside this module.
|
50
|
-
CONTENT_TYPES = {
|
51
|
-
'a' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
52
|
-
'abbr' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
53
|
-
'address' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
54
|
-
'area' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT],
|
55
|
-
'article' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
|
56
|
-
'aside' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
|
57
|
-
'audio' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
58
|
-
'b' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
59
|
-
'base' => [DOCUMENT_METADATA, METADATA_CONTENT],
|
60
|
-
'bb' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT],
|
61
|
-
'bdi' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
62
|
-
'bdo' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
63
|
-
'blockquote' => [GROUPING_CONTENT, FLOW_CONTENT, SECTIONING_ROOT, PALPABLE_CONTENT],
|
64
|
-
'body' => [SECTION, SECTIONING_ROOT],
|
65
|
-
'br' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT],
|
66
|
-
'button' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
|
67
|
-
'canvas' => [SCRIPTING, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENTS, PALPABLE_CONTENT],
|
68
|
-
'caption' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
|
69
|
-
'cite' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
70
|
-
'code' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
71
|
-
'col' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
|
72
|
-
'colgroup' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
|
73
|
-
'data' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
74
|
-
'datalist' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
75
|
-
'dd' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
|
76
|
-
'del' => [EDITS, FLOW_CONTENT, PHRASING_CONTENT],
|
77
|
-
'details' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, SECTIONING_ROOT, INTERACTIVE_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
78
|
-
'dfn' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
79
|
-
'dialog' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, SECTIONING_ROOT],
|
80
|
-
'div' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
81
|
-
'dl' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
82
|
-
'dt' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
|
83
|
-
'em' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
84
|
-
'embed' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
85
|
-
'fieldset' => [FORMS, FLOW_CONTENT, SECTIONING_ROOT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
86
|
-
'figcaption' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
|
87
|
-
'figure' => [GROUPING_CONTENT, FLOW_CONTENT, SECTIONING_ROOT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
88
|
-
'footer' => [SECTION, FLOW_CONTENT, PALPABLE_CONTENT],
|
89
|
-
'form' => [FORMS, FLOW_CONTENT, PALPABLE_CONTENT, FORM_OWNER],
|
90
|
-
'h1' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
91
|
-
'h2' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
92
|
-
'h3' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
93
|
-
'h4' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
94
|
-
'h5' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
95
|
-
'h6' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
96
|
-
'head' => [DOCUMENT_METADATA],
|
97
|
-
'header' => [SECTIONING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
98
|
-
'hgroup' => [SECTION, FLOW_CONTENT, HEADING_CONTENT, PALPABLE_CONTENT],
|
99
|
-
'hr' => [GROUPING_CONTENT, FLOW_CONTENT],
|
100
|
-
'html' => [DOCUMENT_ELEMENT],
|
101
|
-
'i' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
102
|
-
'iframe' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
103
|
-
'img' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, FORM_ASSOCIATED_ELEMENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
104
|
-
'input' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
|
105
|
-
'ins' => [EDITS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
106
|
-
'kbd' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
107
|
-
'label' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
|
108
|
-
'legend' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM],
|
109
|
-
'li' => [GROUPING_CONTENT, RESTRICTED_TO_CHILD_OF_GROUPING],
|
110
|
-
'link' => [DOCUMENT_METADATA, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT],
|
111
|
-
'main' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
112
|
-
'map' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
113
|
-
'mark' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
114
|
-
'menu' => [INTERACTIVE_ELEMENTS, FLOW_CONTENT, INTERACTIVE_CONTENT],
|
115
|
-
'meta' => [DOCUMENT_METADATA, METADATA_CONTENT],
|
116
|
-
'meter' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
117
|
-
'nav' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
|
118
|
-
'noscript' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT],
|
119
|
-
'object' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
120
|
-
'ol' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
121
|
-
'optgroup' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM, RESTRICTION_PARENT_ELEMENT],
|
122
|
-
'option' => [FORMS, RESTRICTED_TO_CHILD_OF_FORM],
|
123
|
-
'output' => [FLOW_CONTENT, PHRASING_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT],
|
124
|
-
'p' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
125
|
-
'param' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
|
126
|
-
'picture' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT],
|
127
|
-
'pre' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT],
|
128
|
-
'progress' => [FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
129
|
-
'q' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
130
|
-
'rb' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
|
131
|
-
'rp' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
|
132
|
-
'rt' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
|
133
|
-
'rtc' => [TEXT_LEVEL_SEMANTICS, RESTRICTED_TO_CHILD_OF_TEXT_LEVEL_SEMANTICS],
|
134
|
-
'ruby' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
135
|
-
's' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
136
|
-
'samp' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
137
|
-
'script' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT, SCRIPT_SUPPORTING_ELEMENT],
|
138
|
-
'section' => [SECTION, FLOW_CONTENT, SECTIONING_CONTENT, PALPABLE_CONTENT],
|
139
|
-
'select' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
140
|
-
'slot' => [SCRIPTING, FLOW_CONTENT, PHRASING_CONTENT],
|
141
|
-
'small' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
142
|
-
'source' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
|
143
|
-
'span' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
144
|
-
'strong' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
145
|
-
'style' => [DOCUMENT_METADATA, METADATA_CONTENT, FLOW_CONTENT],
|
146
|
-
'sub' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
147
|
-
'summary' => [INTERACTIVE_ELEMENTS, RESTRICTED_TO_CHILD_OF_INTERACTIVE],
|
148
|
-
'sup' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
149
|
-
'svg' => [SVG_ELEMENT],
|
150
|
-
'table' => [TABULAR_DATA, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
151
|
-
'tbody' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
|
152
|
-
'td' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
|
153
|
-
'template' => [SCRIPTING, METADATA_CONTENT, FLOW_CONTENT, PHRASING_CONTENT, SCRIPT_SUPPORTING_ELEMENT],
|
154
|
-
'textarea' => [FORMS, FLOW_CONTENT, PHRASING_CONTENT, INTERACTIVE_CONTENT, FORM_ASSOCIATED_ELEMENT, PALPABLE_CONTENT, ESCAPABLE_RAW_TEXT_ELEMENT],
|
155
|
-
'tfoot' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
|
156
|
-
'th' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR],
|
157
|
-
'thead' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
|
158
|
-
'time' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
159
|
-
'title' => [DOCUMENT_METADATA, METADATA_CONTENT, ESCAPABLE_RAW_TEXT_ELEMENT],
|
160
|
-
'tr' => [TABULAR_DATA, RESTRICTED_TO_CHILD_OF_TABULAR, RESTRICTION_PARENT_ELEMENT],
|
161
|
-
'track' => [EMBEDDED_CONTENTS, RESTRICTED_TO_CHILD_OF_EMBEDDED],
|
162
|
-
'u' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
163
|
-
'ul' => [GROUPING_CONTENT, FLOW_CONTENT, PALPABLE_CONTENT, RESTRICTION_PARENT_ELEMENT],
|
164
|
-
'var' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT],
|
165
|
-
'video' => [EMBEDDED_CONTENTS, FLOW_CONTENT, PHRASING_CONTENT, EMBEDDED_CONTENT, INTERACTIVE_CONTENT, PALPABLE_CONTENT],
|
166
|
-
'wbr' => [TEXT_LEVEL_SEMANTICS, FLOW_CONTENT, PHRASING_CONTENT]
|
167
|
-
}.freeze
|
168
|
-
|
169
|
-
# NOTE: Autonomous "custom element" belongs to
|
170
|
-
# [FLOW_CONTENT, PHRASING_CONTENT, PALPABLE_CONTENT]
|
171
|
-
|
172
|
-
VOID_ELEMENTS = Set.new(%w[area base br col embed hr img input link meta param source track wbr]).freeze
|
173
|
-
|
174
|
-
INLINE_ELEMENTS_INCLUDING_VOID = Set.new(CONTENT_TYPES.select do |_name, types|
|
175
|
-
# Split all table related elements.
|
176
|
-
next false if types.include?(TABULAR_DATA)
|
177
|
-
|
178
|
-
# RESTRICTION_PARENT_ELEMENT creates group for specific purpose.
|
179
|
-
next false if types.include?(RESTRICTION_PARENT_ELEMENT)
|
180
|
-
|
181
|
-
# Ignore meta elements
|
182
|
-
next false if types.include?(METADATA_CONTENT)
|
183
|
-
|
184
|
-
# Text-level contents are for decorating text like span
|
185
|
-
next true if types.include?(TEXT_LEVEL_SEMANTICS)
|
186
|
-
|
187
|
-
# Divide media to block
|
188
|
-
if types.include?(EMBEDDED_CONTENTS)
|
189
|
-
# Not divide content inside media
|
190
|
-
next true if types.include?(RESTRICTED_TO_CHILD_OF_EMBEDDED)
|
191
|
-
next false
|
192
|
-
end
|
193
|
-
|
194
|
-
if types.include?(FORMS)
|
195
|
-
# Divide form element creating section
|
196
|
-
next false if types.include?(SECTIONING_ROOT)
|
197
|
-
|
198
|
-
# Divide "FORM"
|
199
|
-
next false if types.include?(FORM_OWNER)
|
200
|
-
|
201
|
-
# Ignore contents inputted by user
|
202
|
-
next false if types.include?(ESCAPABLE_RAW_TEXT_ELEMENT)
|
203
|
-
|
204
|
-
# Otherwise are grouped as FORM
|
205
|
-
next true
|
206
|
-
end
|
207
|
-
|
208
|
-
false
|
209
|
-
end.keys).freeze
|
210
|
-
|
211
|
-
INLINE_ELEMENTS = (INLINE_ELEMENTS_INCLUDING_VOID - VOID_ELEMENTS).freeze
|
212
|
-
EMPTY_ELEMENTS = (INLINE_ELEMENTS_INCLUDING_VOID & VOID_ELEMENTS).freeze
|
213
|
-
|
214
|
-
# IGNORE_ELEMENTS doesn't scrape contents inside the tags
|
215
|
-
IGNORE_ELEMENTS = Set.new(CONTENT_TYPES.select do |name, types|
|
216
|
-
# meta should be ignored but require swapping.
|
217
|
-
next true if name == 'meta'
|
218
|
-
next true if types.include?(EMBEDDED_CONTENTS) && !types.include?(RESTRICTED_TO_CHILD_OF_EMBEDDED)
|
219
|
-
end.keys).freeze
|
220
|
-
|
221
|
-
# SKIP_ELEMENTS doesn't scrape and swap contents
|
222
|
-
SKIP_ELEMENTS = Set.new(CONTENT_TYPES.select do |name, types|
|
223
|
-
# iterate title to get content
|
224
|
-
next false if name == 'title'
|
225
|
-
next false if name == 'meta'
|
226
|
-
|
227
|
-
next true if types.include?(METADATA_CONTENT)
|
228
|
-
end.keys).freeze
|
229
|
-
|
230
|
-
# SKIP_ELEMENTS_WITHOUT_ATTRIBUTES doesn't scrape or swap contents, but it does scrape and swap tag attributes like `placeholder`
|
231
|
-
SKIP_ELEMENTS_WITHOUT_ATTRIBUTES = Set.new(CONTENT_TYPES.select do |name, types|
|
232
|
-
# iterate title to get content
|
233
|
-
next false if name == 'title'
|
234
|
-
|
235
|
-
# Ignore contents inputted by user
|
236
|
-
next true if types.include?(ESCAPABLE_RAW_TEXT_ELEMENT)
|
237
|
-
end.keys).freeze
|
238
|
-
|
239
|
-
BLOCK_ELEMENTS = (Set.new(CONTENT_TYPES.keys) - INLINE_ELEMENTS - EMPTY_ELEMENTS - IGNORE_ELEMENTS - SKIP_ELEMENTS - SKIP_ELEMENTS_WITHOUT_ATTRIBUTES).freeze
|
240
|
-
end
|
241
|
-
end
|
242
|
-
end
|