coradoc-html 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103):
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/cleaner.rb +128 -0
  3. data/lib/coradoc/html/converters/a.rb +77 -0
  4. data/lib/coradoc/html/converters/aside.rb +20 -0
  5. data/lib/coradoc/html/converters/audio.rb +19 -0
  6. data/lib/coradoc/html/converters/base.rb +98 -0
  7. data/lib/coradoc/html/converters/blockquote.rb +25 -0
  8. data/lib/coradoc/html/converters/br.rb +17 -0
  9. data/lib/coradoc/html/converters/bypass.rb +82 -0
  10. data/lib/coradoc/html/converters/code.rb +25 -0
  11. data/lib/coradoc/html/converters/div.rb +23 -0
  12. data/lib/coradoc/html/converters/dl.rb +82 -0
  13. data/lib/coradoc/html/converters/drop.rb +26 -0
  14. data/lib/coradoc/html/converters/em.rb +23 -0
  15. data/lib/coradoc/html/converters/figure.rb +33 -0
  16. data/lib/coradoc/html/converters/h.rb +58 -0
  17. data/lib/coradoc/html/converters/head.rb +29 -0
  18. data/lib/coradoc/html/converters/hr.rb +17 -0
  19. data/lib/coradoc/html/converters/img.rb +103 -0
  20. data/lib/coradoc/html/converters/li.rb +35 -0
  21. data/lib/coradoc/html/converters/mark.rb +21 -0
  22. data/lib/coradoc/html/converters/markup.rb +93 -0
  23. data/lib/coradoc/html/converters/math.rb +37 -0
  24. data/lib/coradoc/html/converters/media_base.rb +48 -0
  25. data/lib/coradoc/html/converters/ol.rb +42 -0
  26. data/lib/coradoc/html/converters/p.rb +64 -0
  27. data/lib/coradoc/html/converters/pass_through.rb +15 -0
  28. data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
  29. data/lib/coradoc/html/converters/pre.rb +57 -0
  30. data/lib/coradoc/html/converters/q.rb +25 -0
  31. data/lib/coradoc/html/converters/strong.rb +22 -0
  32. data/lib/coradoc/html/converters/sub.rb +20 -0
  33. data/lib/coradoc/html/converters/sup.rb +20 -0
  34. data/lib/coradoc/html/converters/table.rb +64 -0
  35. data/lib/coradoc/html/converters/td.rb +42 -0
  36. data/lib/coradoc/html/converters/text.rb +66 -0
  37. data/lib/coradoc/html/converters/tr.rb +27 -0
  38. data/lib/coradoc/html/converters/video.rb +27 -0
  39. data/lib/coradoc/html/converters.rb +104 -0
  40. data/lib/coradoc/html/drop/drop_factory.rb +14 -22
  41. data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
  42. data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
  43. data/lib/coradoc/html/drop.rb +30 -8
  44. data/lib/coradoc/html/errors.rb +11 -0
  45. data/lib/coradoc/html/html_converter.rb +78 -0
  46. data/lib/coradoc/html/input_config.rb +66 -0
  47. data/lib/coradoc/html/plugin.rb +90 -0
  48. data/lib/coradoc/html/plugins/plateau.rb +212 -0
  49. data/lib/coradoc/html/postprocessor.rb +19 -0
  50. data/lib/coradoc/html/spa.rb +0 -2
  51. data/lib/coradoc/html/static.rb +0 -2
  52. data/lib/coradoc/html/tag_mapping.rb +3 -1
  53. data/lib/coradoc/html/transform/from_core_model.rb +2 -2
  54. data/lib/coradoc/html/transform/to_core_model.rb +3 -3
  55. data/lib/coradoc/html/version.rb +1 -1
  56. data/lib/coradoc/html.rb +30 -5
  57. metadata +46 -47
  58. data/lib/coradoc/html/input/cleaner.rb +0 -134
  59. data/lib/coradoc/html/input/config.rb +0 -80
  60. data/lib/coradoc/html/input/converters/a.rb +0 -79
  61. data/lib/coradoc/html/input/converters/aside.rb +0 -22
  62. data/lib/coradoc/html/input/converters/audio.rb +0 -21
  63. data/lib/coradoc/html/input/converters/base.rb +0 -118
  64. data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
  65. data/lib/coradoc/html/input/converters/br.rb +0 -19
  66. data/lib/coradoc/html/input/converters/bypass.rb +0 -84
  67. data/lib/coradoc/html/input/converters/code.rb +0 -27
  68. data/lib/coradoc/html/input/converters/div.rb +0 -25
  69. data/lib/coradoc/html/input/converters/dl.rb +0 -84
  70. data/lib/coradoc/html/input/converters/drop.rb +0 -28
  71. data/lib/coradoc/html/input/converters/em.rb +0 -25
  72. data/lib/coradoc/html/input/converters/figure.rb +0 -35
  73. data/lib/coradoc/html/input/converters/h.rb +0 -74
  74. data/lib/coradoc/html/input/converters/head.rb +0 -31
  75. data/lib/coradoc/html/input/converters/hr.rb +0 -19
  76. data/lib/coradoc/html/input/converters/img.rb +0 -105
  77. data/lib/coradoc/html/input/converters/li.rb +0 -37
  78. data/lib/coradoc/html/input/converters/mark.rb +0 -23
  79. data/lib/coradoc/html/input/converters/markup.rb +0 -103
  80. data/lib/coradoc/html/input/converters/math.rb +0 -39
  81. data/lib/coradoc/html/input/converters/media_base.rb +0 -50
  82. data/lib/coradoc/html/input/converters/ol.rb +0 -44
  83. data/lib/coradoc/html/input/converters/p.rb +0 -90
  84. data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
  85. data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
  86. data/lib/coradoc/html/input/converters/pre.rb +0 -59
  87. data/lib/coradoc/html/input/converters/q.rb +0 -27
  88. data/lib/coradoc/html/input/converters/strong.rb +0 -24
  89. data/lib/coradoc/html/input/converters/sub.rb +0 -22
  90. data/lib/coradoc/html/input/converters/sup.rb +0 -22
  91. data/lib/coradoc/html/input/converters/table.rb +0 -66
  92. data/lib/coradoc/html/input/converters/td.rb +0 -44
  93. data/lib/coradoc/html/input/converters/text.rb +0 -68
  94. data/lib/coradoc/html/input/converters/tr.rb +0 -29
  95. data/lib/coradoc/html/input/converters/video.rb +0 -29
  96. data/lib/coradoc/html/input/converters.rb +0 -107
  97. data/lib/coradoc/html/input/errors.rb +0 -22
  98. data/lib/coradoc/html/input/html_converter.rb +0 -98
  99. data/lib/coradoc/html/input/plugin.rb +0 -120
  100. data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
  101. data/lib/coradoc/html/input/postprocessor.rb +0 -25
  102. data/lib/coradoc/html/input.rb +0 -86
  103. data/lib/coradoc/html/output.rb +0 -89
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be959f8cb4f3c516d0f81d10937dabee6eacae2ddca275df46c8700e9dc800cf
4
- data.tar.gz: 13c683ec524a321f1cca99aec03f9659591761ec58cf725cf5aa33ffd4af7705
3
+ metadata.gz: 011622bc6889a0af8aadea9d1a13e4cd4e322b808f5afe95b5da4a785a4b720f
4
+ data.tar.gz: ffe601423e5ab805e854e150744983336c49d217858a9f4d5aa660ced64d9f9b
5
5
  SHA512:
6
- metadata.gz: 02c5c0388481200b0880bb3ab5b8afa042dc7bd32b00ddab298ffe4257186305249e0511b6686f38a57892ccbd99138157ed0b29dc0177cd71a8042a3c085f14
7
- data.tar.gz: ce56d6b09705b12f0a8eb333b907d0abef4532e139c22080cc65285157c2c8bc2c693a23fed3eba55141ae9aff1380600b89b3181579c7da4dbf002d1f2b4525
6
+ metadata.gz: 0225afbdd3e517cb260bad9edc7a469605918937faebcc5a425b202e61ac36a6e66d96304defbca5562aa49421fdb20d436c0d47241ab272a0c19344ae839940
7
+ data.tar.gz: c2847f907efb900fb4ccef551254e808cc042c7aa5d8af2c0439a493fa560b58e07ed55d643a9c915d4cd6f765b1c7d6cff498e6408fee1e8f242b4833b40ff5
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    # Text clean-up passes applied to converted output (and, via
    # #preprocess_word_html, to raw HTML before conversion).
    #
    # Every public method takes a String and returns a String. Only
    # #scrub_whitespace and #clean_headings modify their argument in place;
    # all other passes leave the argument untouched.
    class Cleaner
      # "\n stem:[" — a stem macro indented by one space at the start of a line.
      INNER_WHITESPACE_REGEX_1 = /\n stem:\[/
      # A complete stem macro followed by a newline and a non-blank character.
      INNER_WHITESPACE_REGEX_2 = /(stem:\[([^\]]|\\\])*\])\n(?=\S)/
      NEWLINES_REGEX = /\n{3,}/
      LEADING_NEWLINE_REGEX = /\A\n+/
      WHITESPACE_REGEX = /[ \t\r\n]+/
      TRAILING_WHITESPACE_REGEX = /[ \t\r\n]+\z/

      # Runs all clean-up passes, in a fixed order, over +string+.
      #
      # @param string [String, Hash] text to clean; a Hash is cleaned value by
      #   value (recursively) and returned as a new Hash
      # @return [String, Hash] the cleaned text
      def tidy(string)
        return string.transform_values { |i| tidy(i) } if string.is_a? Hash

        result = HtmlConverter.track_time 'Removing inner whitespace' do
          remove_inner_whitespaces(String.new(string))
        end
        result = HtmlConverter.track_time 'Removing newlines' do
          remove_newlines(result)
        end
        result = HtmlConverter.track_time 'Removing leading newlines' do
          remove_leading_newlines(result)
        end
        result = HtmlConverter.track_time 'Cleaning tag borders' do
          clean_tag_borders(result)
        end
        result = HtmlConverter.track_time 'Cleaning punctuation characters' do
          clean_punctuation_characters(result)
        end
        result = remove_block_leading_newlines(result)
        remove_section_attribute_newlines(result)
      end

      # Drops the blank line directly after a "****" delimiter that follows
      # a line ending in "]".
      def remove_block_leading_newlines(string)
        string.gsub("]\n****\n\n", "]\n****\n")
      end

      # Drops the blank line between a line ending in "]" and a "==" heading.
      def remove_section_attribute_newlines(string)
        string.gsub("]\n\n==", "]\n==")
      end

      # Collapses runs of three or more newlines into exactly two.
      def remove_newlines(string)
        string.gsub(NEWLINES_REGEX, "\n\n")
      end

      # Removes every newline at the very start of the text.
      def remove_leading_newlines(string)
        string.gsub(LEADING_NEWLINE_REGEX, '')
      end

      # Normalises whitespace inside each line while keeping the whitespace
      # at the line's borders.
      #
      # The argument is never modified, so frozen strings are accepted.
      #
      # @param string [String, nil] text to normalise
      # @return [String] a new string; empty when +string+ is nil
      def remove_inner_whitespaces(string)
        return +'' if string.nil?

        # Un-indent stem macros that start a line, then join a stem macro
        # with the text that follows it on the next line.
        text = string.gsub(INNER_WHITESPACE_REGEX_1, "\nstem:[")
                     .gsub(INNER_WHITESPACE_REGEX_2, '\\1')

        result = +''
        text.each_line do |line|
          cleaned = preserve_border_whitespaces(line) do
            line.gsub(/\A[ \t\r\n]+/, '').gsub(/[ \t\r\n]+\z/, '').gsub(/[ \t]{2,}/, ' ')
          end
          result << cleaned
        end
        result
      end

      # Tightens whitespace just inside "~~…~~" spans and "[…]" spans.
      #
      # For "~~" spans a missing border is filled with
      # Html.input_config.tag_border (defined elsewhere).
      def clean_tag_borders(string)
        result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
          preserve_border_whitespaces(
            match,
            default_border: Html.input_config.tag_border
          ) do
            match.strip.sub('~~ ', '~~').sub(' ~~', '~~')
          end
        end

        result.gsub(/\s?\[.*?\]\s?/) do |match|
          preserve_border_whitespaces(match) do
            match.strip.sub('[ ', '[').sub(' ]', ']')
          end
        end
      end

      # Removes the single whitespace character between a "**", "~~" or "__"
      # marker and a following punctuation character.
      def clean_punctuation_characters(string)
        string.gsub(/(\*\*|~~|__)\s([.!?'"])/, '\\1\\2')
      end

      # Prepares HTML for conversion; works on a copy of +string+.
      def preprocess_word_html(string)
        clean_headings(scrub_whitespace(string.dup))
      end

      # Normalises non-breaking spaces and excess blank space.
      #
      # NOTE: the first substitution modifies +string+ in place, so the
      # argument must not be frozen.
      def scrub_whitespace(string)
        # The literal non-breaking space is written as an escape so it cannot
        # be silently turned into a plain space by an editor or copy/paste.
        string.gsub!(/&nbsp;|&#xA0;|\u00A0/i, '&#xA0;')
        string = Coradoc.strip_unicode(string)
        string.gsub!(/( +)$/, ' ')
        string.gsub!("\n\n\n\n", "\n\n")
        string
      end

      # Replaces empty headings with a space and turns headings styled
      # "vertical-align: super" into <sup> elements.
      #
      # NOTE: modifies +string+ in place and returns it.
      def clean_headings(string)
        string.gsub!(%r{<h([1-9])[^>]*></h\1>}, ' ')
        string.gsub!(
          %r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>},
          '<sup>\\2</sup>'
        )
        string
      end

      private

      # Yields to obtain the cleaned core of +string+ and re-attaches the
      # original leading/trailing whitespace around it.
      #
      # Whitespace-only input is returned unchanged without yielding. When a
      # border has no whitespace, options[:default_border] is used instead,
      # unless the text contains a bracket or parenthesis.
      def preserve_border_whitespaces(string, options = {})
        return string if /\A\s*\Z/.match?(string)

        default_border = options.fetch(:default_border, '')
        default_border = '' if /[\[(\])]/.match?(string)
        string_start = present_or_default(string[/\A\s*/], default_border)
        string_end = present_or_default(string[/\s*\Z/], default_border)
        result = yield
        string_start + result + string_end
      end

      # Returns +string+ unless it is nil or empty, otherwise +default+.
      def present_or_default(string, default)
        return default if string.nil? || string.empty?

        string
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'coradoc'
4
+
5
module Coradoc
  module Html
    module Converters
      # Converts <a> elements into an inline anchor, a cross-reference or a
      # link, depending on which attributes are present.
      class A < Base
        INSTANCE = new

        # @param node the <a> element
        # @param state [Hash] conversion state handed on to child converters
        # @return [Object, Array, nil] an anchor InlineElement when the element
        #   has an id/name, a CrossReferenceElement for "#…" hrefs, a
        #   LinkElement (optionally preceded by a space TextElement) for other
        #   hrefs, or nil when there is nothing to emit
        def to_coradoc(node, state = {})
          children = treat_children_coradoc(node, state)

          href = node['href']
          title = extract_title(node)

          # Normalise the anchor id: no whitespace, runs of "_" collapsed.
          anchor_id = (node['id'] || node['name'])&.gsub(/\s/, '')&.gsub(/__+/, '_')
          anchor_id = nil if anchor_id&.empty?

          # Ids of the form _Toc<digits> and _GoBack are dropped entirely.
          return nil if /^_Toc\d+$|^_GoBack$/.match?(anchor_id)

          unless anchor_id.nil?
            return Coradoc::CoreModel::InlineElement.new(
              format_type: 'anchor',
              target: anchor_id
            )
          end

          if href.to_s.start_with?('#')
            target = href.sub(/^#/, '').gsub(/\s/, '').gsub(/__+/, '_')
            label = extract_text_from_content(children).strip
            return Coradoc::CoreModel::CrossReferenceElement.new(
              target: target,
              content: label.empty? ? nil : label
            )
          end

          return nil if href.to_s.empty?

          # Characters that, when adjacent to the link, require extra care.
          ambiguous = /[\w.?&#=%;\[\u{ff}-\u{10ffff}]/
          right_constrain = textnode_after_start_with?(node, ambiguous)
          link_text = extract_text_from_content(children)

          pieces = []
          if textnode_before_end_with?(node, ambiguous)
            pieces << Coradoc::CoreModel::TextElement.new(content: ' ')
          end

          link_title = title.to_s.strip.empty? ? nil : title.strip
          pieces << Coradoc::CoreModel::LinkElement.new(
            target: href,
            content: link_text.strip,
            metadata: { title: link_title, right_constrain: right_constrain }.compact
          )

          # A lone link is returned bare rather than wrapped in an Array.
          pieces.length == 1 ? pieces.first : pieces
        end
      end

      register :a, A::INSTANCE
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converts <aside> elements into a sidebar block.
      class Aside < Base
        INSTANCE = new

        # @param node the <aside> element
        # @param state [Hash] conversion state handed on to child converters
        # @return [Coradoc::CoreModel::SidebarBlock] block holding the
        #   converted children
        def to_coradoc(node, state = {})
          Coradoc::CoreModel::SidebarBlock.new(
            children: treat_children_coradoc(node, state)
          )
        end
      end

      register :aside, Aside::INSTANCE
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter registered for <audio>; the conversion itself lives in
      # MediaBase (defined elsewhere).
      class Audio < MediaBase
        INSTANCE = new

        # @return [Symbol] always :audio
        def semantic_type
          :audio
        end
        private :semantic_type
      end

      register :audio, Audio::INSTANCE
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Helpers shared by the element converters: child traversal plus
      # inspection of the text surrounding a node.
      class Base
        # Converts every child of +node+ and returns the flattened results
        # without nils and blank Strings.
        #
        # @param node an element responding to #children
        # @param state [Hash] conversion state handed on to child converters
        # @return [Array]
        def treat_children_coradoc(node, state)
          converted = node.children.map { |child| treat_coradoc(child, state) }.flatten

          converted.reject do |item|
            item.nil? || (item.is_a?(String) && item.strip.empty?)
          end
        end

        # Converts a single node through Converters.process_coradoc.
        def treat_coradoc(node, state)
          Converters.process_coradoc(node, state)
        end

        # @return [String] the node's title attribute, "" when absent
        def extract_title(node)
          node['title'].to_s
        end

        # Whether +node+ has an ancestor with the given tag name(s).
        #
        # @param name [String, Array<String>] one name or a list of names
        # @return [Boolean, nil] nil when +name+ is neither String nor Array
        def node_has_ancestor?(node, name)
          case name
          when String
            node.ancestors(name).any?
          when Array
            name.any? { |n| node.ancestors(n).any? }
          end
        end

        # Whether the node directly before +node+ is a text node whose text
        # ends with +str+.
        #
        # @param str [String, Regexp] a String is matched literally
        # @return [Boolean] false for an empty String or any other type
        def textnode_before_end_with?(node, str)
          return false unless [String, Regexp].include?(str.class)
          return false if str.is_a?(String) && str.empty?

          str = /#{Regexp.escape(str)}/ if str.is_a?(String)
          str = /(?:#{str})\z/

          node2 = node.at_xpath('preceding-sibling::node()[1]')
          node2.is_a?(Nokogiri::XML::Text) && node2.text.match?(str)
        end

        # Whether the node directly after +node+ is a text node whose text
        # starts with +str+.
        #
        # @param str [String, Regexp] a String is matched literally
        # @return [Boolean] false for an empty String or any other type
        def textnode_after_start_with?(node, str)
          return false unless [String, Regexp].include?(str.class)
          return false if str.is_a?(String) && str.empty?

          str = /#{Regexp.escape(str)}/ if str.is_a?(String)
          str = /\A(?:#{str})/

          node2 = node.at_xpath('following-sibling::node()[1]')
          node2.is_a?(Nokogiri::XML::Text) && node2.text.match?(str)
        end

        # Strips whitespace at the very start and very end of the node's
        # text and reports, for each side, whether any was present.
        #
        # The checks are anchored with \A and \z so that only the real start
        # and end of the text count; ^ and $ would also match at line breaks
        # inside multi-line text.
        #
        # @return [Array((String, nil), (String, nil))] a single space for a
        #   side that had whitespace, nil for a side that had none
        def extract_leading_trailing_whitespace(node)
          leading_whitespace = nil
          if node.text.match?(/\A\s+/)
            first_text = node.at_xpath('./text()[1]')
            first_text&.replace(first_text.text.lstrip)
            leading_whitespace = ' '
          end

          # The text is read again: the replacement above may have changed it.
          trailing_whitespace = nil
          if node.text.match?(/\s+\z/)
            last_text = node.at_xpath('./text()[last()]')
            last_text&.replace(last_text.text.rstrip)
            trailing_whitespace = ' '
          end

          [leading_whitespace, trailing_whitespace]
        end

        # Whether the node preceding +node+ in document order has non-blank
        # text that ends in a word character.
        def unconstrained_before?(node)
          before = node.at_xpath('preceding::node()[1]')

          before &&
            !before.text.strip.empty? &&
            before.text[-1]&.match?(/\w/)
        end

        # Whether the node following +node+ in document order has non-blank
        # text that starts with a word character or one of , ; " . ? !
        #
        # The punctuation is a character class: the test looks at a single
        # character, so a multi-character alternative could never match.
        def unconstrained_after?(node)
          after = node.at_xpath('following::node()[1]')

          after && !after.text.strip.empty? &&
            after.text[0]&.match?(/[\w,;".?!]/)
        end

        # Extract plain text from a mixed content array. Delegates to
        # CoreModel::InlineContent.text_of.
        def extract_text_from_content(content)
          Coradoc::CoreModel::InlineContent.text_of(content)
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converts <blockquote> elements into a quote block.
      class Blockquote < Base
        INSTANCE = new

        # @param node the <blockquote> element
        # @param state [Hash] conversion state handed on to child converters
        # @return [Coradoc::CoreModel::QuoteBlock] block carrying the
        #   converted children, the element's id and its cite attribute
        #   (stored as the attribution)
        def to_coradoc(node, state = {})
          attributes = { id: node['id'], attribution: node['cite'] }

          Coradoc::CoreModel::QuoteBlock.new(
            children: treat_children_coradoc(node, state),
            **attributes
          )
        end
      end

      register :blockquote, Blockquote::INSTANCE
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converts <br> elements into a line break.
      class Br < Base
        INSTANCE = new

        # Both arguments are ignored: a line break carries no content.
        #
        # @return [Coradoc::CoreModel::LineBreakElement]
        def to_coradoc(_node, _state = {})
          line_break = Coradoc::CoreModel::LineBreakElement
          line_break.new
        end
      end

      register :br, Br::INSTANCE
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for elements that contribute nothing themselves: the
      # result is simply the converted children.
      class Bypass < Base
        INSTANCE = new

        # @param node the element to step over
        # @param state [Hash] conversion state handed on to child converters
        # @return [Array] the converted children
        def to_coradoc(node, state = {})
          treat_children_coradoc(node, state)
        end
      end

      # Tags handled by Bypass, registered in this order.
      %i[
        document html body span thead tbody tfoot
        abbr acronym address applet map area
        bdi bdo big button canvas data datalist
        del ins dfn dialog embed fieldset font footer form
        frame frameset header iframe input label legend
        main menu menulist meter nav noframes noscript
        object optgroup option output param picture progress
        ruby rt rp s select small strike
        details section summary svg template textarea track u wbr
      ].each do |tag|
        register tag, Bypass::INSTANCE
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Markup converter for code-like inline elements.
      class Code < Markup
        INSTANCE = new

        # @return [String] the format type used for these elements
        def coradoc_format_type
          'monospace'
        end

        # @return [Array<String>] tag names treated as the same markup
        def markup_ancestor_tag_names
          %w[code tt kbd samp var]
        end
      end

      # Registration order: code, tt, kbd, samp, var.
      %i[code tt kbd samp var].each do |tag|
        register tag, Code::INSTANCE
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converts <div> elements into an open block.
      class Div < Base
        INSTANCE = new

        # @param node the <div> element
        # @param state [Hash] conversion state handed on to child converters
        # @return [Coradoc::CoreModel::OpenBlock] block carrying the
        #   converted children and the element's id
        def to_coradoc(node, state = {})
          element_id = node['id']
          children = treat_children_coradoc(node, state)

          Coradoc::CoreModel::OpenBlock.new(children: children, id: element_id)
        end
      end

      register :div, Div::INSTANCE
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converts <dl> elements into a definition list block.
      class Dl < Base
        INSTANCE = new

        # @param node the <dl> element
        # @param state [Hash] conversion state handed on to child converters
        # @return [Coradoc::CoreModel::ListBlock] list with marker_type
        #   "definition"; each item has the term text as content and the
        #   converted definition as children
        def to_coradoc(node, state = {})
          items = process_dl(node, state)

          list_items = items.map do |item|
            term_text = extract_text_from_content(item[:name])
            Coradoc::CoreModel::ListItem.new(
              content: term_text,
              children: item[:value]
            )
          end

          Coradoc::CoreModel::ListBlock.new(
            marker_type: 'definition',
            items: list_items
          )
        end

        # Walks the element children of +node+ and groups <dt>/<dd> content.
        #
        # <dt>/<dd> elements may be direct children or wrapped in <div>
        # elements; any other child is ignored. A group is collected as soon
        # as it has both a term and a definition, and each group is collected
        # exactly once even when further <dd> elements extend it.
        #
        # @return [Array<Hash>] hashes of the form { name: [...], value: [...] }
        def process_dl(node, state = {})
          groups = []
          current = { name: [], value: [] }
          seen_dd = false

          child = node.at_xpath('*[1]')
          until child.nil?
            if child.name == 'div'
              grandchild = child.at_xpath('*[1]')
              until grandchild.nil?
                groups, current, seen_dd = process_dt_or_dd(
                  groups, current, seen_dd, grandchild, state
                )
                collect_group(groups, current)
                grandchild = grandchild.at_xpath('following-sibling::*[1]')
              end
            elsif %w[dt dd].include?(child.name)
              groups, current, seen_dd = process_dt_or_dd(
                groups, current, seen_dd, child, state
              )
              collect_group(groups, current)
            end
            child = child.at_xpath('following-sibling::*[1]')
          end
          groups
        end

        # Adds the content of one <dt> or <dd> element to the current group.
        #
        # A <dt> that follows a <dd> starts a fresh group. Elements with any
        # other name leave everything unchanged.
        #
        # @return [Array] [groups, current, seen_dd] with the updated state
        def process_dt_or_dd(groups, current, seen_dd, subnode, state = {})
          if subnode.name == 'dt'
            if seen_dd
              current = { name: [], value: [] }
              seen_dd = false
            end
            current[:name] += treat_children_coradoc(subnode, state)
          elsif subnode.name == 'dd'
            current[:value] += treat_children_coradoc(subnode, state)
            seen_dd = true
          end
          [groups, current, seen_dd]
        end

        private

        # Appends +current+ to +groups+ once it has both a term and a
        # definition; the identity check stops the same group from being
        # appended again when it grows.
        def collect_group(groups, current)
          return unless current[:name].any? && current[:value].any?
          return if groups.last.equal?(current)

          groups << current
        end
      end

      register :dl, Dl::INSTANCE
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Converter for elements whose content is discarded.
      class Skip < Base
        INSTANCE = new

        # Both arguments are ignored.
        #
        # @return [String] always the empty string
        def to_coradoc(_node, _state = {})
          ''
        end
      end

      # Tags handled by Skip, registered in this order.
      %i[
        caption figcaption title link style
        meta script comment colgroup col
      ].each do |tag|
        register tag, Skip::INSTANCE
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Coradoc
  module Html
    module Converters
      # Markup converter for emphasis-like inline elements.
      class Em < Markup
        INSTANCE = new

        # @return [String] the format type used for these elements
        def coradoc_format_type
          'italic'
        end

        # @return [Array<String>] tag names treated as the same markup
        def markup_ancestor_tag_names
          %w[em i cite]
        end
      end

      # Registration order: em, i, cite.
      %i[em i cite].each do |tag|
        register tag, Em::INSTANCE
      end
    end
  end
end