coradoc-html 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/cleaner.rb +128 -0
  3. data/lib/coradoc/html/converters/a.rb +77 -0
  4. data/lib/coradoc/html/converters/aside.rb +20 -0
  5. data/lib/coradoc/html/converters/audio.rb +19 -0
  6. data/lib/coradoc/html/converters/base.rb +98 -0
  7. data/lib/coradoc/html/converters/blockquote.rb +25 -0
  8. data/lib/coradoc/html/converters/br.rb +17 -0
  9. data/lib/coradoc/html/converters/bypass.rb +82 -0
  10. data/lib/coradoc/html/converters/code.rb +25 -0
  11. data/lib/coradoc/html/converters/div.rb +23 -0
  12. data/lib/coradoc/html/converters/dl.rb +82 -0
  13. data/lib/coradoc/html/converters/drop.rb +26 -0
  14. data/lib/coradoc/html/converters/em.rb +23 -0
  15. data/lib/coradoc/html/converters/figure.rb +33 -0
  16. data/lib/coradoc/html/converters/h.rb +58 -0
  17. data/lib/coradoc/html/converters/head.rb +29 -0
  18. data/lib/coradoc/html/converters/hr.rb +17 -0
  19. data/lib/coradoc/html/converters/img.rb +103 -0
  20. data/lib/coradoc/html/converters/li.rb +35 -0
  21. data/lib/coradoc/html/converters/mark.rb +21 -0
  22. data/lib/coradoc/html/converters/markup.rb +93 -0
  23. data/lib/coradoc/html/converters/math.rb +37 -0
  24. data/lib/coradoc/html/converters/media_base.rb +48 -0
  25. data/lib/coradoc/html/converters/ol.rb +42 -0
  26. data/lib/coradoc/html/converters/p.rb +64 -0
  27. data/lib/coradoc/html/converters/pass_through.rb +15 -0
  28. data/lib/coradoc/html/converters/positional_formatting.rb +35 -0
  29. data/lib/coradoc/html/converters/pre.rb +57 -0
  30. data/lib/coradoc/html/converters/q.rb +25 -0
  31. data/lib/coradoc/html/converters/strong.rb +22 -0
  32. data/lib/coradoc/html/converters/sub.rb +20 -0
  33. data/lib/coradoc/html/converters/sup.rb +20 -0
  34. data/lib/coradoc/html/converters/table.rb +64 -0
  35. data/lib/coradoc/html/converters/td.rb +42 -0
  36. data/lib/coradoc/html/converters/text.rb +66 -0
  37. data/lib/coradoc/html/converters/tr.rb +27 -0
  38. data/lib/coradoc/html/converters/video.rb +27 -0
  39. data/lib/coradoc/html/converters.rb +104 -0
  40. data/lib/coradoc/html/drop/drop_factory.rb +14 -22
  41. data/lib/coradoc/html/drop/inline_element_drop.rb +3 -5
  42. data/lib/coradoc/html/drop/raw_inline_element_drop.rb +30 -0
  43. data/lib/coradoc/html/drop.rb +30 -8
  44. data/lib/coradoc/html/errors.rb +11 -0
  45. data/lib/coradoc/html/html_converter.rb +78 -0
  46. data/lib/coradoc/html/input_config.rb +66 -0
  47. data/lib/coradoc/html/plugin.rb +90 -0
  48. data/lib/coradoc/html/plugins/plateau.rb +212 -0
  49. data/lib/coradoc/html/postprocessor.rb +19 -0
  50. data/lib/coradoc/html/spa.rb +0 -2
  51. data/lib/coradoc/html/static.rb +0 -2
  52. data/lib/coradoc/html/tag_mapping.rb +3 -1
  53. data/lib/coradoc/html/transform/from_core_model.rb +2 -2
  54. data/lib/coradoc/html/transform/to_core_model.rb +3 -3
  55. data/lib/coradoc/html/version.rb +1 -1
  56. data/lib/coradoc/html.rb +30 -5
  57. metadata +46 -47
  58. data/lib/coradoc/html/input/cleaner.rb +0 -134
  59. data/lib/coradoc/html/input/config.rb +0 -80
  60. data/lib/coradoc/html/input/converters/a.rb +0 -79
  61. data/lib/coradoc/html/input/converters/aside.rb +0 -22
  62. data/lib/coradoc/html/input/converters/audio.rb +0 -21
  63. data/lib/coradoc/html/input/converters/base.rb +0 -118
  64. data/lib/coradoc/html/input/converters/blockquote.rb +0 -27
  65. data/lib/coradoc/html/input/converters/br.rb +0 -19
  66. data/lib/coradoc/html/input/converters/bypass.rb +0 -84
  67. data/lib/coradoc/html/input/converters/code.rb +0 -27
  68. data/lib/coradoc/html/input/converters/div.rb +0 -25
  69. data/lib/coradoc/html/input/converters/dl.rb +0 -84
  70. data/lib/coradoc/html/input/converters/drop.rb +0 -28
  71. data/lib/coradoc/html/input/converters/em.rb +0 -25
  72. data/lib/coradoc/html/input/converters/figure.rb +0 -35
  73. data/lib/coradoc/html/input/converters/h.rb +0 -74
  74. data/lib/coradoc/html/input/converters/head.rb +0 -31
  75. data/lib/coradoc/html/input/converters/hr.rb +0 -19
  76. data/lib/coradoc/html/input/converters/img.rb +0 -105
  77. data/lib/coradoc/html/input/converters/li.rb +0 -37
  78. data/lib/coradoc/html/input/converters/mark.rb +0 -23
  79. data/lib/coradoc/html/input/converters/markup.rb +0 -103
  80. data/lib/coradoc/html/input/converters/math.rb +0 -39
  81. data/lib/coradoc/html/input/converters/media_base.rb +0 -50
  82. data/lib/coradoc/html/input/converters/ol.rb +0 -44
  83. data/lib/coradoc/html/input/converters/p.rb +0 -90
  84. data/lib/coradoc/html/input/converters/pass_through.rb +0 -17
  85. data/lib/coradoc/html/input/converters/positional_formatting.rb +0 -37
  86. data/lib/coradoc/html/input/converters/pre.rb +0 -59
  87. data/lib/coradoc/html/input/converters/q.rb +0 -27
  88. data/lib/coradoc/html/input/converters/strong.rb +0 -24
  89. data/lib/coradoc/html/input/converters/sub.rb +0 -22
  90. data/lib/coradoc/html/input/converters/sup.rb +0 -22
  91. data/lib/coradoc/html/input/converters/table.rb +0 -66
  92. data/lib/coradoc/html/input/converters/td.rb +0 -44
  93. data/lib/coradoc/html/input/converters/text.rb +0 -68
  94. data/lib/coradoc/html/input/converters/tr.rb +0 -29
  95. data/lib/coradoc/html/input/converters/video.rb +0 -29
  96. data/lib/coradoc/html/input/converters.rb +0 -107
  97. data/lib/coradoc/html/input/errors.rb +0 -22
  98. data/lib/coradoc/html/input/html_converter.rb +0 -98
  99. data/lib/coradoc/html/input/plugin.rb +0 -120
  100. data/lib/coradoc/html/input/plugins/plateau.rb +0 -214
  101. data/lib/coradoc/html/input/postprocessor.rb +0 -25
  102. data/lib/coradoc/html/input.rb +0 -86
  103. data/lib/coradoc/html/output.rb +0 -89
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <figure> tag. Produces a
      # CoreModel::ExampleBlock whose title comes from the <figcaption> child.
      class Figure < Base
        INSTANCE = new

        # Builds the ExampleBlock for +node+.
        #
        # The caption content is reduced to a title through
        # extract_text_from_content, a helper not defined in this class
        # (presumably inherited from Base).
        def to_coradoc(node, state = {})
          id = node['id']
          caption = extract_title(node)
          body = treat_children_coradoc(node, state)

          Coradoc::CoreModel::ExampleBlock.new(
            title: extract_text_from_content(caption),
            children: body,
            id: id
          )
        end

        # Returns the converted children of the direct <figcaption> child,
        # or an empty string when the figure has no caption.
        def extract_title(node)
          figcaption = node.at('./figcaption')
          figcaption.nil? ? '' : treat_children_coradoc(figcaption, {})
        end
      end

      register :figure, Figure::INSTANCE
    end
  end
end
@@ -0,0 +1,58 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for <h1> through <h6>. Produces a
      # CoreModel::SectionElement with an empty children list.
      class H < Base
        INSTANCE = new

        # Builds the SectionElement for a heading node.
        #
        # When the heading itself has no id, the target of the first converted
        # <a> child is used as the id (only if that child converted to an
        # InlineElement with a target). The level is the first digit found in
        # the tag name; the title is the text of all non-anchor children.
        def to_coradoc(node, state = {})
          id = node['id']
          anchors = treat_children_anchors(node, state)

          if id.to_s.empty? && !anchors.empty?
            leading = anchors.first
            id = leading.target if leading.is_a?(Coradoc::CoreModel::InlineElement) && leading.target
          end

          level = node.name[/\d/].to_i
          title_parts = treat_children_no_anchors(node, state)

          Coradoc::CoreModel::SectionElement.new(
            title: extract_title_text(title_parts),
            level: level,
            id: id,
            children: []
          )
        end

        # Converts every child of +node+ except <a> elements; the results are
        # flattened and nils dropped.
        def treat_children_no_anchors(node, state)
          others = node.children.reject { |child| child.name == 'a' }
          others.map { |child| treat_coradoc(child, state) }.flatten.compact
        end

        # Converts only the <a> children of +node+; the results are flattened
        # and nils dropped.
        def treat_children_anchors(node, state)
          links = node.children.select { |child| child.name == 'a' }
          links.map { |child| treat_coradoc(child, state) }.flatten.compact
        end

        private

        # Plain, stripped text of the converted heading content.
        def extract_title_text(content)
          Coradoc::CoreModel::InlineContent.text_of(content).strip
        end
      end

      register :h1, H::INSTANCE
      register :h2, H::INSTANCE
      register :h3, H::INSTANCE
      register :h4, H::INSTANCE
      register :h5, H::INSTANCE
      register :h6, H::INSTANCE
    end
  end
end
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <head> tag. Produces a level-0
      # CoreModel::DocumentElement carrying the document title.
      class Head < Base
        INSTANCE = new

        # Builds the document header element; the conversion state is unused.
        def to_coradoc(node, _state = {})
          Coradoc::CoreModel::DocumentElement.new(
            title: extract_title(node),
            level: 0
          )
        end

        # Text of the direct <title> child, or the placeholder '(???)' when
        # the head has no title.
        def extract_title(node)
          title_node = node.at('./title')
          title_node ? title_node.text : '(???)'
        end
      end

      register :head, Head::INSTANCE
    end
  end
end
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <hr> tag.
      class Hr < Base
        INSTANCE = new

        # Always yields a fresh HorizontalRuleBlock; neither the node nor the
        # conversion state is consulted.
        def to_coradoc(_node, _state = {})
          Coradoc::CoreModel::HorizontalRuleBlock.new
        end
      end

      register(:hr, Hr::INSTANCE)
    end
  end
end
@@ -0,0 +1,103 @@
# frozen_string_literal: true

require 'fileutils'
require 'pathname'
require 'tempfile'
require 'base64'
require 'marcel'

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <img> tag. Produces a
      # CoreModel::Image and, when Html.input_config.external_images is set,
      # first copies the image into an "images" directory next to the
      # configured destination.
      class Img < Base
        INSTANCE = new

        # The current image counter rendered through the configured
        # image_counter_pattern format string.
        def image_number
          format(
            Html.input_config.image_counter_pattern,
            Html.input_config.image_counter
          )
        end

        # Advances the shared image counter by one.
        def image_number_increment
          Html.input_config.image_counter += 1
        end

        # Materialises the image referenced by +src+ under
        # "<destination dir>/images" and returns its path relative to the
        # destination directory.
        #
        # +src+ may be a base64 image data URI (decoded to a temp file first)
        # or a path resolved against Html.input_config.sourcedir. A missing
        # source file only produces a warning. The counter is advanced in
        # either case. Returns nil when +src+ is nil.
        def datauri2file(src)
          return unless src

          # The regexp literal must stay on the left of =~ so that the named
          # capture becomes the local variable imgdata (nil when no match).
          %r{^data:image/(?:[^;]+);base64,(?<imgdata>.+)$} =~ src

          dest_dir = Pathname.new(Html.input_config.destination).dirname
          images_dir = dest_dir.join('images')
          FileUtils.mkdir_p(images_dir)

          ext, image_src_path, tempfile = determine_image_src_path(
            src,
            imgdata
          )
          image_dest_path = images_dir + "#{image_number}.#{ext}"

          if File.exist?(image_src_path)
            FileUtils.cp(image_src_path, image_dest_path)
          else
            Kernel.warn "Image #{image_src_path} does not exist"
          end

          image_number_increment

          image_dest_path.relative_path_from(dest_dir)
        ensure
          # tempfile is only set for data URIs; close! also unlinks the file.
          tempfile&.close!
        end

        # Returns [extension, source_path] for a file reference, or
        # [extension, source_path, tempfile] for decoded data-URI content.
        def determine_image_src_path(src, imgdata)
          return copy_temp_file(imgdata) if imgdata

          ext = File.extname(src).strip.downcase[1..]
          [ext, Pathname.new(Html.input_config.sourcedir).join(src)]
        end

        # Decodes base64 +imgdata+ into a temp file and sniffs its MIME type
        # to pick a file extension ("svg+xml" is mapped to "svg").
        #
        # Returns [extension, temp_path, tempfile]; the caller owns the
        # tempfile on success. If decoding or sniffing raises, the temp file
        # is closed and unlinked here before the error is re-raised, because
        # the caller never receives a handle it could clean up.
        def copy_temp_file(imgdata)
          f = Tempfile.open(['radoc', '.jpg'])
          f.binmode
          f.write(Base64.strict_decode64(imgdata))
          f.rewind
          ext = Marcel::MimeType.for(f).sub(%r{^[^/]+/}, '')
          ext = 'svg' if ext == 'svg+xml'
          [ext, f.path, f]
        rescue StandardError
          f&.close!
          raise
        end

        # Builds the CoreModel::Image for +node+, or returns nil when there is
        # no usable src. The conversion state is unused.
        def to_coradoc(node, _state = {})
          id = node['id']
          alt = node['alt']
          src = node['src']
          width = node['width']
          height = node['height']

          # Purely numeric dimensions go through Integer and back to String
          # below, which normalises forms such as leading zeros; anything else
          # (e.g. percentages) is passed along unchanged.
          width = width.to_i if width&.match?(/\A\d+\z/)
          height = height.to_i if height&.match?(/\A\d+\z/)

          # extract_title is not defined in this class.
          title = extract_title(node)

          src = datauri2file(src) if Html.input_config.external_images

          return unless src

          Coradoc::CoreModel::Image.new(
            src: src,
            alt: alt,
            caption: title,
            width: width&.to_s,
            height: height&.to_s,
            id: id
          )
        end
      end

      register :img, Img::INSTANCE
    end
  end
end
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <li> tag. Produces a
      # CoreModel::ListItem.
      class Li < Base
        INSTANCE = new

        # Builds the ListItem for +node+.
        #
        # When the item's only non-blank child is a single <p>, that
        # paragraph's children are converted directly so the item holds
        # inline content rather than a nested paragraph. Otherwise all of the
        # item's children are converted.
        def to_coradoc(node, state = {})
          id = node['id']

          paragraphs = node.children.select { |child| child.name == 'p' }
          significant = node.children.reject { |child| child.text? && child.text.strip.empty? }

          lone_paragraph = paragraphs.size == 1 && significant.size == 1
          source = lone_paragraph ? paragraphs.first : node

          Coradoc::CoreModel::ListItem.new(
            children: treat_children_coradoc(source, state),
            id: id
          )
        end
      end

      register :li, Li::INSTANCE
    end
  end
end
@@ -0,0 +1,21 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Markup converter registered for the HTML <mark> tag.
      class Mark < Markup
        INSTANCE = new

        # Format type passed to InlineElement.format_type_class by Markup.
        def coradoc_format_type
          'highlight'
        end

        # Tag names that, when found among a node's ancestors, make Markup
        # return the bare content instead of wrapping it again.
        def markup_ancestor_tag_names
          ['mark']
        end
      end

      register(:mark, Mark::INSTANCE)
    end
  end
end
@@ -0,0 +1,93 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Shared conversion for inline formatting tags. Subclasses supply
      # coradoc_format_type and markup_ancestor_tag_names (the latter is
      # called here but not defined in this class).
      class Markup < Base
        # Converts a formatting node.
        #
        # Returns, depending on the node:
        # * the converted children alone, when an ancestor already carries
        #   one of markup_ancestor_tag_names;
        # * a TextElement with the leading whitespace (or nil) when the node
        #   has no children;
        # * otherwise the formatted InlineElement, surrounded by TextElements
        #   for any leading/trailing whitespace; a single result is returned
        #   unwrapped, several as an Array.
        def to_coradoc(node, state = {})
          u_before = unconstrained_before?(node)
          u_after = unconstrained_after?(node)
          leading_ws, trailing_ws = extract_leading_trailing_whitespace(node)

          leading = whitespace_text_element(leading_ws)
          trailing = whitespace_text_element(trailing_ws)

          content = treat_children_coradoc(node, state)

          return content if node_has_ancestor?(node, markup_ancestor_tag_names)
          return whitespace_text_element(leading_ws) if node.children.empty?

          # Unconstrained only when the relevant side has no separating
          # whitespace element.
          unconstrained = (u_before && leading.nil?) || (u_after && trailing.nil?)

          text_content, nested = extract_text_and_elements(content)

          element_class = Coradoc::CoreModel::InlineElement.format_type_class(coradoc_format_type)
          formatted = element_class.new(
            content: text_content,
            nested_elements: nested.empty? ? nil : nested,
            metadata: { unconstrained: unconstrained }
          )

          parts = [leading, formatted, trailing].compact
          parts.length == 1 ? parts.first : parts
        end

        # Splits a mixed content array into [joined_text, inline_elements].
        # InlineElements are collected as-is; strings, other CoreModel objects
        # (via extract_text_from_model) and anything else (via to_s) are
        # concatenated in order. Non-array content is returned untouched with
        # an empty element list.
        def extract_text_and_elements(content)
          return [content, []] unless content.is_a?(Array)

          elements, textual = content.partition do |item|
            !item.is_a?(String) && item.is_a?(Coradoc::CoreModel::InlineElement)
          end

          pieces = textual.map do |item|
            case item
            when String then item
            when Coradoc::CoreModel::Base then extract_text_from_model(item)
            else item.to_s
            end
          end

          [pieces.join, elements]
        end

        # Thin wrapper over CoreModel::InlineContent.text_of so callers can
        # pass a single model.
        def extract_text_from_model(model)
          Coradoc::CoreModel::InlineContent.text_of(model)
        end

        # Default format type; subclasses are expected to override.
        def coradoc_format_type
          'text'
        end

        private

        # Wraps +whitespace+ in a TextElement; nil when there is none.
        def whitespace_text_element(whitespace)
          Coradoc::CoreModel::TextElement.new(content: whitespace) if whitespace
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the <math> tag. Produces a
      # CoreModel::StemElement from the node's serialized markup.
      class Math < Base
        INSTANCE = new

        # Serializes +node+ onto one line, optionally converts it to
        # AsciiMath (when Html.input_config.mathml2asciimath is set), then
        # escapes square brackets and collapses doubled parentheses.
        def to_coradoc(node, _state = {})
          stem = node.to_s.tr("\n", ' ')
          if Html.input_config.mathml2asciimath
            # Loaded lazily so plurimath is only needed when enabled.
            require 'plurimath'
            stem = Plurimath::Math.parse(stem, :mathml).to_asciimath
          end

          stem = escape_brackets_and_collapse_parens(stem) unless stem.nil?

          # NOTE(review): stem_type stays 'mathml' even after the AsciiMath
          # conversion above - confirm this is what consumers expect.
          Coradoc::CoreModel::StemElement.new(
            content: stem,
            stem_type: 'mathml'
          )
        end

        private

        # Backslash-escapes "[" and "]" and repeatedly rewrites "((x))" to
        # "(x)" (inner run of 1-100 non-")" characters) until nothing changes.
        def escape_brackets_and_collapse_parens(stem)
          stem = stem.gsub('[', '\\[').gsub(']', '\\]')

          previous = nil
          until previous == stem
            previous = stem
            stem = stem.gsub(/\(\(([^)]{1,100})\)\)/, '(\\1)')
          end
          stem
        end
      end

      register :math, Math::INSTANCE
    end
  end
end
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Common conversion for media tags. Subclasses must implement
      # semantic_type and may override build_attributes.
      class MediaBase < Base
        # Builds a CoreModel::Block whose content is the node's src
        # attribute. The conversion state is unused.
        def to_coradoc(node, _state = {})
          source = node['src']
          id = node['id']
          label = extract_title(node)

          Coradoc::CoreModel::Block.new(
            block_semantic_type: semantic_type,
            content: source,
            title: label,
            id: id,
            element_attributes: build_attributes(node)
          )
        end

        # Title taken from a direct <track> child, falling back to any
        # descendant <source>: its label, else its srclang, else ''.
        def extract_title(node)
          labelled = node.at('./track') || node.at('.//source')
          return '' unless labelled

          labelled['label'] || labelled['srclang'] || ''
        end

        private

        # Semantic type for the produced block; must be overridden.
        def semantic_type
          raise NotImplementedError
        end

        # autoplay/loop/controls attributes that are present on +node+,
        # keyed by symbol.
        def base_attributes(node)
          %w[autoplay loop controls].each_with_object({}) do |name, attrs|
            value = node[name]
            attrs[name.to_sym] = value unless value.nil?
          end
        end

        # Hook for subclasses to add attributes on top of base_attributes.
        def build_attributes(node)
          base_attributes(node)
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for <ol>, <ul> and <dir>. Produces a
      # CoreModel::ListBlock.
      class Ol < Base
        INSTANCE = new

        # Builds the ListBlock for +node+; the start attribute, when present,
        # is passed on as an Integer.
        def to_coradoc(node, state = {})
          id = node['id']
          entries = treat_children_coradoc(node, state)
          marker = get_list_type(node, state)

          Coradoc::CoreModel::ListBlock.new(
            marker_type: marker,
            items: entries,
            id: id,
            start: node['start']&.to_i
          )
        end

        # 'ordered' for <ol>; 'unordered' for every other tag name.
        def get_list_type(node, _state)
          node.name == 'ol' ? 'ordered' : 'unordered'
        end
      end

      register :ol, Ol::INSTANCE
      register :ul, Ol::INSTANCE
      register :dir, Ol::INSTANCE
    end
  end
end
@@ -0,0 +1,64 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <p> tag. Produces a
      # CoreModel::ParagraphBlock.
      class P < Base
        INSTANCE = new

        # Builds the ParagraphBlock for +node+ after cleaning the converted
        # children of fullwidth-space padding and empty elements.
        def to_coradoc(node, state = {})
          id = node['id']
          content = treat_children_coradoc(node, state)

          content = strip_fullwidth_spaces(content)

          Coradoc::CoreModel::ParagraphBlock.new(
            children: content,
            id: id
          )
        end

        private

        # Cleans an array of converted children: per-element fullwidth-space
        # trimming, edge stripping via InlineContent.strip_edges, then removal
        # of elements left empty. Non-array content is returned unchanged.
        def strip_fullwidth_spaces(content)
          return content unless content.is_a?(Array)

          content = strip_fullwidth_per_element(content)
          content = Coradoc::CoreModel::InlineContent.strip_edges(content)
          reject_empty_elements(content)
        end

        # Strip CJK fullwidth spaces (U+3000 IDEOGRAPHIC SPACE) from the
        # leading/trailing edge of every InlineElement's content. Returns a
        # new array; inputs are not mutated.
        #
        # Only U+3000 is matched: ordinary ASCII spaces at element edges
        # separate words from neighbouring inline elements and must survive.
        def strip_fullwidth_per_element(content)
          content.map do |item|
            next item unless item.is_a?(Coradoc::CoreModel::InlineElement)
            next item unless item.content.is_a?(String)

            item.with_content(item.content.gsub(/\A\u3000+|\u3000+\z/, ''))
          end
        end

        # Drops empty strings and InlineElements that have neither content
        # nor nested elements; everything else is kept.
        def reject_empty_elements(content)
          content.reject do |item|
            if item.is_a?(Coradoc::CoreModel::InlineElement)
              item.content.to_s.empty? && !has_nested_content?(item)
            elsif item.is_a?(String)
              item.empty?
            else
              false
            end
          end
        end

        # Truthy when +item+ is an InlineElement with a non-empty
        # nested_elements collection.
        def has_nested_content?(item)
          item.is_a?(Coradoc::CoreModel::InlineElement) &&
            item.nested_elements && !item.nested_elements.empty?
        end
      end

      register :p, P::INSTANCE
    end
  end
end
@@ -0,0 +1,15 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter that emits a node's own serialization unchanged. This file
      # does not register it for any tag.
      class PassThrough < Base
        INSTANCE = new

        # Returns the node serialized with to_s; the state is unused.
        def to_coradoc(node, _state = {})
          node.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Mixin with the conversion shared by the superscript and subscript
      # converters.
      #
      # The including class must provide `element_class` (the CoreModel class
      # to instantiate, e.g. SuperscriptElement or SubscriptElement) as well
      # as `extract_leading_trailing_whitespace` and `treat_children_coradoc`.
      module PositionalFormatting
        # Converts +node+ into an element_class instance holding the
        # converted children.
        #
        # Returns nil when the converted content is empty. Otherwise returns
        # the element alone, or an Array of [leading whitespace, element,
        # trailing whitespace] with absent whitespace entries removed.
        def to_coradoc(node, state = {})
          before, after = extract_leading_trailing_whitespace(node)
          children = treat_children_coradoc(node, state)

          return if content_empty?(children)

          element = element_class.new(content: children)
          parts = [before, element, after].compact
          parts.length == 1 ? parts.first : parts
        end

        private

        # True for nil, a blank String or an empty Array; false for any
        # other value.
        def content_empty?(content)
          case content
          when nil then true
          when String then content.strip.empty?
          when Array then content.empty?
          else false
          end
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
# frozen_string_literal: true

module Coradoc
  module Html
    module Converters
      # Converter registered for the HTML <pre> tag. Produces a SourceBlock
      # when a language can be detected and a LiteralBlock otherwise.
      class Pre < Base
        INSTANCE = new

        # Builds the block for +node+ from its text; the state is unused.
        def to_coradoc(node, _state = {})
          id = node['id']
          lang = language(node)
          text = extract_text_content(node)

          unless lang
            return Coradoc::CoreModel::LiteralBlock.new(
              content: text,
              id: id
            )
          end

          Coradoc::CoreModel::SourceBlock.new(
            content: text,
            id: id,
            language: lang
          )
        end

        private

        # Text content of the <pre> node.
        def extract_text_content(node)
          node.text
        end

        # Language from the parent's "highlight-<lang>" class, falling back
        # to a "brush: <lang>;" declaration on the node itself.
        def language(node)
          language_from_highlight_class(node) || language_from_confluence_class(node)
        end

        # Alphanumeric suffix of a "highlight-..." class on the parent
        # element, or nil.
        def language_from_highlight_class(node)
          parent_classes = node.parent['class'].to_s
          parent_classes[/highlight-([a-zA-Z0-9]+)/, 1]
        end

        # Language named by a "brush: <lang>;" entry in the node's own class
        # attribute (terminating semicolon required), stripped; nil when
        # absent.
        def language_from_confluence_class(node)
          classes = node['class'].to_s
          return nil unless classes.include?('brush:')

          classes[/brush:\s*([^;]+);/, 1]&.strip
        end
      end

      register :pre, Pre::INSTANCE
    end
  end
end