coradoc-html 1.1.7 → 1.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/html/config.rb +36 -12
  3. data/lib/coradoc/html/converter_base.rb +26 -68
  4. data/lib/coradoc/html/drop/annotation_drop.rb +31 -0
  5. data/lib/coradoc/html/drop/base.rb +72 -0
  6. data/lib/coradoc/html/drop/bibliography_drop.rb +15 -0
  7. data/lib/coradoc/html/drop/bibliography_entry_drop.rb +24 -0
  8. data/lib/coradoc/html/drop/block_drop.rb +69 -0
  9. data/lib/coradoc/html/drop/definition_item_drop.rb +36 -0
  10. data/lib/coradoc/html/drop/definition_list_drop.rb +15 -0
  11. data/lib/coradoc/html/drop/document_drop.rb +52 -0
  12. data/lib/coradoc/html/drop/drop_factory.rb +72 -0
  13. data/lib/coradoc/html/drop/footnote_drop.rb +24 -0
  14. data/lib/coradoc/html/drop/image_drop.rb +35 -0
  15. data/lib/coradoc/html/drop/inline_element_drop.rb +64 -0
  16. data/lib/coradoc/html/drop/list_block_drop.rb +23 -0
  17. data/lib/coradoc/html/drop/list_item_drop.rb +20 -0
  18. data/lib/coradoc/html/drop/table_cell_drop.rb +35 -0
  19. data/lib/coradoc/html/drop/table_drop.rb +15 -0
  20. data/lib/coradoc/html/drop/table_row_drop.rb +23 -0
  21. data/lib/coradoc/html/drop/term_drop.rb +24 -0
  22. data/lib/coradoc/html/drop/text_content_drop.rb +15 -0
  23. data/lib/coradoc/html/drop/toc_drop.rb +15 -0
  24. data/lib/coradoc/html/drop/toc_entry_drop.rb +32 -0
  25. data/lib/coradoc/html/drop.rb +18 -0
  26. data/lib/coradoc/html/escape.rb +29 -0
  27. data/lib/coradoc/html/input/cleaner.rb +4 -33
  28. data/lib/coradoc/html/input/config.rb +4 -3
  29. data/lib/coradoc/html/input/converters/a.rb +8 -19
  30. data/lib/coradoc/html/input/converters/aside.rb +4 -5
  31. data/lib/coradoc/html/input/converters/audio.rb +6 -35
  32. data/lib/coradoc/html/input/converters/base.rb +29 -27
  33. data/lib/coradoc/html/input/converters/blockquote.rb +4 -2
  34. data/lib/coradoc/html/input/converters/br.rb +4 -4
  35. data/lib/coradoc/html/input/converters/bypass.rb +68 -67
  36. data/lib/coradoc/html/input/converters/code.rb +7 -5
  37. data/lib/coradoc/html/input/converters/div.rb +4 -4
  38. data/lib/coradoc/html/input/converters/dl.rb +3 -25
  39. data/lib/coradoc/html/input/converters/drop.rb +13 -13
  40. data/lib/coradoc/html/input/converters/em.rb +5 -3
  41. data/lib/coradoc/html/input/converters/figure.rb +3 -26
  42. data/lib/coradoc/html/input/converters/h.rb +9 -11
  43. data/lib/coradoc/html/input/converters/head.rb +5 -4
  44. data/lib/coradoc/html/input/converters/hr.rb +4 -5
  45. data/lib/coradoc/html/input/converters/img.rb +4 -9
  46. data/lib/coradoc/html/input/converters/li.rb +3 -1
  47. data/lib/coradoc/html/input/converters/mark.rb +3 -1
  48. data/lib/coradoc/html/input/converters/markup.rb +4 -8
  49. data/lib/coradoc/html/input/converters/math.rb +7 -14
  50. data/lib/coradoc/html/input/converters/media_base.rb +50 -0
  51. data/lib/coradoc/html/input/converters/ol.rb +6 -8
  52. data/lib/coradoc/html/input/converters/p.rb +43 -34
  53. data/lib/coradoc/html/input/converters/pass_through.rb +2 -4
  54. data/lib/coradoc/html/input/converters/positional_formatting.rb +37 -0
  55. data/lib/coradoc/html/input/converters/pre.rb +3 -3
  56. data/lib/coradoc/html/input/converters/q.rb +6 -3
  57. data/lib/coradoc/html/input/converters/strong.rb +4 -2
  58. data/lib/coradoc/html/input/converters/sub.rb +5 -23
  59. data/lib/coradoc/html/input/converters/sup.rb +5 -23
  60. data/lib/coradoc/html/input/converters/table.rb +3 -1
  61. data/lib/coradoc/html/input/converters/td.rb +4 -30
  62. data/lib/coradoc/html/input/converters/text.rb +4 -3
  63. data/lib/coradoc/html/input/converters/tr.rb +3 -2
  64. data/lib/coradoc/html/input/converters/video.rb +12 -36
  65. data/lib/coradoc/html/input/converters.rb +55 -70
  66. data/lib/coradoc/html/input/html_converter.rb +2 -74
  67. data/lib/coradoc/html/input/plugin.rb +8 -57
  68. data/lib/coradoc/html/input/plugins/plateau.rb +4 -19
  69. data/lib/coradoc/html/input/postprocessor.rb +3 -9
  70. data/lib/coradoc/html/input.rb +26 -8
  71. data/lib/coradoc/html/layout_renderer.rb +163 -0
  72. data/lib/coradoc/html/output.rb +6 -12
  73. data/lib/coradoc/html/renderer.rb +86 -357
  74. data/lib/coradoc/html/section_numberable.rb +9 -0
  75. data/lib/coradoc/html/spa.rb +29 -270
  76. data/lib/coradoc/html/static.rb +29 -238
  77. data/lib/coradoc/html/template_caching.rb +31 -0
  78. data/lib/coradoc/html/template_config.rb +11 -70
  79. data/lib/coradoc/html/template_helpers.rb +39 -31
  80. data/lib/coradoc/html/template_locator.rb +17 -11
  81. data/lib/coradoc/html/theme.rb +1 -7
  82. data/lib/coradoc/html/title_text.rb +57 -0
  83. data/lib/coradoc/html/toc_builder.rb +112 -0
  84. data/lib/coradoc/html/toc_serializer.rb +31 -0
  85. data/lib/coradoc/html/transform/from_core_model.rb +13 -12
  86. data/lib/coradoc/html/transform/to_core_model.rb +10 -12
  87. data/lib/coradoc/html/version.rb +1 -1
  88. data/lib/coradoc/html.rb +41 -88
  89. metadata +38 -70
  90. data/lib/coradoc/html/base.rb +0 -157
  91. data/lib/coradoc/html/converters/admonition.rb +0 -180
  92. data/lib/coradoc/html/converters/attribute.rb +0 -68
  93. data/lib/coradoc/html/converters/attribute_reference.rb +0 -60
  94. data/lib/coradoc/html/converters/audio.rb +0 -165
  95. data/lib/coradoc/html/converters/base.rb +0 -615
  96. data/lib/coradoc/html/converters/bibliography.rb +0 -82
  97. data/lib/coradoc/html/converters/bibliography_entry.rb +0 -108
  98. data/lib/coradoc/html/converters/block_image.rb +0 -72
  99. data/lib/coradoc/html/converters/bold.rb +0 -34
  100. data/lib/coradoc/html/converters/break.rb +0 -32
  101. data/lib/coradoc/html/converters/comment_block.rb +0 -42
  102. data/lib/coradoc/html/converters/comment_line.rb +0 -54
  103. data/lib/coradoc/html/converters/cross_reference.rb +0 -59
  104. data/lib/coradoc/html/converters/document.rb +0 -108
  105. data/lib/coradoc/html/converters/example.rb +0 -114
  106. data/lib/coradoc/html/converters/highlight.rb +0 -34
  107. data/lib/coradoc/html/converters/include.rb +0 -68
  108. data/lib/coradoc/html/converters/inline_image.rb +0 -41
  109. data/lib/coradoc/html/converters/italic.rb +0 -34
  110. data/lib/coradoc/html/converters/line_break.rb +0 -31
  111. data/lib/coradoc/html/converters/link.rb +0 -46
  112. data/lib/coradoc/html/converters/list_item.rb +0 -75
  113. data/lib/coradoc/html/converters/listing.rb +0 -99
  114. data/lib/coradoc/html/converters/literal.rb +0 -102
  115. data/lib/coradoc/html/converters/monospace.rb +0 -34
  116. data/lib/coradoc/html/converters/open.rb +0 -78
  117. data/lib/coradoc/html/converters/ordered.rb +0 -53
  118. data/lib/coradoc/html/converters/paragraph.rb +0 -46
  119. data/lib/coradoc/html/converters/quote.rb +0 -113
  120. data/lib/coradoc/html/converters/reviewer_comment.rb +0 -74
  121. data/lib/coradoc/html/converters/reviewer_note.rb +0 -134
  122. data/lib/coradoc/html/converters/section.rb +0 -90
  123. data/lib/coradoc/html/converters/sidebar.rb +0 -113
  124. data/lib/coradoc/html/converters/source.rb +0 -137
  125. data/lib/coradoc/html/converters/source_code.rb +0 -16
  126. data/lib/coradoc/html/converters/span.rb +0 -61
  127. data/lib/coradoc/html/converters/strikethrough.rb +0 -34
  128. data/lib/coradoc/html/converters/subscript.rb +0 -34
  129. data/lib/coradoc/html/converters/superscript.rb +0 -34
  130. data/lib/coradoc/html/converters/table.rb +0 -85
  131. data/lib/coradoc/html/converters/table_cell.rb +0 -203
  132. data/lib/coradoc/html/converters/table_row.rb +0 -45
  133. data/lib/coradoc/html/converters/template_html_converter.rb +0 -105
  134. data/lib/coradoc/html/converters/term.rb +0 -58
  135. data/lib/coradoc/html/converters/text_element.rb +0 -44
  136. data/lib/coradoc/html/converters/underline.rb +0 -34
  137. data/lib/coradoc/html/converters/unordered.rb +0 -47
  138. data/lib/coradoc/html/converters/verse.rb +0 -105
  139. data/lib/coradoc/html/converters/video.rb +0 -179
  140. data/lib/coradoc/html/element_mapping.rb +0 -210
  141. data/lib/coradoc/html/entity.rb +0 -137
  142. data/lib/coradoc/html/input/converters/ignore.rb +0 -22
  143. data/lib/coradoc/html/input/converters/th.rb +0 -20
  144. data/lib/coradoc/html/theme/base.rb +0 -231
  145. data/lib/coradoc/html/theme/classic_renderer.rb +0 -390
  146. data/lib/coradoc/html/theme/modern/components/ui_components.rb +0 -344
  147. data/lib/coradoc/html/theme/modern/css_generator.rb +0 -311
  148. data/lib/coradoc/html/theme/modern/javascript_generator.rb +0 -314
  149. data/lib/coradoc/html/theme/modern/serializers/document_serializer.rb +0 -382
  150. data/lib/coradoc/html/theme/modern/tailwind_config_builder.rb +0 -164
  151. data/lib/coradoc/html/theme/modern/vue_template_generator.rb +0 -374
  152. data/lib/coradoc/html/theme/modern_renderer.rb +0 -250
  153. data/lib/coradoc/html/theme/registry.rb +0 -153
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around an inline element model.
#
# Exposes the HTML tag, link target, escaped text and CSS class that a
# template needs to render one inline element. Every decision is driven by
# the string returned from the model's +resolve_format_type+.
class InlineElementDrop < Base
  # Format types that map one-to-one onto an HTML tag name.
  FORMAT_TAG_MAP = {
    'bold' => 'strong',
    'italic' => 'em',
    'monospace' => 'code',
    'superscript' => 'sup',
    'subscript' => 'sub',
    'underline' => 'u',
    'strikethrough' => 'del',
    'highlight' => 'mark',
    'quotation' => 'q',
    'small' => 'small',
    'stem' => 'code'
  }.freeze

  # @return the format type as resolved by the wrapped model
  def format_type
    @model.resolve_format_type
  end

  # @return [String, nil] tag name for this element; nil when the format
  #   type is neither special-cased here nor listed in FORMAT_TAG_MAP
  def html_tag
    case format_type
    when 'link', 'xref'
      'a'
    when 'footnote'
      'sup'
    when 'span', 'term'
      'span'
    else
      FORMAT_TAG_MAP[format_type]
    end
  end

  # @return [String, nil] link target for 'link' and 'xref' elements, nil
  #   for every other format type
  def href
    kind = format_type

    if kind == 'link'
      # Falls back to the 'href' metadata entry, then to a bare '#'.
      @model.target || @model.metadata('href') || '#'
    elsif kind == 'xref'
      # Cross-references are rendered as in-page fragment links.
      anchor = @model.target || @model.metadata('href') || ''
      "##{anchor}"
    end
  end

  # @return [String] HTML-escaped text extracted from the model content
  def text
    plain = extract_text(@model.content)
    Escape.escape_html(plain)
  end

  # @return [String, nil] CSS class for 'stem', 'term' and 'span'
  #   elements (the latter read from the 'class' metadata entry)
  def css_class
    kind = format_type
    return 'stem' if kind == 'stem'
    return 'term' if kind == 'term'

    @model.metadata('class') if kind == 'span'
  end

  # @return [String, nil] stringified content for 'term' elements only
  def term_ref
    return unless format_type == 'term'

    @model.content.to_s
  end
end
60
+
61
+ DropFactory.register(CoreModel::InlineElement, InlineElementDrop)
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a list block model.
class ListBlockDrop < Base
  # @return [String] 'ol' for ordered lists, 'dl' for definition lists and
  #   'ul' for every other marker type
  def html_tag
    marker = @model.marker_type
    return 'ol' if marker == 'ordered'
    return 'dl' if marker == 'definition'

    'ul'
  end

  # @return the list items converted through +children_to_liquid+
  def items
    entries = @model.items
    children_to_liquid(entries)
  end
end
19
+
20
+ DropFactory.register(CoreModel::ListBlock, ListBlockDrop)
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a single list item model.
class ListItemDrop < Base
  # @return the item content converted through +content_to_liquid+
  def content
    body = @model.content
    content_to_liquid(body)
  end

  # @return a drop for the nested list, or nil when the item has none
  def nested_list
    sublist = @model.nested_list
    return unless sublist

    DropFactory.create(sublist)
  end
end
16
+
17
+ DropFactory.register(CoreModel::ListItem, ListItemDrop)
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a table cell model.
class TableCellDrop < Base
  # @return [Boolean] true only when the model's header flag equals true
  def header?
    @model.header == true
  end

  # @return [String] 'th' for header cells, 'td' otherwise
  def html_tag
    return 'th' if header?

    'td'
  end

  # @return [String, nil] column span as a string, nil when unset
  def colspan
    span = @model.colspan
    span.nil? ? nil : span.to_s
  end

  # @return [String, nil] row span as a string, nil when unset
  def rowspan
    span = @model.rowspan
    span.nil? ? nil : span.to_s
  end

  # @return [String, nil] inline text-align declaration, nil when the
  #   cell has no alignment
  def style
    align = @model.alignment
    return unless align

    "text-align: #{align}"
  end

  # @return the cell's renderable content converted through
  #   +content_to_liquid+
  def content
    body = @model.renderable_content
    content_to_liquid(body)
  end
end
31
+
32
+ DropFactory.register(CoreModel::TableCell, TableCellDrop)
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a table model.
class TableDrop < Base
  # @return the table rows converted through +children_to_liquid+
  def rows
    table_rows = @model.rows
    children_to_liquid(table_rows)
  end
end
11
+
12
+ DropFactory.register(CoreModel::Table, TableDrop)
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a table row model.
class TableRowDrop < Base
  # @return [Boolean] true only when the model's header flag equals true
  def header?
    @model.header == true
  end

  # @return [String] always 'tr'
  def html_tag
    'tr'
  end

  # @return the row cells converted through +children_to_liquid+
  def cells
    row_cells = @model.cells
    children_to_liquid(row_cells)
  end
end
19
+
20
+ DropFactory.register(CoreModel::TableRow, TableRowDrop)
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a term model.
class TermDrop < Base
  # @return [String] HTML-escaped term text
  def text
    raw = @model.text.to_s
    Escape.escape_html(raw)
  end

  # @return [String] the unescaped term text
  def term_ref
    @model.text.to_s
  end

  # @return [String] 'term' plus a type-specific class; the type defaults
  #   to 'term' when the model has none
  def css_class
    kind = @model.type || 'term'
    "term term-#{kind}"
  end
end
20
+
21
+ DropFactory.register(CoreModel::Term, TermDrop)
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a plain text content model.
class TextContentDrop < Base
  # @return [String] HTML-escaped text of the wrapped model
  def text
    raw = @model.text.to_s
    Escape.escape_html(raw)
  end
end
11
+
12
+ DropFactory.register(CoreModel::TextContent, TextContentDrop)
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a table-of-contents model.
class TocDrop < Base
  # @return the TOC entries converted through +children_to_liquid+
  def entries
    toc_entries = @model.entries
    children_to_liquid(toc_entries)
  end
end
11
+
12
+ DropFactory.register(CoreModel::Toc, TocDrop)
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Drop
6
# Template-facing wrapper around a single table-of-contents entry.
class TocEntryDrop < Base
  # @return the entry title passed through +TitleText.escape+
  def title
    raw_title = @model.title
    TitleText.escape(raw_title)
  end

  # @return the entry number exactly as stored on the model
  def number
    @model.number
  end

  # @return the entry level exactly as stored on the model
  def level
    @model.level
  end

  # @return the child entries converted through +children_to_liquid+
  def children
    sub_entries = @model.children
    children_to_liquid(sub_entries)
  end

  # @return the title prefixed with "<number>. " when a number is
  #   present, otherwise the title alone
  def numbered_title
    prefix = number
    return title unless prefix

    "#{prefix}. #{title}"
  end
end
28
+
29
+ DropFactory.register(CoreModel::TocEntry, TocEntryDrop)
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Drop namespace — manages Liquid drop layer for template rendering.
4
+ #
5
+ # Loading order matters: Base must load before DropFactory, and all
6
+ # concrete drops must load after DropFactory (they self-register).
7
+ # Each drop calls DropFactory.register at load time.
8
+ module Coradoc
9
+ module Html
10
+ module Drop
11
+ end
12
+ end
13
+ end
14
+
15
+ # Base must load first (DropFactory depends on it)
16
+ require 'coradoc/html/drop/base'
17
+ # DropFactory loads next
18
+ require 'coradoc/html/drop/drop_factory'
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'json'
5
+
6
+ module Coradoc
7
+ module Html
8
# Escaping helpers for text that is embedded into generated HTML.
#
# All helpers are module functions, e.g. +Escape.escape_html(text)+.
module Escape
  module_function

  # Escapes text for use as HTML element content.
  #
  # @param text [#to_s] value to escape; nil becomes the empty string
  # @return [String] text with &, <, >, " and ' replaced by entities
  def escape_html(text)
    CGI.escapeHTML(text.to_s)
  end

  # Escapes a value for use inside an HTML attribute.
  #
  # Delegates to CGI.escapeHTML so that single quotes are escaped as well;
  # the previous hand-rolled chain left ' untouched, which is unsafe when
  # a template delimits the attribute with single quotes.
  #
  # @param value [#to_s] value to escape; nil becomes the empty string
  # @return [String] attribute-safe text
  def escape_attr(value)
    CGI.escapeHTML(value.to_s)
  end

  # Serialises data to JSON that can be embedded in an inline <script>.
  #
  # A String argument is used as-is (it is not re-encoded); anything else
  # goes through JSON.generate.
  #
  # @param data [String, Object] JSON text or a JSON-serialisable value
  # @return [String] JSON with script-terminating sequences neutralised
  def safe_json(data)
    json = data.is_a?(String) ? data : JSON.generate(data)

    # HTML tag names are case-insensitive, so "</SCRIPT" closes the element
    # just like "</script". "\/" is a valid JSON escape for "/".
    # Block replacements are used so the backslashes are taken literally.
    json.gsub(%r{</(?=script)}i) { '<\\/' }
        # "<!--" can switch the HTML tokenizer into its script-data escaped
        # states; "\u0021" is the JSON escape for "!".
        .gsub('<!--') { '<\\u0021--' }
  end
end
28
+ end
29
+ end
@@ -7,18 +7,10 @@ module Coradoc
7
7
  # Pre-compiled regexes for performance
8
8
  INNER_WHITESPACE_REGEX_1 = /\n stem:\[/
9
9
  INNER_WHITESPACE_REGEX_2 = /(stem:\[([^\]]|\\\])*\])\n(?=\S)/
10
- INNER_WHITESPACE_REGEX_3 = /(stem:\[([^\]]|\\\])*\])\s+(?=[\^-])/
11
10
  NEWLINES_REGEX = /\n{3,}/
12
11
  LEADING_NEWLINE_REGEX = /\A\n+/
13
12
  WHITESPACE_REGEX = /[ \t\r\n]+/
14
13
  TRAILING_WHITESPACE_REGEX = /[ \t\r\n]+\z/
15
- MULTIPLE_WHITESPACE_REGEX = /[ \t]{2,}/
16
- TAG_BORDER_REGEXES = {
17
- asterisk: /\s?\*{2,}/,
18
- underscore: /\s?_{2,}/,
19
- tilde: /\s?~{2,}/,
20
- bracket: /\s?\[.*?\]\s?/
21
- }.freeze
22
14
 
23
15
  def tidy(string)
24
16
  return string.transform_values { |i| tidy(i) } if string.is_a? Hash
@@ -74,23 +66,7 @@ module Coradoc
74
66
  result
75
67
  end
76
68
 
77
- # Find non-asterisk content that is enclosed by two or
78
- # more asterisks. Ensure that only one whitespace occurs
79
- # in the border area.
80
- # Same for underscores and brackets.
81
69
  def clean_tag_borders(string)
82
- # result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
83
- # preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
84
- # match.strip.sub("** ", "**").sub(" **", "**")
85
- # end
86
- # end
87
-
88
- # result = string.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
89
- # preserve_border_whitespaces(match, default_border: Coradoc::Input::HTML.config.tag_border) do
90
- # match.strip.sub("__ ", "__").sub(" __", "__")
91
- # end
92
- # end
93
-
94
70
  result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
95
71
  preserve_border_whitespaces(
96
72
  match,
@@ -111,29 +87,24 @@ module Coradoc
111
87
  string.gsub(/(\*\*|~~|__)\s([.!?'"])/, '\\1\\2')
112
88
  end
113
89
 
114
- # preprocesses HTML, rather than postprocessing it
115
90
  def preprocess_word_html(string)
116
91
  clean_headings(scrub_whitespace(string.dup))
117
92
  end
118
93
 
119
94
  def scrub_whitespace(string)
120
- string.gsub!(/&nbsp;|&#xA0;|\u00a0/i, '&#xA0;') # HTML encoded spaces
121
- string = Coradoc.strip_unicode(string) # Strip document-level leading and trailing whitespace
122
- string.gsub!(/( +)$/, ' ') # line trailing whitespace
123
- string.gsub!("\n\n\n\n", "\n\n") # Quadruple line breaks
124
- # string.delete!('?| ') # Unicode non-breaking spaces, injected as tabs
95
+ string.gsub!(/&nbsp;|&#xA0;| /i, '&#xA0;')
96
+ string = Coradoc.strip_unicode(string)
97
+ string.gsub!(/( +)$/, ' ')
98
+ string.gsub!("\n\n\n\n", "\n\n")
125
99
  string
126
100
  end
127
101
 
128
- # following added by me
129
102
  def clean_headings(string)
130
103
  string.gsub!(%r{<h([1-9])[^>]*></h\1>}, ' ')
131
- # I don't know why Libre Office is inserting them, but they need to go
132
104
  string.gsub!(
133
105
  %r{<h([1-9])[^>]* style="vertical-align: super;[^>]*>(.+?)</h\1>},
134
106
  '<sup>\\2</sup>'
135
107
  )
136
- # I absolutely don't know why Libre Office is rendering superscripts as h1
137
108
  string
138
109
  end
139
110
 
@@ -53,11 +53,12 @@ module Coradoc
53
53
  end
54
54
 
55
55
  def self.declare_option(option)
56
+ attr_accessor option
57
+
58
+ original_reader = instance_method(option)
56
59
  define_method(option) do
57
- @inline_options[option] || instance_variable_get(:"@#{option}")
60
+ @inline_options[option] || original_reader.bind_call(self)
58
61
  end
59
-
60
- attr_writer option
61
62
  end
62
63
 
63
64
  declare_option :unknown_tags
@@ -7,6 +7,8 @@ module Coradoc
7
7
  module Html
8
8
  module Converters
9
9
  class A < Base
10
+ INSTANCE = new
11
+
10
12
  def to_coradoc(node, state = {})
11
13
  # Use treat_children_coradoc to get CoreModel elements
12
14
  content = treat_children_coradoc(node, state)
@@ -31,14 +33,8 @@ module Coradoc
31
33
  # For cross-references
32
34
  if href.to_s.start_with?('#')
33
35
  ref_id = href.sub(/^#/, '').gsub(/\s/, '').gsub(/__+/, '_')
34
- # Convert content to string
35
- content_str = if content.is_a?(Array)
36
- content.map { |c| c.is_a?(Coradoc::CoreModel::Base) ? c.content : c.to_s }.join
37
- else
38
- content.to_s
39
- end
40
- return Coradoc::CoreModel::InlineElement.new(
41
- format_type: 'xref',
36
+ content_str = extract_text_from_content(content)
37
+ return Coradoc::CoreModel::CrossReferenceElement.new(
42
38
  target: ref_id,
43
39
  content: content_str.strip.empty? ? nil : content_str.strip
44
40
  )
@@ -50,25 +46,18 @@ module Coradoc
50
46
  ambigous_characters = /[\w.?&#=%;\[\u{ff}-\u{10ffff}]/
51
47
  right_constrain = textnode_after_start_with?(node, ambigous_characters)
52
48
 
53
- # Convert content to string for the link
54
- content_str = if content.is_a?(Array)
55
- content.map { |c| c.is_a?(Coradoc::CoreModel::Base) && c.content ? c.content : c.to_s }.join
56
- else
57
- content.to_s
58
- end
49
+ content_str = extract_text_from_content(content)
59
50
 
60
51
  out = []
61
52
  # Add leading space if needed
62
53
  if textnode_before_end_with?(node, ambigous_characters)
63
- out << Coradoc::CoreModel::InlineElement.new(
64
- format_type: 'text',
54
+ out << Coradoc::CoreModel::TextElement.new(
65
55
  content: ' '
66
56
  )
67
57
  end
68
58
 
69
59
  # Create link element
70
- link = Coradoc::CoreModel::InlineElement.new(
71
- format_type: 'link',
60
+ link = Coradoc::CoreModel::LinkElement.new(
72
61
  target: href,
73
62
  content: content_str.strip,
74
63
  metadata: {
@@ -83,7 +72,7 @@ module Coradoc
83
72
  end
84
73
  end
85
74
 
86
- register :a, A.new
75
+ register :a, A::INSTANCE
87
76
  end
88
77
  end
89
78
  end
@@ -5,18 +5,17 @@ module Coradoc
5
5
  module Html
6
6
  module Converters
7
7
  class Aside < Base
8
+ INSTANCE = new
9
+
8
10
  def to_coradoc(node, state = {})
9
11
  content = treat_children_coradoc(node, state)
10
- # Use AnnotationBlock with annotation_type: "sidebar" for aside elements
11
- Coradoc::CoreModel::AnnotationBlock.new(
12
- annotation_type: 'sidebar',
13
- block_semantic_type: :sidebar,
12
+ Coradoc::CoreModel::SidebarBlock.new(
14
13
  children: content
15
14
  )
16
15
  end
17
16
  end
18
17
 
19
- register :aside, Aside.new
18
+ register :aside, Aside::INSTANCE
20
19
  end
21
20
  end
22
21
  end
@@ -4,46 +4,17 @@ module Coradoc
4
4
  module Input
5
5
  module Html
6
6
  module Converters
7
- class Audio < Base
8
- def to_coradoc(node, _state = {})
9
- src = node['src']
10
- id = node['id']
11
- title = extract_title(node)
12
- options(node)
7
+ class Audio < MediaBase
8
+ INSTANCE = new
13
9
 
14
- # Use Block with custom attributes to store audio info
15
- # CoreModel doesn't have a specific Audio type, so we use Block
16
- # with element_attributes to store audio-specific data
17
- Coradoc::CoreModel::Block.new(
18
- element_type: 'audio',
19
- block_semantic_type: :audio,
20
- content: src,
21
- title: title,
22
- id: id,
23
- element_attributes: {
24
- autoplay: node['autoplay'],
25
- loop: node['loop'],
26
- controls: node['controls']
27
- }.compact
28
- )
29
- end
30
-
31
- def extract_title(node)
32
- title = node.at('./track') || node.at('.//source')
33
- return '' if title.nil?
34
-
35
- title['label'] || title['srclang'] || ''
36
- end
10
+ private
37
11
 
38
- def options(node)
39
- autoplay = node['autoplay']
40
- loop_attr = node['loop']
41
- controls = node['controls']
42
- [autoplay, loop_attr, controls].compact
12
+ def semantic_type
13
+ :audio
43
14
  end
44
15
  end
45
16
 
46
- register :audio, Audio.new
17
+ register :audio, Audio::INSTANCE
47
18
  end
48
19
  end
49
20
  end
@@ -5,24 +5,6 @@ module Coradoc
5
5
  module Html
6
6
  module Converters
7
7
  class Base
8
- # Default implementation to convert a given Nokogiri node
9
- # to a CoreModel type.
10
- # Can be overriden by subclasses.
11
- def convert(node, state = {})
12
- to_coradoc(node, state)
13
- end
14
-
15
- # NOTE: treat_children won't run plugin hooks
16
- def treat_children(node, state)
17
- node.children.map do |child|
18
- treat(child, state)
19
- end
20
- end
21
-
22
- def treat(node, state)
23
- Converters.process(node, state)
24
- end
25
-
26
8
  def treat_children_coradoc(node, state)
27
9
  results = node.children.map do |child|
28
10
  treat_coradoc(child, state)
@@ -44,15 +26,15 @@ module Coradoc
44
26
  def node_has_ancestor?(node, name)
45
27
  case name
46
28
  when String
47
- node.ancestors.map(&:name).include?(name)
29
+ node.ancestors(name).any?
48
30
  when Array
49
- (node.ancestors.map(&:name) & name).any?
31
+ name.any? { |n| node.ancestors(n).any? }
50
32
  end
51
33
  end
52
34
 
53
35
  def textnode_before_end_with?(node, str)
54
- return nil unless [String, Regexp].include?(str.class)
55
- return nil if str.is_a?(String) && str.empty?
36
+ return false unless [String, Regexp].include?(str.class)
37
+ return false if str.is_a?(String) && str.empty?
56
38
 
57
39
  str = /#{Regexp.escape(str)}/ if str.is_a?(String)
58
40
  str = /(?:#{str})\z/
@@ -62,8 +44,8 @@ module Coradoc
62
44
  end
63
45
 
64
46
  def textnode_after_start_with?(node, str)
65
- return nil unless [String, Regexp].include?(str.class)
66
- return nil if str.is_a?(String) && str.empty?
47
+ return false unless [String, Regexp].include?(str.class)
48
+ return false if str.is_a?(String) && str.empty?
67
49
 
68
50
  str = /#{Regexp.escape(str)}/ if str.is_a?(String)
69
51
  str = /\A(?:#{str})/
@@ -105,9 +87,29 @@ module Coradoc
105
87
  after.text[0]&.match?(/\w|,|;|"|\.\?!/)
106
88
  end
107
89
 
108
- # Helper to escape text content
109
- def escape_text(text)
110
- text.to_s.gsub(/[<>&]/, '<' => '&lt;', '>' => '&gt;', '&' => '&amp;')
90
+ # Extract plain text from a mixed content array.
91
+ # Handles String, InlineElement (via .content), and other
92
+ # CoreModel::Base (via .content or .title).
93
+ def extract_text_from_content(content)
94
+ return content if content.is_a?(String)
95
+ return '' if content.nil?
96
+
97
+ content.map do |item|
98
+ case item
99
+ when String
100
+ item
101
+ when Coradoc::CoreModel::InlineElement
102
+ item.content.to_s
103
+ when Coradoc::CoreModel::Base
104
+ if item.content
105
+ item.content.to_s
106
+ else
107
+ ''
108
+ end
109
+ else
110
+ item.to_s
111
+ end
112
+ end.join
111
113
  end
112
114
  end
113
115
  end
@@ -5,20 +5,22 @@ module Coradoc
5
5
  module Html
6
6
  module Converters
7
7
  class Blockquote < Base
8
+ INSTANCE = new
9
+
8
10
  def to_coradoc(node, state = {})
9
11
  id = node['id']
10
12
  cite = node['cite']
11
13
  content = treat_children_coradoc(node, state)
12
14
 
13
15
  Coradoc::CoreModel::QuoteBlock.new(
14
- content: content,
16
+ children: content,
15
17
  id: id,
16
18
  attribution: cite
17
19
  )
18
20
  end
19
21
  end
20
22
 
21
- register :blockquote, Blockquote.new
23
+ register :blockquote, Blockquote::INSTANCE
22
24
  end
23
25
  end
24
26
  end