coradoc-html 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/coradoc/html/base.rb +157 -0
  4. data/lib/coradoc/html/config.rb +467 -0
  5. data/lib/coradoc/html/converter_base.rb +177 -0
  6. data/lib/coradoc/html/converters/admonition.rb +180 -0
  7. data/lib/coradoc/html/converters/attribute.rb +68 -0
  8. data/lib/coradoc/html/converters/attribute_reference.rb +60 -0
  9. data/lib/coradoc/html/converters/audio.rb +165 -0
  10. data/lib/coradoc/html/converters/base.rb +615 -0
  11. data/lib/coradoc/html/converters/bibliography.rb +82 -0
  12. data/lib/coradoc/html/converters/bibliography_entry.rb +108 -0
  13. data/lib/coradoc/html/converters/block_image.rb +72 -0
  14. data/lib/coradoc/html/converters/bold.rb +34 -0
  15. data/lib/coradoc/html/converters/break.rb +32 -0
  16. data/lib/coradoc/html/converters/comment_block.rb +42 -0
  17. data/lib/coradoc/html/converters/comment_line.rb +54 -0
  18. data/lib/coradoc/html/converters/cross_reference.rb +59 -0
  19. data/lib/coradoc/html/converters/document.rb +108 -0
  20. data/lib/coradoc/html/converters/example.rb +114 -0
  21. data/lib/coradoc/html/converters/highlight.rb +34 -0
  22. data/lib/coradoc/html/converters/include.rb +68 -0
  23. data/lib/coradoc/html/converters/inline_image.rb +41 -0
  24. data/lib/coradoc/html/converters/italic.rb +34 -0
  25. data/lib/coradoc/html/converters/line_break.rb +31 -0
  26. data/lib/coradoc/html/converters/link.rb +46 -0
  27. data/lib/coradoc/html/converters/list_item.rb +75 -0
  28. data/lib/coradoc/html/converters/listing.rb +99 -0
  29. data/lib/coradoc/html/converters/literal.rb +102 -0
  30. data/lib/coradoc/html/converters/monospace.rb +34 -0
  31. data/lib/coradoc/html/converters/open.rb +78 -0
  32. data/lib/coradoc/html/converters/ordered.rb +53 -0
  33. data/lib/coradoc/html/converters/paragraph.rb +46 -0
  34. data/lib/coradoc/html/converters/quote.rb +113 -0
  35. data/lib/coradoc/html/converters/reviewer_comment.rb +74 -0
  36. data/lib/coradoc/html/converters/reviewer_note.rb +134 -0
  37. data/lib/coradoc/html/converters/section.rb +90 -0
  38. data/lib/coradoc/html/converters/sidebar.rb +113 -0
  39. data/lib/coradoc/html/converters/source.rb +137 -0
  40. data/lib/coradoc/html/converters/source_code.rb +16 -0
  41. data/lib/coradoc/html/converters/span.rb +61 -0
  42. data/lib/coradoc/html/converters/strikethrough.rb +34 -0
  43. data/lib/coradoc/html/converters/subscript.rb +34 -0
  44. data/lib/coradoc/html/converters/superscript.rb +34 -0
  45. data/lib/coradoc/html/converters/table.rb +85 -0
  46. data/lib/coradoc/html/converters/table_cell.rb +203 -0
  47. data/lib/coradoc/html/converters/table_row.rb +45 -0
  48. data/lib/coradoc/html/converters/template_html_converter.rb +105 -0
  49. data/lib/coradoc/html/converters/term.rb +58 -0
  50. data/lib/coradoc/html/converters/text_element.rb +44 -0
  51. data/lib/coradoc/html/converters/underline.rb +34 -0
  52. data/lib/coradoc/html/converters/unordered.rb +47 -0
  53. data/lib/coradoc/html/converters/verse.rb +105 -0
  54. data/lib/coradoc/html/converters/video.rb +179 -0
  55. data/lib/coradoc/html/element_mapping.rb +210 -0
  56. data/lib/coradoc/html/entity.rb +137 -0
  57. data/lib/coradoc/html/input/cleaner.rb +163 -0
  58. data/lib/coradoc/html/input/config.rb +79 -0
  59. data/lib/coradoc/html/input/converters/a.rb +90 -0
  60. data/lib/coradoc/html/input/converters/aside.rb +23 -0
  61. data/lib/coradoc/html/input/converters/audio.rb +50 -0
  62. data/lib/coradoc/html/input/converters/base.rb +116 -0
  63. data/lib/coradoc/html/input/converters/blockquote.rb +25 -0
  64. data/lib/coradoc/html/input/converters/br.rb +19 -0
  65. data/lib/coradoc/html/input/converters/bypass.rb +83 -0
  66. data/lib/coradoc/html/input/converters/code.rb +25 -0
  67. data/lib/coradoc/html/input/converters/div.rb +25 -0
  68. data/lib/coradoc/html/input/converters/dl.rb +106 -0
  69. data/lib/coradoc/html/input/converters/drop.rb +28 -0
  70. data/lib/coradoc/html/input/converters/em.rb +23 -0
  71. data/lib/coradoc/html/input/converters/figure.rb +58 -0
  72. data/lib/coradoc/html/input/converters/h.rb +76 -0
  73. data/lib/coradoc/html/input/converters/head.rb +30 -0
  74. data/lib/coradoc/html/input/converters/hr.rb +20 -0
  75. data/lib/coradoc/html/input/converters/ignore.rb +22 -0
  76. data/lib/coradoc/html/input/converters/img.rb +110 -0
  77. data/lib/coradoc/html/input/converters/li.rb +35 -0
  78. data/lib/coradoc/html/input/converters/mark.rb +21 -0
  79. data/lib/coradoc/html/input/converters/markup.rb +107 -0
  80. data/lib/coradoc/html/input/converters/math.rb +46 -0
  81. data/lib/coradoc/html/input/converters/ol.rb +46 -0
  82. data/lib/coradoc/html/input/converters/p.rb +81 -0
  83. data/lib/coradoc/html/input/converters/pass_through.rb +19 -0
  84. data/lib/coradoc/html/input/converters/pre.rb +59 -0
  85. data/lib/coradoc/html/input/converters/q.rb +24 -0
  86. data/lib/coradoc/html/input/converters/strong.rb +22 -0
  87. data/lib/coradoc/html/input/converters/sub.rb +40 -0
  88. data/lib/coradoc/html/input/converters/sup.rb +40 -0
  89. data/lib/coradoc/html/input/converters/table.rb +64 -0
  90. data/lib/coradoc/html/input/converters/td.rb +70 -0
  91. data/lib/coradoc/html/input/converters/text.rb +67 -0
  92. data/lib/coradoc/html/input/converters/th.rb +20 -0
  93. data/lib/coradoc/html/input/converters/tr.rb +28 -0
  94. data/lib/coradoc/html/input/converters/video.rb +53 -0
  95. data/lib/coradoc/html/input/converters.rb +122 -0
  96. data/lib/coradoc/html/input/errors.rb +22 -0
  97. data/lib/coradoc/html/input/html_converter.rb +170 -0
  98. data/lib/coradoc/html/input/plugin.rb +169 -0
  99. data/lib/coradoc/html/input/plugins/plateau.rb +229 -0
  100. data/lib/coradoc/html/input/postprocessor.rb +31 -0
  101. data/lib/coradoc/html/input.rb +68 -0
  102. data/lib/coradoc/html/output.rb +95 -0
  103. data/lib/coradoc/html/renderer.rb +409 -0
  104. data/lib/coradoc/html/spa.rb +309 -0
  105. data/lib/coradoc/html/static.rb +293 -0
  106. data/lib/coradoc/html/template_config.rb +151 -0
  107. data/lib/coradoc/html/template_helpers.rb +58 -0
  108. data/lib/coradoc/html/template_locator.rb +114 -0
  109. data/lib/coradoc/html/theme/base.rb +231 -0
  110. data/lib/coradoc/html/theme/classic_renderer.rb +390 -0
  111. data/lib/coradoc/html/theme/modern/components/ui_components.rb +344 -0
  112. data/lib/coradoc/html/theme/modern/css_generator.rb +311 -0
  113. data/lib/coradoc/html/theme/modern/javascript_generator.rb +314 -0
  114. data/lib/coradoc/html/theme/modern/serializers/document_serializer.rb +382 -0
  115. data/lib/coradoc/html/theme/modern/tailwind_config_builder.rb +164 -0
  116. data/lib/coradoc/html/theme/modern/vue_template_generator.rb +374 -0
  117. data/lib/coradoc/html/theme/modern_renderer.rb +250 -0
  118. data/lib/coradoc/html/theme/registry.rb +153 -0
  119. data/lib/coradoc/html/theme.rb +13 -0
  120. data/lib/coradoc/html/transform/from_core_model.rb +32 -0
  121. data/lib/coradoc/html/transform/to_core_model.rb +39 -0
  122. data/lib/coradoc/html/version.rb +7 -0
  123. data/lib/coradoc/html.rb +255 -0
  124. metadata +264 -0
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for CoreModel::Block (bibliography) to HTML bibliography section
7
+ class Bibliography < Base
8
# Render a bibliography block as an HTML <section class="bibliography">.
#
# The section holds an optional <h2> title followed by an optional
# <div class="bibliography-entries"> wrapper around the rendered children.
#
# @param bibliography [Object, nil] block exposing #id, #title and #children
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [String] section markup, or '' when bibliography is nil/false
def self.to_html(bibliography, _options = {})
  return '' unless bibliography

  section_attrs = build_attributes(bibliography)
  heading = build_title(bibliography)

  # Each child is rendered by BibliographyEntry; results are newline-separated.
  rendered_entries = (bibliography.children || [])
                     .map { |child| BibliographyEntry.to_html(child) }
                     .join("\n")

  heading_part = heading ? "#{heading}\n" : ''
  entries_part =
    if rendered_entries.empty?
      ''
    else
      %(<div class="bibliography-entries">\n#{rendered_entries}\n</div>)
    end

  %(<section#{section_attrs}>\n#{heading_part}#{entries_part}\n</section>)
end
31
+
32
# Build a bibliography CoreModel::Block from an HTML <section> element.
#
# @param element [Object] node queried through #name, #[], and #at_css
#   (presumably a Nokogiri node -- confirm against callers)
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [Coradoc::CoreModel::Block, nil] nil unless the element is a
#   <section> whose class attribute contains "bibliography"
def self.to_coradoc(element, _options = {})
  return nil unless element.name == 'section' && element['class']&.include?('bibliography')

  # The first heading (or explicitly marked title element) supplies the title.
  heading = element.at_css('h1, h2, h3, h4, h5, h6, .bibliography-title')
  title = heading&.text&.strip

  # Entries are only looked up inside the dedicated wrapper; entries that
  # BibliographyEntry rejects (nil) are dropped.
  container = element.at_css('.bibliography-entries')
  entries = []
  if container
    entries = container.css('.bibliography-entry')
                       .map { |entry_elem| BibliographyEntry.to_coradoc(entry_elem) }
                       .compact
  end

  Coradoc::CoreModel::Block.new(
    element_type: 'bibliography',
    title: title,
    id: element['id'],
    children: entries
  )
end
61
+
62
# Assemble the attribute string for the bibliography <section> tag.
#
# @param bibliography [Object] block exposing #id
# @return [String] always begins with ` class="bibliography"`; an escaped
#   ` id="..."` attribute follows when the block has an id
def self.build_attributes(bibliography)
  id_attr = bibliography.id ? %( id="#{escape_attribute(bibliography.id)}") : ''
  %( class="bibliography") + id_attr
end
70
+
71
# Render the bibliography title as an <h2> heading.
#
# @param bibliography [Object] block exposing #title
# @return [String, nil] heading markup, or nil when the title is missing
#   or its string form is empty
def self.build_title(bibliography)
  raw_title = bibliography.title
  return nil unless raw_title

  heading_text = raw_title.to_s
  heading_text.empty? ? nil : %(<h2 class="bibliography-title">#{escape_html(heading_text)}</h2>)
end
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for CoreModel::Block (bibliography entry) to HTML bibliography entry
7
+ class BibliographyEntry < Base
8
# Render a single bibliography entry as <div class="bibliography-entry">.
#
# The div contains, in order: an empty anchor carrying the entry id (when
# one is known), a label span (when the label is non-empty), and the
# rendered reference content.
#
# @param entry [Object, nil] block exposing #metadata, #id and #content
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [String] entry markup, or '' when entry is nil/false
def self.to_html(entry, _options = {})
  return '' unless entry

  wrapper_attrs = build_attributes(entry)
  meta = entry.metadata

  # Id preference: metadata :anchor_name, then metadata :document_id,
  # then the block's own id.
  anchor_id = meta&.dig(:anchor_name) || meta&.dig(:document_id) || entry.id
  anchor = anchor_id ? %(<a id="#{escape_attribute(anchor_id)}" class="bibliography-anchor"></a>) : ''

  # Without an explicit label the anchor id doubles as the visible label.
  citation_label = meta&.dig(:label) || anchor_id || ''
  body = process_content(entry.content || '')

  label_span = ''
  unless citation_label.empty?
    label_span = %(<span class="bibliography-label">#{escape_html(citation_label)}</span> )
  end

  %(<div#{wrapper_attrs}>#{anchor}#{label_span}#{body}</div>)
end
41
+
42
# Build a bibliography-entry CoreModel::Block from an HTML <div>.
#
# @param element [Object] node queried through #name, #[], #at_css and
#   #children (presumably a Nokogiri node -- confirm against callers)
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [Coradoc::CoreModel::Block, nil] nil unless the element is a <div>
#   whose class attribute contains "bibliography-entry"
def self.to_coradoc(element, _options = {})
  return nil unless element.name == 'div' && element['class']&.include?('bibliography-entry')

  anchor = element.at_css('.bibliography-anchor, a[id]')
  label_elem = element.at_css('.bibliography-label')

  # Reference content is every direct child that is neither the anchor,
  # the label, nor whitespace-only text.
  body_nodes = element.children.reject do |node|
    next true if node == anchor || node == label_elem

    node.text? && node.text.strip.empty?
  end

  Coradoc::CoreModel::Block.new(
    element_type: 'bibliography_entry',
    content: extract_content(body_nodes),
    id: anchor&.[]('id'),
    metadata: {
      label: label_elem&.text&.strip
    }
  )
end
71
+
72
# Attribute string for the entry wrapper <div>.
#
# @param _entry [Object] ignored; every entry gets the same class attribute
# @return [String] the fixed ` class="bibliography-entry"` attribute
def self.build_attributes(_entry)
  ' class="bibliography-entry"'
end
75
+
76
# Turn entry content into HTML.
#
# @param content [String, Array, Object, nil] nil yields ''; a String is
#   HTML-escaped; an Array has each item converted and the results joined;
#   anything else is converted as a single item
# @return [String]
def self.process_content(content)
  case content
  when nil then ''
  when String then escape_html(content)
  when Array then content.map { |item| convert_item(item) }.join
  else convert_item(content)
  end
end
87
+
88
# Convert one content item to HTML.
#
# @param item [String, Object] Strings are HTML-escaped; every other value
#   is handed to convert_content_to_html (defined outside this view)
# @return [String] presumably -- the non-String branch returns whatever
#   convert_content_to_html returns
def self.convert_item(item)
  return escape_html(item) if item.is_a?(String)

  convert_content_to_html(item)
end
96
+
97
# Concatenate the text of the given nodes into one string.
#
# Text nodes and element nodes are treated alike: each contributes its
# #text, so inline markup inside an element is flattened to plain text.
# (The previous body carried an empty `if node.text?` branch that had no
# effect; it has been removed.)
#
# @param nodes [Enumerable] objects responding to #text
# @return [String] joined text; nil texts are skipped
def self.extract_content(nodes)
  nodes.map(&:text).compact.join
end
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for CoreModel::Image (block image)
7
+ class BlockImage < Base
8
# Render a block image as HTML.
#
# With a non-empty caption the <img> is wrapped in <figure> together with a
# <figcaption>; otherwise it is wrapped in <div class="image">.
#
# @param model [Object] image model exposing #src, #id, #alt, #caption,
#   #width and #height
# @param _state [Hash] accepted for interface compatibility; not used here
# @return [Object] whatever build_element (defined outside this view)
#   returns -- presumably an HTML String
def self.to_html(model, _state = {})
  img_attrs = {}

  # Leading colons are a source-format syntax artifact. \A anchors at the
  # start of the string, so colons at the start of a later line are left alone.
  src = model.src
  src = src.sub(/\A:+/, '') if src
  img_attrs[:src] = src if src
  img_attrs[:id] = model.id if model.id

  # alt falls back to the caption, and finally to an empty string.
  img_attrs[:alt] = model.alt || model.caption || ''

  img_attrs[:width] = model.width if model.width
  img_attrs[:height] = model.height if model.height

  img_element = build_element('img', nil, img_attrs)

  if model.caption && !model.caption.empty?
    figcaption = build_element('figcaption', model.caption)
    content = "#{img_element}\n#{figcaption}"
    build_element('figure', "\n#{content}\n")
  else
    # No caption: wrap the bare <img> in a div for block display.
    build_element('div', img_element, { class: 'image' })
  end
end
33
+
34
# Build a block CoreModel::Image from <figure> or <div class="image">.
#
# @param node [Object] node queried through #name, #[], and #at_css
#   (presumably a Nokogiri node -- confirm against callers)
# @param _state [Hash] accepted for interface compatibility; not used here
# @return [Coradoc::CoreModel::Image, nil] nil when the node is neither a
#   <figure> nor a <div> whose class is exactly "image", or when it holds
#   no <img>
def self.to_coradoc(node, _state = {})
  from_figure = node.name == 'figure'
  return nil unless from_figure || (node.name == 'div' && node['class'] == 'image')

  img_node = node.at_css('img')
  return nil unless img_node

  attrs = extract_attributes(img_node)
  fields = {
    src: attrs[:src],
    id: attrs[:id],
    alt: attrs[:alt],
    width: attrs[:width],
    height: attrs[:height],
    inline: false
  }

  # Only the <figure> form carries a caption; the div form leaves it unset.
  fields[:caption] = node.at_css('figcaption')&.text if from_figure

  Coradoc::CoreModel::Image.new(**fields)
end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for Bold inline element
7
+ class Bold < Base
8
+ class << self
9
# Convert an HTML <strong> or <b> node into a bold inline element.
# @param node [Nokogiri::XML::Node] HTML node whose children become the content
# @param state [Hash] conversion state, forwarded to treat_children
# @return [Coradoc::CoreModel::InlineElement] element with format_type 'bold'
def to_coradoc(node, state = {})
  children = treat_children(node, state)
  Coradoc::CoreModel::InlineElement.new(format_type: 'bold', content: children)
end
20
+
21
# Render a bold inline element as an HTML <strong> element.
# @param model [Coradoc::CoreModel::InlineElement] bold model
# @param state [Hash] conversion state, forwarded to convert_content_to_html
# @return [String] HTML string
def to_html(model, state = {})
  inner_html = convert_content_to_html(model.content, state)
  build_element('strong', inner_html, extract_model_attributes(model))
end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for Break (horizontal rule)
7
+ class Break < Base
8
+ class << self
9
# Convert an HTML <hr> into a break inline element.
# @param _node [Nokogiri::XML::Node] HTML node (not inspected)
# @param _state [Hash] conversion state (not used)
# @return [Coradoc::CoreModel::InlineElement] element with format_type
#   'break' and metadata break_type 'thematic'
def to_coradoc(_node, _state = {})
  thematic = { break_type: 'thematic' }
  Coradoc::CoreModel::InlineElement.new(format_type: 'break', metadata: thematic)
end
19
+
20
# Render a break inline element as an HTML <hr>.
# @param model [Coradoc::CoreModel::InlineElement] break model
# @param _state [Hash] conversion state (not used)
# @return [String] HTML string
def to_html(model, _state = {})
  build_element('hr', nil, extract_model_attributes(model))
end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for CoreModel::Block with element_type "comment"
7
+ class CommentBlock < Base
8
# Render a comment block as a multi-line HTML comment.
#
# @param comment_block [Object, nil] block exposing #content
# @param options [Hash] comments are only emitted when
#   options[:preserve_comments] is truthy
# @return [String] "<!--\n...\n-->", or '' when the block is nil/false or
#   comments are not preserved
def self.to_html(comment_block, options = {})
  return '' unless comment_block
  return '' unless options[:preserve_comments]

  text = comment_block.content.to_s

  # "--" must not appear inside an HTML comment. Insert a space after every
  # hyphen that is directly followed by another hyphen. A plain
  # '--' -> '- -' substitution leaves "- --" behind for "---"; the
  # lookahead handles runs of any length ("---" becomes "- - -").
  safe_text = text.gsub(/-(?=-)/, '- ')

  # The text sits on its own lines so internal newlines are preserved.
  "<!--\n#{escape_html(safe_text)}\n-->"
end
26
+
27
# Build a comment CoreModel::Block from an HTML comment node.
#
# @param element [Object] node responding to #comment? and #text
#   (presumably a Nokogiri node -- confirm against callers)
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [Coradoc::CoreModel::Block, nil] nil unless element.comment? is true;
#   the comment text is stored unstripped
def self.to_coradoc(element, _options = {})
  return nil unless element.comment?

  Coradoc::CoreModel::Block.new(element_type: 'comment', content: element.text.to_s)
end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ class CommentLine < Base
7
# Render a single-line comment as an HTML comment.
#
# @param comment [Object, nil] model exposing #content and #text; #content
#   wins, #text is the fallback
# @param options [Hash] comments are only emitted when
#   options[:preserve_comments] is truthy
# @return [String] "<!-- ... -->", "<!--\n-->" for a blank comment, or ''
#   when the comment is nil/false or comments are not preserved
def self.to_html(comment, options = {})
  return '' unless comment
  return '' unless options[:preserve_comments]

  text = (comment.content || comment.text || '').to_s

  # "--" must not appear inside an HTML comment. Insert a space after every
  # hyphen that is directly followed by another hyphen. A plain
  # '--' -> '- -' substitution leaves "- --" behind for "---"; the
  # lookahead handles runs of any length ("---" becomes "- - -").
  safe_text = text.gsub(/-(?=-)/, '- ')

  # A blank comment line is rendered as a comment holding just a newline.
  if safe_text.strip.empty?
    "<!--\n-->"
  else
    "<!-- #{escape_html(safe_text)} -->"
  end
end
37
+
38
# Build a comment inline element from an HTML comment node.
#
# @param element [Object] node responding to #comment? and #text
#   (presumably a Nokogiri node -- confirm against callers)
# @param _options [Hash] accepted for interface compatibility; not used here
# @return [Coradoc::CoreModel::InlineElement, nil] nil unless
#   element.comment? is true; otherwise an element with format_type
#   'comment' holding the stripped comment text
def self.to_coradoc(element, _options = {})
  return nil unless element.comment?

  stripped = element.text.to_s.strip
  Coradoc::CoreModel::InlineElement.new(format_type: 'comment', content: stripped)
end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for CoreModel::InlineElement with format_type "xref"
7
+ class CrossReference < Base
8
+ class << self
9
# Render a cross-reference inline element as an internal HTML link.
#
# The link text is the model content; when the content is nil or blank the
# target itself is used as the text.
#
# @param model [Coradoc::CoreModel::InlineElement] cross-reference model
#   exposing #target and #content
# @param _state [Hash] conversion state (not used)
# @return [String] HTML anchor tag whose href always starts with '#'
def to_html(model, _state = {})
  href = model.target.to_s

  # nil.to_s is '', so missing content and blank content both fall back to
  # the target. The previous `content&.to_s&.strip != ''` test was true for
  # nil content and produced an empty link text.
  label = model.content.to_s
  label = href if label.strip.empty?

  # Internal links need a leading '#'; do not double it if already present.
  link_href = href.start_with?('#') ? href : "##{href}"

  %(<a href="#{escape_attribute(link_href)}">#{escape_html(label)}</a>)
end
28
+
29
# Convert an HTML anchor into an inline element.
#
# An href starting with '#' yields an 'xref' element whose target is the
# href without the '#'; any other href yields a regular 'link' element.
#
# @param node [Nokogiri::XML::Node] HTML anchor node
# @param _state [Hash] conversion state (not used)
# @return [Coradoc::CoreModel::InlineElement] 'xref' or 'link' element
def to_coradoc(node, _state = {})
  href = node['href'].to_s
  link_text = node.text.strip

  unless href.start_with?('#')
    return Coradoc::CoreModel::InlineElement.new(
      format_type: 'link',
      target: href,
      content: link_text
    )
  end

  anchor_id = href[1..]

  # Text that is empty or merely repeats the id carries no information,
  # so content stays nil in those cases.
  custom_text = link_text unless link_text.empty? || link_text == anchor_id

  Coradoc::CoreModel::InlineElement.new(
    format_type: 'xref',
    target: anchor_id,
    content: custom_text
  )
end
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Html
5
+ module Converters
6
+ # Converter for Document
7
+ class Document < Base
8
+ class << self
9
# Convert an HTML document (or article node) into a document model.
# @param node [Nokogiri::XML::Document, Nokogiri::XML::Node] HTML document or article node
# @param state [Hash] conversion state, forwarded to the helpers
# @return [Coradoc::CoreModel::StructuralElement] element with
#   element_type 'document'; metadata gains :author when one was found
def to_coradoc(node, state = {})
  body = find_body_content(node)
  meta = extract_metadata(node, state)

  document = Coradoc::CoreModel::StructuralElement.new(
    element_type: 'document',
    title: meta[:title],
    children: treat_children(body, state)
  )

  # Author is merged into any metadata the new element already carries.
  author = meta[:author]
  document.metadata = (document.metadata || {}).merge(author: author) if author

  document
end
35
+
36
# Render a document model as HTML.
# @param model [Coradoc::CoreModel::StructuralElement] document model
# @param state [Hash] conversion state
# @return [String] HTML string; '' for any model that is not a
#   Coradoc::CoreModel::StructuralElement
def to_html(model, state = {})
  return '' unless model.is_a?(Coradoc::CoreModel::StructuralElement)

  convert_core_model_document(model, state)
end
47
+
48
# Render a document model as an <article> element.
#
# The article holds an <h1> for a non-empty title followed by every child
# that renders to a non-empty string, joined with newlines.
#
# @param model [Coradoc::CoreModel::StructuralElement] document exposing
#   #title, #children and #id
# @param state [Hash] conversion state, forwarded to convert_content_to_html
# @return [String] HTML string
def convert_core_model_document(model, state = {})
  fragments = []

  title = model.title
  if title
    heading_text = title.to_s
    fragments << build_element('h1', heading_text) unless heading_text.empty?
  end

  model.children&.each do |child|
    rendered = convert_content_to_html(child, state)
    next if !rendered || rendered.empty?

    fragments << rendered
  end

  # id defaults to "content" (a hook for CSS styling) unless the model has its own.
  article_attrs = { id: model.id || 'content' }
  build_element('article', fragments.join("\n"), article_attrs)
end
73
+
74
+ private
75
+
76
# Locate the node whose children form the document body.
#
# @param node [Object] a Nokogiri document, or any other node
# @return [Object] for a Nokogiri HTML/XML document: its <body>, else its
#   <article>, else its root; any other node is returned unchanged
def find_body_content(node)
  case node
  when Nokogiri::HTML::Document, Nokogiri::XML::Document
    node.at('body') || node.at('article') || node.root
  else
    node
  end
end
87
+
88
# Collect document metadata from the HTML.
#
# @param node [Object] document or node to inspect
# @param _state [Hash] conversion state (not used)
# @return [Hash] always contains :attributes (an empty Hash); for Nokogiri
#   documents/nodes, :title is taken from the first <title> or else <h1>,
#   and :author from <meta name="author">, when those exist
def extract_metadata(node, _state)
  metadata = { attributes: {} }
  return metadata unless node.is_a?(Nokogiri::XML::Document) || node.is_a?(Nokogiri::XML::Node)

  heading = node.at('title') || node.at('h1')
  metadata[:title] = heading.text.strip if heading

  author_tag = node.at('meta[name="author"]')
  metadata[:author] = author_tag['content'] if author_tag

  metadata
end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end