coradoc-markdown 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/coradoc/markdown/errors.rb +28 -0
- data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
- data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
- data/lib/coradoc/markdown/model/base.rb +86 -0
- data/lib/coradoc/markdown/model/blockquote.rb +21 -0
- data/lib/coradoc/markdown/model/code.rb +11 -0
- data/lib/coradoc/markdown/model/code_block.rb +24 -0
- data/lib/coradoc/markdown/model/definition_item.rb +24 -0
- data/lib/coradoc/markdown/model/definition_list.rb +47 -0
- data/lib/coradoc/markdown/model/definition_term.rb +21 -0
- data/lib/coradoc/markdown/model/document.rb +39 -0
- data/lib/coradoc/markdown/model/emphasis.rb +11 -0
- data/lib/coradoc/markdown/model/extension.rb +92 -0
- data/lib/coradoc/markdown/model/footnote.rb +31 -0
- data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
- data/lib/coradoc/markdown/model/heading.rb +44 -0
- data/lib/coradoc/markdown/model/highlight.rb +18 -0
- data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
- data/lib/coradoc/markdown/model/image.rb +19 -0
- data/lib/coradoc/markdown/model/link.rb +19 -0
- data/lib/coradoc/markdown/model/list.rb +22 -0
- data/lib/coradoc/markdown/model/list_item.rb +29 -0
- data/lib/coradoc/markdown/model/math.rb +50 -0
- data/lib/coradoc/markdown/model/paragraph.rb +28 -0
- data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
- data/lib/coradoc/markdown/model/strong.rb +11 -0
- data/lib/coradoc/markdown/model/table.rb +13 -0
- data/lib/coradoc/markdown/model/text.rb +15 -0
- data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
- data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
- data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
- data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
- data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
- data/lib/coradoc/markdown/parser.rb +11 -0
- data/lib/coradoc/markdown/parser_util.rb +90 -0
- data/lib/coradoc/markdown/serializer.rb +199 -0
- data/lib/coradoc/markdown/toc_generator.rb +215 -0
- data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
- data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
- data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
- data/lib/coradoc/markdown/transformer.rb +463 -0
- data/lib/coradoc/markdown/version.rb +7 -0
- data/lib/coradoc/markdown.rb +190 -0
- metadata +173 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Markdown
|
|
5
|
+
module Transform
|
|
6
|
+
# Transforms CoreModel models to Markdown equivalents
|
|
7
|
+
#
|
|
8
|
+
# This transformer converts the canonical CoreModel representation
|
|
9
|
+
# to format-specific Markdown model.
|
|
10
|
+
class FromCoreModel
|
|
11
|
+
class << self
|
|
12
|
+
# Convert a CoreModel node (or an array of nodes) into Markdown models.
#
# Branches are tried from top to bottom, so AnnotationBlock has to stay
# ahead of Block (AnnotationBlock < Block). Arrays are converted element
# by element and anything unrecognised is handed back untouched.
#
# @param model [Coradoc::CoreModel::Base, Array, Object] node(s) to convert
# @return [Coradoc::Markdown::Base, Array, Object] Markdown equivalent
def transform(model)
  case model
  when Coradoc::CoreModel::StructuralElement then transform_structural_element(model)
  when Coradoc::CoreModel::AnnotationBlock then transform_annotation_block(model)
  when Coradoc::CoreModel::Block then transform_block(model)
  when Coradoc::CoreModel::ListBlock then transform_list(model)
  when Coradoc::CoreModel::DefinitionList then transform_definition_list(model)
  when Coradoc::CoreModel::Table then transform_table(model)
  when Coradoc::CoreModel::Image then transform_image(model)
  when Coradoc::CoreModel::InlineElement then transform_inline(model)
  when Coradoc::CoreModel::Footnote then transform_footnote(model)
  when Coradoc::CoreModel::FootnoteReference then transform_footnote_reference(model)
  when Coradoc::CoreModel::Abbreviation then transform_abbreviation(model)
  when Coradoc::CoreModel::Toc then Coradoc::Markdown::Extension.toc
  when Coradoc::CoreModel::Term then Coradoc::Markdown::Strong.new(text: model.text.to_s)
  when Coradoc::CoreModel::Bibliography then transform_bibliography(model)
  when Coradoc::CoreModel::BibliographyEntry then transform_bibliography_entry(model)
  when Coradoc::CoreModel::TocEntry then Coradoc::Markdown::Text.new(content: model.title.to_s)
  when Array then model.map { |node| transform(node) }
  else model
  end
end
|
|
57
|
+
|
|
58
|
+
private
|
|
59
|
+
|
|
60
|
+
# Route a structural element by its element_type: 'document' and
# 'section' have dedicated builders, every other type is treated as a
# generic container.
def transform_structural_element(element)
  kind = element.element_type
  return transform_document(element) if kind == 'document'
  return transform_section(element) if kind == 'section'

  transform_generic_element(element)
end
|
|
70
|
+
|
|
71
|
+
# Build a Markdown Document that keeps the source id and holds the
# converted children (a nil children list yields no blocks).
def transform_document(doc)
  converted = Array(doc.children).map { |node| transform(node) }
  Coradoc::Markdown::Document.new(id: doc.id, blocks: converted)
end
|
|
79
|
+
|
|
80
|
+
# Turn a section into a Markdown heading.
#
# The level defaults to 1 when the section has none; the title is
# stringified. Only level and title are read here.
# NOTE(review): section.children is not visited by this method - confirm
# that nested section content is emitted elsewhere.
def transform_section(section)
  depth = section.level || 1
  caption = section.title.to_s
  Coradoc::Markdown::Heading.new(level: depth, text: caption)
end
|
|
86
|
+
|
|
87
|
+
# Fallback for structural elements that are neither documents nor
# sections: wrap the converted children in a Markdown Document carrying
# the element's id.
def transform_generic_element(element)
  converted = Array(element.children).map { |node| transform(node) }
  Coradoc::Markdown::Document.new(id: element.id, blocks: converted)
end
|
|
95
|
+
|
|
96
|
+
# Convert a generic block: paragraphs and comments are handled directly,
# every other element_type goes through the delimited-block resolution.
def transform_block(block)
  kind = block.element_type
  if kind == 'paragraph'
    transform_paragraph(block)
  elsif kind == 'comment'
    Coradoc::Markdown::Extension.comment(block.content.to_s)
  else
    transform_delimited_block(block)
  end
end
|
|
106
|
+
|
|
107
|
+
# Build a Markdown Paragraph from a block.
#
# The paragraph always carries the block's flat text. When the renderable
# content is an array holding at least one non-String part, the parts are
# additionally converted and attached as children.
def transform_paragraph(block)
  parts = block.renderable_content
  attrs = { text: block.flat_text }
  if parts.is_a?(Array) && !parts.all?(String)
    attrs[:children] = parts.map { |part| transform_inline_content(part) }
  end
  Coradoc::Markdown::Paragraph.new(**attrs)
end
|
|
117
|
+
|
|
118
|
+
# Convert one piece of mixed inline content: inline elements are
# transformed, Strings are returned as-is, anything else is stringified.
def transform_inline_content(element)
  return transform_inline(element) if element.is_a?(Coradoc::CoreModel::InlineElement)

  element.is_a?(String) ? element : element.to_s
end
|
|
128
|
+
|
|
129
|
+
# Convert a non-paragraph block according to its resolved semantic:
# source/listing/literal become code blocks, quote/verse become
# blockquotes, pass content is wrapped in a nomarkdown extension, and
# anything unresolved is rendered as a paragraph.
def transform_delimited_block(block)
  case resolve_markdown_semantic(block)
  when :source_code, :listing, :literal then transform_code_block(block)
  when :quote, :verse then transform_blockquote(block)
  when :horizontal_rule then transform_horizontal_rule(block)
  when :pass then Coradoc::Markdown::Extension.nomarkdown(block.content.to_s)
  else transform_paragraph(block)
  end
end
|
|
147
|
+
|
|
148
|
+
# Determine the semantic of a block.
#
# The block's own resolve_semantic_type wins when it returns a truthy
# value; otherwise the semantic is derived from delimiter_type (kept for
# backward compatibility).
def resolve_markdown_semantic(block)
  block.resolve_semantic_type || markdown_delimiter_to_semantic(block.delimiter_type)
end
|
|
156
|
+
|
|
157
|
+
# Map a raw block delimiter to a semantic symbol.
#
# Recognises fenced code ('```' and the tilde fence '~~~'; the legacy
# single '~' is still accepted), the '>' quote marker, the three
# horizontal-rule spellings, and the '++++' passthrough delimiter.
#
# @param delimiter [String, nil] delimiter taken from the block
# @return [Symbol, nil] semantic, or nil when the delimiter is unknown
def markdown_delimiter_to_semantic(delimiter)
  case delimiter
  when '```', '~~~', '~' then :source_code
  when '>' then :quote
  when '---', '***', '___' then :horizontal_rule
  when '++++' then :pass
  end
end
|
|
166
|
+
|
|
167
|
+
# Build a Markdown CodeBlock from the block's stringified content and its
# language (passed through unchanged).
def transform_code_block(block)
  source = block.content.to_s
  Coradoc::Markdown::CodeBlock.new(code: source, language: block.language)
end
|
|
173
|
+
|
|
174
|
+
# Build a Markdown Blockquote whose content is the block's flat text.
def transform_blockquote(block)
  Coradoc::Markdown::Blockquote.new(content: block.flat_text)
end
|
|
179
|
+
|
|
180
|
+
# Produce a Markdown horizontal rule; the source block carries nothing
# that is needed for it, so the argument is ignored.
def transform_horizontal_rule(_block)
  Coradoc::Markdown::HorizontalRule.new
end
|
|
183
|
+
|
|
184
|
+
# Convert a list block into a Markdown List.
#
# Each item carries its flat text; items whose renderable content is an
# array with at least one non-String part also get converted children.
# The list is ordered exactly when marker_type is 'ordered'.
def transform_list(list)
  entries = Array(list.items).map do |entry|
    parts = entry.renderable_content
    attrs = { text: entry.flat_text }
    if parts.is_a?(Array) && !parts.all?(String)
      attrs[:children] = parts.map { |part| transform_inline_content(part) }
    end
    Coradoc::Markdown::ListItem.new(**attrs)
  end

  Coradoc::Markdown::List.new(ordered: list.marker_type == 'ordered', items: entries)
end
|
|
200
|
+
|
|
201
|
+
# Convert a CoreModel table into a Markdown Table.
#
# If any cell of the first row is flagged as a header, the whole first row
# supplies the header texts and is removed from the body. Every body row
# is flattened to a single string with its cell texts joined by ' | '.
def transform_table(table)
  body_rows = Array(table.rows)
  lead_cells = Array(body_rows.first&.cells)

  headers = []
  if lead_cells.any?(&:header)
    headers = lead_cells.map(&:flat_text)
    body_rows = body_rows.drop(1)
  end

  rows = body_rows.map { |row| Array(row.cells).map(&:flat_text).join(' | ') }
  Coradoc::Markdown::Table.new(headers: headers, rows: rows)
end
|
|
228
|
+
|
|
229
|
+
# Build a Markdown Image: src is passed through, alt is stringified.
def transform_image(image)
  alt_text = image.alt.to_s
  Coradoc::Markdown::Image.new(src: image.src, alt: alt_text)
end
|
|
235
|
+
|
|
236
|
+
# Convert a CoreModel inline element according to its format_type.
#
# bold, italic, monospace, link, footnote, stem, highlight and
# strikethrough map to Markdown model objects. subscript, superscript and
# underline are returned as raw HTML tag strings, xref as an in-page
# "[text](#target)" link string, and any other format_type falls back to
# the element's content as a plain string.
def transform_inline(element)
  case element.format_type
  when 'bold' then Coradoc::Markdown::Strong.new(text: element.content.to_s)
  when 'italic' then Coradoc::Markdown::Emphasis.new(text: element.content.to_s)
  when 'monospace' then Coradoc::Markdown::Code.new(text: element.content.to_s)
  when 'link'
    Coradoc::Markdown::Link.new(text: element.content.to_s, url: element.target.to_s)
  when 'footnote' then Coradoc::Markdown::FootnoteReference.new(id: element.target.to_s)
  when 'stem' then Coradoc::Markdown::Math.inline(element.content.to_s)
  when 'highlight' then Coradoc::Markdown::Highlight.new(text: element.content.to_s)
  when 'strikethrough' then Coradoc::Markdown::Strikethrough.new(text: element.content.to_s)
  when 'subscript' then "<sub>#{element.content}</sub>"
  when 'superscript' then "<sup>#{element.content}</sup>"
  when 'underline' then "<u>#{element.content}</u>"
  when 'xref' then "[#{element.content}](##{element.target})"
  else element.content.to_s
  end
end
|
|
269
|
+
|
|
270
|
+
# Convert a CoreModel definition list: each item becomes a DefinitionTerm
# holding its stringified term and one DefinitionItem per definition.
def transform_definition_list(dl)
  terms = Array(dl.items).map do |entry|
    meanings = Array(entry.definitions).map do |meaning|
      Coradoc::Markdown::DefinitionItem.new(content: meaning.to_s)
    end
    Coradoc::Markdown::DefinitionTerm.new(text: entry.term.to_s, definitions: meanings)
  end

  Coradoc::Markdown::DefinitionList.new(items: terms)
end
|
|
283
|
+
|
|
284
|
+
# Build a Markdown Footnote: id and content are stringified, the backlink
# flag is copied as-is.
def transform_footnote(fn)
  note_id = fn.id.to_s
  body = fn.content.to_s
  Coradoc::Markdown::Footnote.new(id: note_id, content: body, backlink: fn.backlink)
end
|
|
291
|
+
|
|
292
|
+
# Build a Markdown FootnoteReference pointing at the stringified id.
def transform_footnote_reference(ref)
  target_id = ref.id.to_s
  Coradoc::Markdown::FootnoteReference.new(id: target_id)
end
|
|
295
|
+
|
|
296
|
+
# Build a Markdown Abbreviation from the stringified term and definition.
def transform_abbreviation(abbr)
  short = abbr.term.to_s
  long = abbr.definition.to_s
  Coradoc::Markdown::Abbreviation.new(term: short, definition: long)
end
|
|
302
|
+
|
|
303
|
+
# Render an annotation block as a paragraph whose text is the annotation
# type in bold followed by the block's flat text.
def transform_annotation_block(annotation)
  body = annotation.flat_text
  label = annotation.annotation_type
  Coradoc::Markdown::Paragraph.new(text: "**#{label}:** #{body}")
end
|
|
309
|
+
|
|
310
|
+
# Convert a bibliography into a Markdown Document: an optional level-2
# heading (only when the bibliography has a title) followed by the
# converted entries. The document keeps the bibliography's id.
def transform_bibliography(bib)
  converted = Array(bib.entries).map { |entry| transform(entry) }
  blocks =
    if bib.title
      [Coradoc::Markdown::Heading.new(level: 2, text: bib.title.to_s), *converted]
    else
      converted
    end
  Coradoc::Markdown::Document.new(id: bib.id, blocks: blocks)
end
|
|
317
|
+
|
|
318
|
+
# Render a bibliography entry as a paragraph containing its display text.
def transform_bibliography_entry(entry)
  shown = entry.display_text
  Coradoc::Markdown::Paragraph.new(text: shown)
end
|
|
321
|
+
end
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
end
|
|
325
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Markdown
|
|
5
|
+
module Transform
|
|
6
|
+
# Shared text extraction for Markdown model objects.
|
|
7
|
+
#
|
|
8
|
+
# Handles nil, plain strings, and Markdown::Text instances.
|
|
9
|
+
module TextExtraction
|
|
10
|
+
# Reduce a text-like value to a String.
#
# @param text [nil, String, Coradoc::Markdown::Text, Object]
# @return [String] '' for nil, the stringified content of a Markdown::Text,
#   otherwise the value's own to_s
def extract_text(text)
  case text
  when nil then ''
  when Coradoc::Markdown::Text then text.content.to_s
  else text.to_s
  end
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'coradoc/core_model'
|
|
4
|
+
|
|
5
|
+
module Coradoc
|
|
6
|
+
module Markdown
|
|
7
|
+
module Transform
|
|
8
|
+
# Transforms Markdown models to CoreModel equivalents
|
|
9
|
+
#
|
|
10
|
+
# This transformer converts the format-specific Markdown model
|
|
11
|
+
# to the canonical CoreModel representation.
|
|
12
|
+
class ToCoreModel
|
|
13
|
+
class << self
|
|
14
|
+
include TextExtraction
|
|
15
|
+
# Convert a Markdown model (or an array of models) into CoreModel nodes.
#
# Markdown::Text collapses to its content as a plain String, arrays are
# converted element by element, and anything unrecognised is returned
# untouched.
#
# @param model [Coradoc::Markdown::Base, Array, Object] model(s) to convert
# @return [Coradoc::CoreModel::Base, String, Array, Object] CoreModel equivalent
def transform(model)
  case model
  when Coradoc::Markdown::Document then transform_document(model)
  when Coradoc::Markdown::Heading then transform_heading(model)
  when Coradoc::Markdown::Paragraph then transform_paragraph(model)
  when Coradoc::Markdown::CodeBlock then transform_code_block(model)
  when Coradoc::Markdown::Blockquote then transform_blockquote(model)
  when Coradoc::Markdown::List then transform_list(model)
  when Coradoc::Markdown::DefinitionList then transform_definition_list(model)
  when Coradoc::Markdown::Table then transform_table(model)
  when Coradoc::Markdown::Image then transform_image(model)
  when Coradoc::Markdown::Link then transform_link(model)
  when Coradoc::Markdown::Emphasis then transform_inline(model, 'italic')
  when Coradoc::Markdown::Strong then transform_inline(model, 'bold')
  when Coradoc::Markdown::Code then transform_inline(model, 'monospace')
  when Coradoc::Markdown::Highlight then transform_inline(model, 'highlight')
  when Coradoc::Markdown::Strikethrough then transform_inline(model, 'strikethrough')
  when Coradoc::Markdown::Footnote then transform_footnote(model)
  when Coradoc::Markdown::FootnoteReference then transform_footnote_reference(model)
  when Coradoc::Markdown::Abbreviation then transform_abbreviation(model)
  when Coradoc::Markdown::HorizontalRule then transform_horizontal_rule(model)
  when Coradoc::Markdown::Math then transform_math(model)
  when Coradoc::Markdown::Extension then transform_extension(model)
  when Coradoc::Markdown::AttributeList then transform_attribute_list(model)
  when Coradoc::Markdown::Text then model.content.to_s
  when Array then model.map { |node| transform(node) }
  else model
  end
end
|
|
73
|
+
|
|
74
|
+
private
|
|
75
|
+
|
|
76
|
+
# Convert a Markdown Document into a 'document' StructuralElement that
# keeps the id, takes its title from extract_title, and holds the
# converted blocks as children.
def transform_document(doc)
  converted = Array(doc.blocks).map { |node| transform(node) }
  heading_text = extract_title(doc)

  Coradoc::CoreModel::StructuralElement.new(
    element_type: 'document', id: doc.id, title: heading_text, children: converted
  )
end
|
|
86
|
+
|
|
87
|
+
# Convert a heading into a childless 'section' StructuralElement carrying
# the heading's level and its extracted text as title.
def transform_heading(heading)
  caption = extract_text(heading.text)
  Coradoc::CoreModel::StructuralElement.new(
    element_type: 'section', level: heading.level, title: caption, children: []
  )
end
|
|
95
|
+
|
|
96
|
+
# Convert a paragraph into a 'paragraph' Block whose content is the
# paragraph's extracted text. Only para.text is read here.
def transform_paragraph(para)
  Coradoc::CoreModel::Block.new(element_type: 'paragraph', content: extract_text(para.text))
end
|
|
104
|
+
|
|
105
|
+
# Convert a code block into a SourceBlock with the stringified code as
# content and the language passed through.
def transform_code_block(block)
  source = block.code.to_s
  Coradoc::CoreModel::SourceBlock.new(element_type: 'block', content: source, language: block.language)
end
|
|
112
|
+
|
|
113
|
+
# Convert a blockquote into a QuoteBlock holding its stringified content.
def transform_blockquote(blockquote)
  quoted = blockquote.content.to_s
  Coradoc::CoreModel::QuoteBlock.new(element_type: 'block', content: quoted)
end
|
|
119
|
+
|
|
120
|
+
# Convert a Markdown list into a ListBlock.
#
# Ordered lists use marker_type 'ordered' and the item marker '1.';
# all others use 'unordered' and '*'. Each item's content is the
# extracted text of the source item.
def transform_list(list)
  numbered = list.ordered
  bullet = numbered ? '1.' : '*'
  entries = Array(list.items).map do |entry|
    Coradoc::CoreModel::ListItem.new(content: extract_text(entry.text), marker: bullet)
  end

  Coradoc::CoreModel::ListBlock.new(marker_type: numbered ? 'ordered' : 'unordered', items: entries)
end
|
|
133
|
+
|
|
134
|
+
# Convert a Markdown table into a CoreModel table.
#
# A header row (all cells flagged header: true) is emitted first when the
# table has headers. Data rows may be Arrays of cell values or Strings;
# String rows containing pipes are split into cells, since
# FromCoreModel#transform_table emits rows as ' | '-joined strings.
# Missing (nil) headers or rows are treated as empty.
#
# @param table [Coradoc::Markdown::Table]
# @return [Coradoc::CoreModel::Table]
def transform_table(table)
  rows = []

  headers = Array(table.headers)
  rows << build_table_row(headers, header: true) if headers.any?

  Array(table.rows).each do |row|
    rows << build_table_row(table_row_values(row), header: false)
  end

  Coradoc::CoreModel::Table.new(rows: rows)
end

# Wrap a list of cell values in a TableRow of stringified TableCells.
def build_table_row(values, header:)
  cells = values.map { |value| Coradoc::CoreModel::TableCell.new(content: value.to_s, header: header) }
  Coradoc::CoreModel::TableRow.new(cells: cells)
end

# Normalise one data row to an Array of cell values. Arrays pass through;
# a String without pipes stays a single untouched cell; otherwise the
# optional outer pipes are dropped and the text is split on unescaped '|'.
def table_row_values(row)
  return row if row.is_a?(Array)

  text = row.to_s
  return [text] unless text.include?('|')

  text.strip.sub(/\A\|/, '').sub(/(?<!\\)\|\z/, '').split(/(?<!\\)\|/, -1).map(&:strip)
end
|
|
159
|
+
|
|
160
|
+
# Convert an image: src is passed through, alt is stringified.
def transform_image(image)
  alt_text = image.alt.to_s
  Coradoc::CoreModel::Image.new(src: image.src, alt: alt_text)
end
|
|
166
|
+
|
|
167
|
+
# Convert a link into a 'link' InlineElement: the url becomes the target
# and the extracted link text becomes the content.
def transform_link(link)
  label = extract_text(link.text)
  Coradoc::CoreModel::InlineElement.new(format_type: 'link', target: link.url, content: label)
end
|
|
174
|
+
|
|
175
|
+
# Convert a simple inline model into an InlineElement of the given
# format_type, using the model's extracted text as content.
def transform_inline(inline, format_type)
  body = extract_text(inline.text)
  Coradoc::CoreModel::InlineElement.new(format_type: format_type, content: body)
end
|
|
181
|
+
|
|
182
|
+
# Represent a horizontal rule as a generic Block tagged with the
# :horizontal_rule semantic type; the rule itself carries no data.
def transform_horizontal_rule(_rule)
  Coradoc::CoreModel::Block.new(element_type: 'block', block_semantic_type: :horizontal_rule)
end
|
|
188
|
+
|
|
189
|
+
# Convert a Markdown definition list into a CoreModel DefinitionList.
#
# Each term becomes a DefinitionItem whose definitions are the stringified
# content of the term's definition entries. The term text goes through
# extract_text, like headings, list items and links, so a Markdown::Text
# value is unwrapped to its content rather than stringified as an object.
def transform_definition_list(dl)
  items = Array(dl.items).map do |term|
    Coradoc::CoreModel::DefinitionItem.new(
      term: extract_text(term.text),
      definitions: Array(term.definitions).map { |defn| defn.content.to_s }
    )
  end

  Coradoc::CoreModel::DefinitionList.new(items: items)
end
|
|
202
|
+
|
|
203
|
+
# Convert a footnote: id and content are stringified; an unset (nil)
# backlink defaults to true, any other value is kept.
def transform_footnote(fn)
  linked = fn.backlink
  linked = true if linked.nil?

  Coradoc::CoreModel::Footnote.new(id: fn.id.to_s, content: fn.content.to_s, backlink: linked)
end
|
|
210
|
+
|
|
211
|
+
# Convert a footnote reference, keeping its id as a String.
def transform_footnote_reference(ref)
  target_id = ref.id.to_s
  Coradoc::CoreModel::FootnoteReference.new(id: target_id)
end
|
|
214
|
+
|
|
215
|
+
# Convert an abbreviation, stringifying both term and definition.
def transform_abbreviation(abbr)
  short = abbr.term.to_s
  long = abbr.definition.to_s
  Coradoc::CoreModel::Abbreviation.new(term: short, definition: long)
end
|
|
221
|
+
|
|
222
|
+
# Convert math: inline math becomes a 'stem' InlineElement, anything else
# becomes a PassBlock tagged with the 'latexmath' language. The content is
# stringified in both cases.
def transform_math(math)
  formula = math.content.to_s
  if math.inline?
    return Coradoc::CoreModel::InlineElement.new(format_type: 'stem', content: formula)
  end

  Coradoc::CoreModel::PassBlock.new(element_type: 'block', content: formula, language: 'latexmath')
end
|
|
236
|
+
|
|
237
|
+
# Convert a Markdown extension by name.
#
# 'toc' becomes a Toc, 'comment' a comment Block, 'nomarkdown' a PassBlock;
# any other name keeps its content as a paragraph Block. The name is
# matched via to_s so both Symbol and String names work and an unnamed
# (nil) extension takes the generic fallback instead of raising.
#
# @param ext [Coradoc::Markdown::Extension]
# @return [Coradoc::CoreModel::Toc, Coradoc::CoreModel::Block, Coradoc::CoreModel::PassBlock]
def transform_extension(ext)
  case ext.name.to_s
  when 'toc'
    Coradoc::CoreModel::Toc.new
  when 'comment'
    Coradoc::CoreModel::Block.new(element_type: 'comment', content: ext.content.to_s)
  when 'nomarkdown'
    Coradoc::CoreModel::PassBlock.new(element_type: 'block', content: ext.content.to_s)
  else
    # Unknown extensions: preserve content as a generic block
    Coradoc::CoreModel::Block.new(element_type: 'paragraph', content: ext.content.to_s)
  end
end
|
|
259
|
+
|
|
260
|
+
# Convert an attribute list into an 'attribute_list' StructuralElement
# whose children are ElementAttribute entries: the id (when present), one
# 'class' entry per class, then every key/value attribute, all with
# stringified values. A nil classes or attributes collection is treated as
# empty instead of raising.
#
# @param attr_list [Coradoc::Markdown::AttributeList]
# @return [Coradoc::CoreModel::StructuralElement]
def transform_attribute_list(attr_list)
  attrs = []
  attrs << Coradoc::CoreModel::ElementAttribute.new(name: 'id', value: attr_list.id.to_s) if attr_list.id

  Array(attr_list.classes).each do |cls|
    attrs << Coradoc::CoreModel::ElementAttribute.new(name: 'class', value: cls.to_s)
  end

  (attr_list.attributes || {}).each do |key, value|
    attrs << Coradoc::CoreModel::ElementAttribute.new(name: key.to_s, value: value.to_s)
  end

  Coradoc::CoreModel::StructuralElement.new(element_type: 'attribute_list', children: attrs)
end
|
|
275
|
+
|
|
276
|
+
# Derive a document title: the extracted text of the first block when that
# block is a Heading, otherwise nil.
def extract_title(doc)
  lead = Array(doc.blocks).first
  lead.is_a?(Coradoc::Markdown::Heading) ? extract_text(lead.text) : nil
end
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
end
|