coradoc-adoc 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/asciidoc/delimiter_mapping.rb +16 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/block_transformer.rb +89 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/document_transformer.rb +43 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/inline_transformer.rb +56 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/list_transformer.rb +90 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/other_transformer.rb +61 -0
- data/lib/coradoc/asciidoc/transform/element_transformers/table_transformer.rb +49 -0
- data/lib/coradoc/asciidoc/transform/element_transformers.rb +16 -0
- data/lib/coradoc/asciidoc/transform/from_core_model.rb +6 -77
- data/lib/coradoc/asciidoc/transform/inline_transform_visitor.rb +59 -0
- data/lib/coradoc/asciidoc/transform/text_extract_visitor.rb +126 -0
- data/lib/coradoc/asciidoc/transform/to_core_model.rb +42 -569
- data/lib/coradoc/asciidoc/transform/to_core_model_registrations.rb +31 -70
- data/lib/coradoc/asciidoc/transform/transformer_registry.rb +80 -0
- data/lib/coradoc/asciidoc/transform.rb +5 -1
- data/lib/coradoc/asciidoc/version.rb +1 -1
- data/lib/coradoc/asciidoc.rb +1 -0
- metadata +12 -2
- data/lib/coradoc/asciidoc/transform/registry.rb +0 -146
|
@@ -5,11 +5,8 @@ require_relative 'to_core_model_registrations'
|
|
|
5
5
|
module Coradoc
|
|
6
6
|
module AsciiDoc
|
|
7
7
|
module Transform
|
|
8
|
-
# Transforms AsciiDoc models to CoreModel equivalents
|
|
9
8
|
class ToCoreModel
|
|
10
|
-
|
|
11
|
-
self.class.transform(model)
|
|
12
|
-
end
|
|
9
|
+
include Coradoc::Transform::Base
|
|
13
10
|
|
|
14
11
|
class << self
|
|
15
12
|
def transform(model)
|
|
@@ -17,412 +14,51 @@ module Coradoc
|
|
|
17
14
|
return model unless model.is_a?(Coradoc::AsciiDoc::Model::Base)
|
|
18
15
|
|
|
19
16
|
transformer = Registry.lookup(model.class)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
transform_with_case(model)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def transform_with_case(model)
|
|
26
|
-
case model
|
|
27
|
-
when Coradoc::AsciiDoc::Model::Document
|
|
28
|
-
transform_document(model)
|
|
29
|
-
when Coradoc::AsciiDoc::Model::Section
|
|
30
|
-
transform_section(model)
|
|
31
|
-
when Coradoc::AsciiDoc::Model::Paragraph
|
|
32
|
-
transform_paragraph(model)
|
|
33
|
-
when Coradoc::AsciiDoc::Model::Block::SourceCode
|
|
34
|
-
transform_source_block(model)
|
|
35
|
-
when Coradoc::AsciiDoc::Model::Block::Quote
|
|
36
|
-
transform_typed_block(model, Coradoc::CoreModel::QuoteBlock)
|
|
37
|
-
when Coradoc::AsciiDoc::Model::Block::Example
|
|
38
|
-
transform_typed_block(model, Coradoc::CoreModel::ExampleBlock)
|
|
39
|
-
when Coradoc::AsciiDoc::Model::Block::Side
|
|
40
|
-
transform_typed_block(model, Coradoc::CoreModel::SidebarBlock)
|
|
41
|
-
when Coradoc::AsciiDoc::Model::Block::Literal
|
|
42
|
-
transform_typed_block(model, Coradoc::CoreModel::LiteralBlock)
|
|
43
|
-
when Coradoc::AsciiDoc::Model::Block::Open
|
|
44
|
-
transform_typed_block(model, Coradoc::CoreModel::OpenBlock)
|
|
45
|
-
when Coradoc::AsciiDoc::Model::Block::Pass
|
|
46
|
-
transform_typed_block(model, Coradoc::CoreModel::PassBlock)
|
|
47
|
-
when Coradoc::AsciiDoc::Model::Block::Listing
|
|
48
|
-
transform_typed_block(model, Coradoc::CoreModel::ListingBlock)
|
|
49
|
-
when Coradoc::AsciiDoc::Model::Block::Core
|
|
50
|
-
transform_block(model, model.delimiter.to_s)
|
|
51
|
-
when Coradoc::AsciiDoc::Model::Table
|
|
52
|
-
transform_table(model)
|
|
53
|
-
when Coradoc::AsciiDoc::Model::TableRow
|
|
54
|
-
transform_table_row(model)
|
|
55
|
-
when Coradoc::AsciiDoc::Model::TableCell
|
|
56
|
-
transform_table_cell(model)
|
|
57
|
-
when Coradoc::AsciiDoc::Model::List::Unordered
|
|
58
|
-
transform_list(model, 'unordered')
|
|
59
|
-
when Coradoc::AsciiDoc::Model::List::Ordered
|
|
60
|
-
transform_list(model, 'ordered')
|
|
61
|
-
when Coradoc::AsciiDoc::Model::List::Definition
|
|
62
|
-
transform_list(model, 'definition')
|
|
63
|
-
when Coradoc::AsciiDoc::Model::Term
|
|
64
|
-
transform_term(model)
|
|
65
|
-
when Coradoc::AsciiDoc::Model::Admonition
|
|
66
|
-
transform_admonition(model)
|
|
67
|
-
when Coradoc::AsciiDoc::Model::Inline::Bold
|
|
68
|
-
transform_inline(model, 'bold')
|
|
69
|
-
when Coradoc::AsciiDoc::Model::Inline::Italic
|
|
70
|
-
transform_inline(model, 'italic')
|
|
71
|
-
when Coradoc::AsciiDoc::Model::Inline::Monospace
|
|
72
|
-
transform_inline(model, 'monospace')
|
|
73
|
-
when Coradoc::AsciiDoc::Model::Inline::Highlight
|
|
74
|
-
transform_inline(model, 'highlight')
|
|
75
|
-
when Coradoc::AsciiDoc::Model::Inline::Link
|
|
76
|
-
transform_link(model)
|
|
77
|
-
when Coradoc::AsciiDoc::Model::Inline::CrossReference
|
|
78
|
-
transform_cross_reference(model)
|
|
79
|
-
when Coradoc::AsciiDoc::Model::Inline::Stem
|
|
80
|
-
transform_stem(model)
|
|
81
|
-
when Coradoc::AsciiDoc::Model::CommentBlock
|
|
82
|
-
Coradoc::CoreModel::CommentBlock.new(
|
|
83
|
-
content: model.text.to_s
|
|
84
|
-
)
|
|
85
|
-
when Coradoc::AsciiDoc::Model::Bibliography
|
|
86
|
-
transform_bibliography(model)
|
|
87
|
-
when Coradoc::AsciiDoc::Model::BibliographyEntry
|
|
88
|
-
transform_bibliography_entry(model)
|
|
89
|
-
when Coradoc::AsciiDoc::Model::Image::BlockImage
|
|
90
|
-
transform_image(model)
|
|
91
|
-
when Coradoc::AsciiDoc::Model::TextElement
|
|
92
|
-
extract_text_content(model)
|
|
93
|
-
else
|
|
94
|
-
model
|
|
95
|
-
end
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def transform_document(doc)
|
|
99
|
-
title_text = extract_title_text(doc.header&.title)
|
|
100
|
-
attributes = extract_document_attributes(doc)
|
|
101
|
-
Coradoc::CoreModel::DocumentElement.new(
|
|
102
|
-
id: doc.id,
|
|
103
|
-
title: title_text,
|
|
104
|
-
attributes: attributes,
|
|
105
|
-
children: transform(doc.sections || doc.contents || [])
|
|
106
|
-
)
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def transform_section(section, parent_id: nil)
|
|
110
|
-
title_text = extract_title_text(section.title)
|
|
111
|
-
section_id = section.id || Coradoc::CoreModel::IdGenerator.generate_from_title(
|
|
112
|
-
title_text, parent_id: parent_id
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
content_children = transform(section.contents || [])
|
|
116
|
-
nested_sections = (section.sections || []).map do |child|
|
|
117
|
-
transform_section(child, parent_id: section_id)
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
Coradoc::CoreModel::SectionElement.new(
|
|
121
|
-
id: section_id,
|
|
122
|
-
level: section.level,
|
|
123
|
-
title: title_text,
|
|
124
|
-
children: content_children + nested_sections
|
|
125
|
-
)
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
def transform_paragraph(para)
|
|
129
|
-
children = transform_inline_content(para.content)
|
|
130
|
-
|
|
131
|
-
Coradoc::CoreModel::ParagraphBlock.new(
|
|
132
|
-
id: para.id,
|
|
133
|
-
content: extract_text_content(para.content),
|
|
134
|
-
children: children
|
|
135
|
-
)
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
def transform_source_block(block)
|
|
139
|
-
content_lines = Array(block.lines).reject do |line|
|
|
140
|
-
line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
|
|
141
|
-
line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
|
|
142
|
-
end.map do |line|
|
|
143
|
-
extract_text_content(line)
|
|
144
|
-
end.join("\n")
|
|
145
|
-
|
|
146
|
-
language = extract_block_language(block)
|
|
147
|
-
|
|
148
|
-
Coradoc::CoreModel::SourceBlock.new(
|
|
149
|
-
id: block.id,
|
|
150
|
-
title: extract_title_text(block.title),
|
|
151
|
-
content: content_lines,
|
|
152
|
-
language: language
|
|
153
|
-
)
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
def transform_block(block, semantic_type_or_delimiter)
|
|
157
|
-
content_lines = extract_block_lines(block)
|
|
158
|
-
semantic_type = if semantic_type_or_delimiter.is_a?(Symbol)
|
|
159
|
-
semantic_type_or_delimiter
|
|
160
|
-
else
|
|
161
|
-
asciidoc_delimiter_to_semantic(semantic_type_or_delimiter)
|
|
162
|
-
end
|
|
163
|
-
|
|
164
|
-
Coradoc::CoreModel::Block.new(
|
|
165
|
-
block_semantic_type: semantic_type,
|
|
166
|
-
delimiter_type: semantic_type_or_delimiter.is_a?(String) ? semantic_type_or_delimiter : nil,
|
|
167
|
-
id: block.id,
|
|
168
|
-
title: extract_title_text(block.title),
|
|
169
|
-
content: content_lines,
|
|
170
|
-
language: extract_block_language(block)
|
|
171
|
-
)
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
def transform_typed_block(block, klass, extra_attrs = {})
|
|
175
|
-
lines = Array(block.lines).reject do |line|
|
|
176
|
-
line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
|
|
177
|
-
line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
|
|
178
|
-
end
|
|
179
|
-
|
|
180
|
-
has_nested_blocks = lines.any?(Coradoc::AsciiDoc::Model::Block::Core)
|
|
181
|
-
|
|
182
|
-
if has_nested_blocks
|
|
183
|
-
children = lines.map { |line| transform(line) }
|
|
184
|
-
klass.new(
|
|
185
|
-
id: block.id,
|
|
186
|
-
title: extract_title_text(block.title),
|
|
187
|
-
children: children,
|
|
188
|
-
language: extract_block_language(block),
|
|
189
|
-
**extra_attrs
|
|
190
|
-
)
|
|
191
|
-
else
|
|
192
|
-
content_lines = lines.map { |line| extract_text_content(line) }.join("\n")
|
|
193
|
-
klass.new(
|
|
194
|
-
id: block.id,
|
|
195
|
-
title: extract_title_text(block.title),
|
|
196
|
-
content: content_lines,
|
|
197
|
-
language: extract_block_language(block),
|
|
198
|
-
**extra_attrs
|
|
199
|
-
)
|
|
200
|
-
end
|
|
17
|
+
transformer ? transformer.call(model) : model
|
|
201
18
|
end
|
|
202
19
|
|
|
203
20
|
def extract_block_lines(block)
|
|
204
|
-
Array(block.lines).reject do |line|
|
|
21
|
+
non_break_lines = Array(block.lines).reject do |line|
|
|
205
22
|
line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
|
|
206
23
|
line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
|
|
207
|
-
end
|
|
24
|
+
end
|
|
25
|
+
non_break_lines.map do |line|
|
|
208
26
|
extract_text_content(line)
|
|
209
27
|
end.join("\n")
|
|
210
28
|
end
|
|
211
29
|
|
|
212
|
-
def
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
end
|
|
216
|
-
|
|
217
|
-
Coradoc::CoreModel::Table.new(
|
|
218
|
-
id: table.id,
|
|
219
|
-
title: table.title&.to_s,
|
|
220
|
-
rows: rows
|
|
221
|
-
)
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
def transform_table_row(row)
|
|
225
|
-
cells = Array(row.columns).map do |cell|
|
|
226
|
-
transform_table_cell(cell)
|
|
227
|
-
end
|
|
228
|
-
Coradoc::CoreModel::TableRow.new(
|
|
229
|
-
cells: cells,
|
|
230
|
-
header: row.header
|
|
231
|
-
)
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
def transform_table_cell(cell)
|
|
235
|
-
children = transform_inline_content(cell.content)
|
|
236
|
-
|
|
237
|
-
Coradoc::CoreModel::TableCell.new(
|
|
238
|
-
content: extract_text_content(cell.content),
|
|
239
|
-
alignment: cell.horizontal_alignment,
|
|
240
|
-
vertical_alignment: cell.vertical_alignment,
|
|
241
|
-
colspan: cell.colspan,
|
|
242
|
-
rowspan: cell.rowspan,
|
|
243
|
-
style: cell.style_name,
|
|
244
|
-
children: children
|
|
245
|
-
)
|
|
246
|
-
end
|
|
247
|
-
|
|
248
|
-
def list_marker_type(list)
|
|
249
|
-
case list
|
|
250
|
-
when Coradoc::AsciiDoc::Model::List::Ordered then 'ordered'
|
|
251
|
-
when Coradoc::AsciiDoc::Model::List::Unordered then 'unordered'
|
|
252
|
-
when Coradoc::AsciiDoc::Model::List::Definition then 'definition'
|
|
253
|
-
else 'unordered'
|
|
254
|
-
end
|
|
255
|
-
end
|
|
256
|
-
|
|
257
|
-
def transform_list(list, marker_type)
|
|
258
|
-
items = Array(list.items).map do |item|
|
|
259
|
-
if item.is_a?(Coradoc::AsciiDoc::Model::List::DefinitionItem)
|
|
260
|
-
term_content = item.terms
|
|
261
|
-
def_content = item.contents
|
|
262
|
-
|
|
263
|
-
# Re-parse raw text through inline parser for structured content
|
|
264
|
-
term_parts = term_content.is_a?(Array) ? term_content : [term_content]
|
|
265
|
-
parsed_terms = term_parts.flat_map do |part|
|
|
266
|
-
parse_inline_text(part)
|
|
267
|
-
end
|
|
268
|
-
|
|
269
|
-
parsed_defs = parse_inline_text(def_content)
|
|
270
|
-
|
|
271
|
-
term_children = transform_inline_content(parsed_terms)
|
|
272
|
-
def_children = transform_inline_content(parsed_defs)
|
|
273
|
-
|
|
274
|
-
di = Coradoc::CoreModel::DefinitionItem.new(
|
|
275
|
-
term: extract_text_content(term_children),
|
|
276
|
-
definitions: [extract_text_content(def_children)],
|
|
277
|
-
term_children: term_children,
|
|
278
|
-
definition_children: def_children
|
|
279
|
-
)
|
|
280
|
-
di.id = item.id if item.id
|
|
281
|
-
di
|
|
282
|
-
else
|
|
283
|
-
content_val = item.content
|
|
284
|
-
children = transform_inline_content(content_val)
|
|
285
|
-
|
|
286
|
-
li = Coradoc::CoreModel::ListItem.new(
|
|
287
|
-
content: extract_text_content(content_val),
|
|
288
|
-
marker: item.marker
|
|
289
|
-
)
|
|
290
|
-
li.children = children
|
|
291
|
-
|
|
292
|
-
if item.nested.is_a?(Coradoc::AsciiDoc::Model::List::Core)
|
|
293
|
-
nested_core = transform_list(item.nested, list_marker_type(item.nested))
|
|
294
|
-
li.children << nested_core
|
|
295
|
-
elsif item.nested.is_a?(Array)
|
|
296
|
-
item.nested.each do |n|
|
|
297
|
-
next unless n.is_a?(Coradoc::AsciiDoc::Model::List::Core)
|
|
298
|
-
|
|
299
|
-
li.children << transform_list(n, list_marker_type(n))
|
|
300
|
-
end
|
|
301
|
-
end
|
|
30
|
+
def extract_title_text(title)
|
|
31
|
+
return nil if title.nil?
|
|
32
|
+
return title.to_s unless title.is_a?(Coradoc::AsciiDoc::Model::Title)
|
|
302
33
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
end
|
|
34
|
+
content = title.content
|
|
35
|
+
return '' if content.nil?
|
|
306
36
|
|
|
307
|
-
if
|
|
308
|
-
|
|
37
|
+
if content.is_a?(String)
|
|
38
|
+
content
|
|
39
|
+
elsif content.is_a?(Array)
|
|
40
|
+
content.map { |c| extract_text_content(c) }.join
|
|
309
41
|
else
|
|
310
|
-
|
|
311
|
-
marker_type: marker_type,
|
|
312
|
-
items: items
|
|
313
|
-
)
|
|
314
|
-
end
|
|
315
|
-
end
|
|
316
|
-
|
|
317
|
-
def transform_term(term)
|
|
318
|
-
Coradoc::CoreModel::Term.new(
|
|
319
|
-
text: term.term.to_s,
|
|
320
|
-
type: term.type&.to_s || 'preferred',
|
|
321
|
-
lang: term.lang&.to_s || 'en'
|
|
322
|
-
)
|
|
323
|
-
end
|
|
324
|
-
|
|
325
|
-
def transform_admonition(admonition)
|
|
326
|
-
children = transform_inline_content(admonition.content)
|
|
327
|
-
block = Coradoc::CoreModel::AnnotationBlock.new(
|
|
328
|
-
annotation_type: admonition.type,
|
|
329
|
-
content: extract_text_content(admonition.content)
|
|
330
|
-
)
|
|
331
|
-
block.children = children
|
|
332
|
-
block
|
|
333
|
-
end
|
|
334
|
-
|
|
335
|
-
def transform_inline(inline, format_type)
|
|
336
|
-
klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
|
|
337
|
-
klass.new(
|
|
338
|
-
content: extract_text_content(inline.content)
|
|
339
|
-
)
|
|
340
|
-
end
|
|
341
|
-
|
|
342
|
-
def transform_inline_text(inline, format_type)
|
|
343
|
-
klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
|
|
344
|
-
klass.new(
|
|
345
|
-
content: inline.text.to_s
|
|
346
|
-
)
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
def transform_inline_footnote(footnote)
|
|
350
|
-
parsed_content = parse_and_transform_inline(footnote.text.to_s)
|
|
351
|
-
Coradoc::CoreModel::FootnoteElement.new(
|
|
352
|
-
target: footnote.id,
|
|
353
|
-
content: parsed_content
|
|
354
|
-
)
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
def transform_link(link)
|
|
358
|
-
Coradoc::CoreModel::LinkElement.new(
|
|
359
|
-
target: link.path,
|
|
360
|
-
content: link.name || link.path
|
|
361
|
-
)
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
def transform_cross_reference(xref)
|
|
365
|
-
Coradoc::CoreModel::CrossReferenceElement.new(
|
|
366
|
-
target: xref.href,
|
|
367
|
-
content: xref.args&.first || xref.href
|
|
368
|
-
)
|
|
369
|
-
end
|
|
370
|
-
|
|
371
|
-
def transform_stem(stem)
|
|
372
|
-
Coradoc::CoreModel::StemElement.new(
|
|
373
|
-
content: stem.content,
|
|
374
|
-
stem_type: stem.type || 'stem'
|
|
375
|
-
)
|
|
376
|
-
end
|
|
377
|
-
|
|
378
|
-
def transform_image(image)
|
|
379
|
-
Coradoc::CoreModel::Image.new(
|
|
380
|
-
src: image.src,
|
|
381
|
-
alt: image.title&.to_s,
|
|
382
|
-
width: image.attributes&.[]('width'),
|
|
383
|
-
height: image.attributes&.[]('height')
|
|
384
|
-
)
|
|
385
|
-
end
|
|
386
|
-
|
|
387
|
-
def transform_bibliography(bib)
|
|
388
|
-
entries = Array(bib.entries).map do |entry|
|
|
389
|
-
transform_bibliography_entry(entry)
|
|
42
|
+
extract_text_content(content)
|
|
390
43
|
end
|
|
391
|
-
|
|
392
|
-
Coradoc::CoreModel::Bibliography.new(
|
|
393
|
-
id: bib.id,
|
|
394
|
-
title: bib.title.to_s,
|
|
395
|
-
level: nil,
|
|
396
|
-
entries: entries
|
|
397
|
-
)
|
|
398
44
|
end
|
|
399
45
|
|
|
400
|
-
def
|
|
401
|
-
|
|
402
|
-
anchor_name: entry.anchor_name,
|
|
403
|
-
document_id: entry.document_id,
|
|
404
|
-
ref_text: entry.ref_text.to_s
|
|
405
|
-
)
|
|
46
|
+
def extract_text_content(content)
|
|
47
|
+
TextExtractVisitor.new.extract(content)
|
|
406
48
|
end
|
|
407
49
|
|
|
408
|
-
|
|
50
|
+
def extract_block_language(block)
|
|
51
|
+
lang = block.lang
|
|
52
|
+
return lang if lang.is_a?(String) && !lang.empty?
|
|
409
53
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
ADOC_DELIMITER_CHAR_TO_SEMANTIC = {
|
|
413
|
-
'-' => :source_code,
|
|
414
|
-
'=' => :example,
|
|
415
|
-
'_' => :quote,
|
|
416
|
-
'*' => :sidebar,
|
|
417
|
-
'.' => :literal,
|
|
418
|
-
'+' => :pass
|
|
419
|
-
}.freeze
|
|
54
|
+
attrs = block.attributes
|
|
55
|
+
return nil unless attrs.is_a?(Coradoc::AsciiDoc::Model::AttributeList)
|
|
420
56
|
|
|
421
|
-
|
|
422
|
-
return
|
|
57
|
+
named_lang = attrs['language']
|
|
58
|
+
return named_lang.to_s if named_lang
|
|
423
59
|
|
|
424
|
-
|
|
425
|
-
|
|
60
|
+
positional = attrs.positional
|
|
61
|
+
positional[1]&.value&.to_s if positional.length > 1
|
|
426
62
|
end
|
|
427
63
|
|
|
428
64
|
def extract_document_attributes(doc)
|
|
@@ -435,23 +71,13 @@ module Coradoc
|
|
|
435
71
|
metadata
|
|
436
72
|
end
|
|
437
73
|
|
|
438
|
-
def
|
|
439
|
-
|
|
440
|
-
return lang if lang.is_a?(String) && !lang.empty?
|
|
441
|
-
|
|
442
|
-
attrs = block.attributes
|
|
443
|
-
return nil unless attrs.is_a?(Coradoc::AsciiDoc::Model::AttributeList)
|
|
444
|
-
|
|
445
|
-
named_lang = attrs['language']
|
|
446
|
-
return named_lang.to_s if named_lang
|
|
74
|
+
def asciidoc_delimiter_to_semantic(delimiter)
|
|
75
|
+
return :open if delimiter && delimiter.length < 4
|
|
447
76
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
positional[1]&.value&.to_s if positional.length > 1
|
|
77
|
+
char = delimiter&.[](0)
|
|
78
|
+
DelimiterMapping::CHAR_TO_SEMANTIC[char] || :open
|
|
451
79
|
end
|
|
452
80
|
|
|
453
|
-
# Parse raw text through the inline parser to extract inline elements
|
|
454
|
-
# (anchors, cross-references, monospace, etc.)
|
|
455
81
|
def parse_inline_text(raw_text)
|
|
456
82
|
return [] if raw_text.nil? || raw_text.to_s.strip.empty?
|
|
457
83
|
|
|
@@ -477,179 +103,26 @@ module Coradoc
|
|
|
477
103
|
end
|
|
478
104
|
|
|
479
105
|
def transform_inline_content(content)
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
case content
|
|
483
|
-
when Array
|
|
484
|
-
result = []
|
|
485
|
-
content.each_with_index do |item, idx|
|
|
486
|
-
transformed = transform_inline_content(item)
|
|
487
|
-
next if transformed.empty?
|
|
488
|
-
|
|
489
|
-
needs_space = idx.positive? &&
|
|
490
|
-
item.is_a?(Coradoc::AsciiDoc::Model::TextElement) &&
|
|
491
|
-
item.line_break != '+'
|
|
492
|
-
result << Coradoc::CoreModel::TextContent.new(text: ' ') if needs_space
|
|
493
|
-
result.concat(transformed)
|
|
494
|
-
end
|
|
495
|
-
result
|
|
496
|
-
when Coradoc::AsciiDoc::Model::TextElement
|
|
497
|
-
transform_inline_content(content.content)
|
|
498
|
-
when Coradoc::AsciiDoc::Model::Term
|
|
499
|
-
[Coradoc::CoreModel::TermElement.new(
|
|
500
|
-
content: content.term.to_s
|
|
501
|
-
)]
|
|
502
|
-
when String
|
|
503
|
-
content.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: content)]
|
|
504
|
-
when Coradoc::AsciiDoc::Model::Base
|
|
505
|
-
[transform(content)]
|
|
506
|
-
else
|
|
507
|
-
text = extract_text_content(content)
|
|
508
|
-
text.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: text)]
|
|
509
|
-
end
|
|
510
|
-
end
|
|
511
|
-
|
|
512
|
-
def extract_core_model_text(model)
|
|
513
|
-
case model
|
|
514
|
-
when Coradoc::CoreModel::ListBlock
|
|
515
|
-
model.items.map do |item|
|
|
516
|
-
item.is_a?(Coradoc::CoreModel::ListItem) ? "* #{item.flat_text}" : item.to_s
|
|
517
|
-
end.join("\n")
|
|
518
|
-
when Coradoc::CoreModel::AnnotationBlock
|
|
519
|
-
"#{model.annotation_type}: #{model.flat_text}"
|
|
520
|
-
when Coradoc::CoreModel::Block
|
|
521
|
-
model.flat_text
|
|
522
|
-
when Coradoc::CoreModel::Image
|
|
523
|
-
model.alt || ''
|
|
524
|
-
when Coradoc::CoreModel::InlineElement
|
|
525
|
-
model.content.to_s
|
|
526
|
-
else
|
|
527
|
-
''
|
|
528
|
-
end
|
|
529
|
-
end
|
|
530
|
-
|
|
531
|
-
def extract_title_text(title)
|
|
532
|
-
return nil if title.nil?
|
|
533
|
-
return title.to_s unless title.is_a?(Coradoc::AsciiDoc::Model::Title)
|
|
534
|
-
|
|
535
|
-
content = title.content
|
|
536
|
-
return '' if content.nil?
|
|
537
|
-
|
|
538
|
-
if content.is_a?(String)
|
|
539
|
-
content
|
|
540
|
-
elsif content.is_a?(Array)
|
|
541
|
-
content.map { |c| extract_text_content(c) }.join
|
|
542
|
-
else
|
|
543
|
-
extract_text_content(content)
|
|
544
|
-
end
|
|
545
|
-
end
|
|
546
|
-
|
|
547
|
-
def extract_text_content(content)
|
|
548
|
-
case content
|
|
549
|
-
when nil
|
|
550
|
-
''
|
|
551
|
-
when String
|
|
552
|
-
content
|
|
553
|
-
when Array
|
|
554
|
-
result = []
|
|
555
|
-
content.each_with_index do |item, idx|
|
|
556
|
-
text = extract_text_content(item)
|
|
557
|
-
result << text if text && !text.empty?
|
|
558
|
-
|
|
559
|
-
next unless idx < content.length - 1 && text && !text.empty?
|
|
560
|
-
|
|
561
|
-
result << ' ' if item.is_a?(Coradoc::AsciiDoc::Model::TextElement) && item.line_break != '+'
|
|
562
|
-
end
|
|
563
|
-
result.join
|
|
564
|
-
when Coradoc::AsciiDoc::Model::TextElement
|
|
565
|
-
if content.content.is_a?(Array)
|
|
566
|
-
extract_text_content(content.content)
|
|
567
|
-
else
|
|
568
|
-
content.content.to_s
|
|
569
|
-
end
|
|
570
|
-
when Coradoc::AsciiDoc::Model::Inline::Bold,
|
|
571
|
-
Coradoc::AsciiDoc::Model::Inline::Italic,
|
|
572
|
-
Coradoc::AsciiDoc::Model::Inline::Monospace,
|
|
573
|
-
Coradoc::AsciiDoc::Model::Inline::Highlight,
|
|
574
|
-
Coradoc::AsciiDoc::Model::Inline::Strikethrough,
|
|
575
|
-
Coradoc::AsciiDoc::Model::Inline::Subscript,
|
|
576
|
-
Coradoc::AsciiDoc::Model::Inline::Superscript,
|
|
577
|
-
Coradoc::AsciiDoc::Model::Inline::Underline
|
|
578
|
-
extract_text_content(content.content)
|
|
579
|
-
when Coradoc::AsciiDoc::Model::Inline::Link
|
|
580
|
-
content.name || content.path || ''
|
|
581
|
-
when Coradoc::AsciiDoc::Model::Inline::CrossReference
|
|
582
|
-
content.href || ''
|
|
583
|
-
when Coradoc::AsciiDoc::Model::Inline::Stem
|
|
584
|
-
content.content.to_s
|
|
585
|
-
when Coradoc::AsciiDoc::Model::Inline::Footnote
|
|
586
|
-
if content.content
|
|
587
|
-
extract_text_content(content.content)
|
|
588
|
-
else
|
|
589
|
-
''
|
|
590
|
-
end
|
|
591
|
-
when Coradoc::AsciiDoc::Model::Inline::AttributeReference
|
|
592
|
-
"{#{content.name}}"
|
|
593
|
-
when Coradoc::AsciiDoc::Model::Term
|
|
594
|
-
content.term.to_s
|
|
595
|
-
when Coradoc::CoreModel::TextContent
|
|
596
|
-
content.text.to_s
|
|
597
|
-
when Coradoc::CoreModel::Image
|
|
598
|
-
content.alt || content.src || ''
|
|
599
|
-
when Coradoc::AsciiDoc::Model::Image::Core
|
|
600
|
-
content.alt || content.src || ''
|
|
601
|
-
when Coradoc::AsciiDoc::Model::Base
|
|
602
|
-
if content.content
|
|
603
|
-
extract_text_content(content.content)
|
|
604
|
-
else
|
|
605
|
-
''
|
|
606
|
-
end
|
|
607
|
-
else
|
|
608
|
-
if content.is_a?(String)
|
|
609
|
-
content
|
|
610
|
-
elsif content.class.name.start_with?('Parslet::')
|
|
611
|
-
content.to_s
|
|
612
|
-
else
|
|
613
|
-
''
|
|
614
|
-
end
|
|
615
|
-
end
|
|
106
|
+
InlineTransformVisitor.new(self).transform(content)
|
|
616
107
|
end
|
|
617
108
|
|
|
618
109
|
def parse_and_transform_inline(text)
|
|
619
110
|
return text if text.nil? || text.to_s.strip.empty?
|
|
620
111
|
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
/\{[a-zA-Z_]+\}/,
|
|
626
|
-
%r{https?://},
|
|
627
|
-
/<[^>]+>/
|
|
628
|
-
]
|
|
629
|
-
|
|
630
|
-
has_inline_markup = inline_patterns.any? { |pattern| text =~ pattern }
|
|
631
|
-
return text unless has_inline_markup
|
|
632
|
-
|
|
633
|
-
begin
|
|
634
|
-
parsed_elements = Coradoc::AsciiDoc::Transformer.parse_inline_content(text)
|
|
635
|
-
content_array = parsed_elements.flat_map do |element|
|
|
636
|
-
if element.is_a?(Coradoc::AsciiDoc::Model::TextElement)
|
|
637
|
-
element.content
|
|
638
|
-
else
|
|
639
|
-
element
|
|
640
|
-
end
|
|
641
|
-
end
|
|
112
|
+
parsed_elements = Coradoc::AsciiDoc::Transformer.parse_inline_content(text)
|
|
113
|
+
content_array = parsed_elements.flat_map do |element|
|
|
114
|
+
element.is_a?(Coradoc::AsciiDoc::Model::TextElement) ? element.content : element
|
|
115
|
+
end
|
|
642
116
|
|
|
643
|
-
|
|
117
|
+
transformed = transform_inline_content(content_array)
|
|
644
118
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
end
|
|
650
|
-
rescue StandardError
|
|
651
|
-
text
|
|
119
|
+
if transformed.all?(Coradoc::CoreModel::TextContent)
|
|
120
|
+
transformed.map(&:text).join
|
|
121
|
+
else
|
|
122
|
+
transformed
|
|
652
123
|
end
|
|
124
|
+
rescue Parslet::ParseFailed
|
|
125
|
+
text
|
|
653
126
|
end
|
|
654
127
|
end
|
|
655
128
|
end
|