coradoc-adoc 2.0.7 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,8 @@ require_relative 'to_core_model_registrations'
5
5
  module Coradoc
6
6
  module AsciiDoc
7
7
  module Transform
8
- # Transforms AsciiDoc models to CoreModel equivalents
9
8
  class ToCoreModel
10
- def transform(model)
11
- self.class.transform(model)
12
- end
9
+ include Coradoc::Transform::Base
13
10
 
14
11
  class << self
15
12
  def transform(model)
@@ -17,412 +14,51 @@ module Coradoc
17
14
  return model unless model.is_a?(Coradoc::AsciiDoc::Model::Base)
18
15
 
19
16
  transformer = Registry.lookup(model.class)
20
- return transformer.call(model) if transformer
21
-
22
- transform_with_case(model)
23
- end
24
-
25
- def transform_with_case(model)
26
- case model
27
- when Coradoc::AsciiDoc::Model::Document
28
- transform_document(model)
29
- when Coradoc::AsciiDoc::Model::Section
30
- transform_section(model)
31
- when Coradoc::AsciiDoc::Model::Paragraph
32
- transform_paragraph(model)
33
- when Coradoc::AsciiDoc::Model::Block::SourceCode
34
- transform_source_block(model)
35
- when Coradoc::AsciiDoc::Model::Block::Quote
36
- transform_typed_block(model, Coradoc::CoreModel::QuoteBlock)
37
- when Coradoc::AsciiDoc::Model::Block::Example
38
- transform_typed_block(model, Coradoc::CoreModel::ExampleBlock)
39
- when Coradoc::AsciiDoc::Model::Block::Side
40
- transform_typed_block(model, Coradoc::CoreModel::SidebarBlock)
41
- when Coradoc::AsciiDoc::Model::Block::Literal
42
- transform_typed_block(model, Coradoc::CoreModel::LiteralBlock)
43
- when Coradoc::AsciiDoc::Model::Block::Open
44
- transform_typed_block(model, Coradoc::CoreModel::OpenBlock)
45
- when Coradoc::AsciiDoc::Model::Block::Pass
46
- transform_typed_block(model, Coradoc::CoreModel::PassBlock)
47
- when Coradoc::AsciiDoc::Model::Block::Listing
48
- transform_typed_block(model, Coradoc::CoreModel::ListingBlock)
49
- when Coradoc::AsciiDoc::Model::Block::Core
50
- transform_block(model, model.delimiter.to_s)
51
- when Coradoc::AsciiDoc::Model::Table
52
- transform_table(model)
53
- when Coradoc::AsciiDoc::Model::TableRow
54
- transform_table_row(model)
55
- when Coradoc::AsciiDoc::Model::TableCell
56
- transform_table_cell(model)
57
- when Coradoc::AsciiDoc::Model::List::Unordered
58
- transform_list(model, 'unordered')
59
- when Coradoc::AsciiDoc::Model::List::Ordered
60
- transform_list(model, 'ordered')
61
- when Coradoc::AsciiDoc::Model::List::Definition
62
- transform_list(model, 'definition')
63
- when Coradoc::AsciiDoc::Model::Term
64
- transform_term(model)
65
- when Coradoc::AsciiDoc::Model::Admonition
66
- transform_admonition(model)
67
- when Coradoc::AsciiDoc::Model::Inline::Bold
68
- transform_inline(model, 'bold')
69
- when Coradoc::AsciiDoc::Model::Inline::Italic
70
- transform_inline(model, 'italic')
71
- when Coradoc::AsciiDoc::Model::Inline::Monospace
72
- transform_inline(model, 'monospace')
73
- when Coradoc::AsciiDoc::Model::Inline::Highlight
74
- transform_inline(model, 'highlight')
75
- when Coradoc::AsciiDoc::Model::Inline::Link
76
- transform_link(model)
77
- when Coradoc::AsciiDoc::Model::Inline::CrossReference
78
- transform_cross_reference(model)
79
- when Coradoc::AsciiDoc::Model::Inline::Stem
80
- transform_stem(model)
81
- when Coradoc::AsciiDoc::Model::CommentBlock
82
- Coradoc::CoreModel::CommentBlock.new(
83
- content: model.text.to_s
84
- )
85
- when Coradoc::AsciiDoc::Model::Bibliography
86
- transform_bibliography(model)
87
- when Coradoc::AsciiDoc::Model::BibliographyEntry
88
- transform_bibliography_entry(model)
89
- when Coradoc::AsciiDoc::Model::Image::BlockImage
90
- transform_image(model)
91
- when Coradoc::AsciiDoc::Model::TextElement
92
- extract_text_content(model)
93
- else
94
- model
95
- end
96
- end
97
-
98
- def transform_document(doc)
99
- title_text = extract_title_text(doc.header&.title)
100
- attributes = extract_document_attributes(doc)
101
- Coradoc::CoreModel::DocumentElement.new(
102
- id: doc.id,
103
- title: title_text,
104
- attributes: attributes,
105
- children: transform(doc.sections || doc.contents || [])
106
- )
107
- end
108
-
109
- def transform_section(section, parent_id: nil)
110
- title_text = extract_title_text(section.title)
111
- section_id = section.id || Coradoc::CoreModel::IdGenerator.generate_from_title(
112
- title_text, parent_id: parent_id
113
- )
114
-
115
- content_children = transform(section.contents || [])
116
- nested_sections = (section.sections || []).map do |child|
117
- transform_section(child, parent_id: section_id)
118
- end
119
-
120
- Coradoc::CoreModel::SectionElement.new(
121
- id: section_id,
122
- level: section.level,
123
- title: title_text,
124
- children: content_children + nested_sections
125
- )
126
- end
127
-
128
- def transform_paragraph(para)
129
- children = transform_inline_content(para.content)
130
-
131
- Coradoc::CoreModel::ParagraphBlock.new(
132
- id: para.id,
133
- content: extract_text_content(para.content),
134
- children: children
135
- )
136
- end
137
-
138
- def transform_source_block(block)
139
- content_lines = Array(block.lines).reject do |line|
140
- line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
141
- line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
142
- end.map do |line|
143
- extract_text_content(line)
144
- end.join("\n")
145
-
146
- language = extract_block_language(block)
147
-
148
- Coradoc::CoreModel::SourceBlock.new(
149
- id: block.id,
150
- title: extract_title_text(block.title),
151
- content: content_lines,
152
- language: language
153
- )
154
- end
155
-
156
- def transform_block(block, semantic_type_or_delimiter)
157
- content_lines = extract_block_lines(block)
158
- semantic_type = if semantic_type_or_delimiter.is_a?(Symbol)
159
- semantic_type_or_delimiter
160
- else
161
- asciidoc_delimiter_to_semantic(semantic_type_or_delimiter)
162
- end
163
-
164
- Coradoc::CoreModel::Block.new(
165
- block_semantic_type: semantic_type,
166
- delimiter_type: semantic_type_or_delimiter.is_a?(String) ? semantic_type_or_delimiter : nil,
167
- id: block.id,
168
- title: extract_title_text(block.title),
169
- content: content_lines,
170
- language: extract_block_language(block)
171
- )
172
- end
173
-
174
- def transform_typed_block(block, klass, extra_attrs = {})
175
- lines = Array(block.lines).reject do |line|
176
- line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
177
- line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
178
- end
179
-
180
- has_nested_blocks = lines.any?(Coradoc::AsciiDoc::Model::Block::Core)
181
-
182
- if has_nested_blocks
183
- children = lines.map { |line| transform(line) }
184
- klass.new(
185
- id: block.id,
186
- title: extract_title_text(block.title),
187
- children: children,
188
- language: extract_block_language(block),
189
- **extra_attrs
190
- )
191
- else
192
- content_lines = lines.map { |line| extract_text_content(line) }.join("\n")
193
- klass.new(
194
- id: block.id,
195
- title: extract_title_text(block.title),
196
- content: content_lines,
197
- language: extract_block_language(block),
198
- **extra_attrs
199
- )
200
- end
17
+ transformer ? transformer.call(model) : model
201
18
  end
202
19
 
203
20
  def extract_block_lines(block)
204
- Array(block.lines).reject do |line|
21
+ non_break_lines = Array(block.lines).reject do |line|
205
22
  line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
206
23
  line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
207
- end.map do |line|
24
+ end
25
+ non_break_lines.map do |line|
208
26
  extract_text_content(line)
209
27
  end.join("\n")
210
28
  end
211
29
 
212
- def transform_table(table)
213
- rows = Array(table.rows).map do |row|
214
- transform_table_row(row)
215
- end
216
-
217
- Coradoc::CoreModel::Table.new(
218
- id: table.id,
219
- title: table.title&.to_s,
220
- rows: rows
221
- )
222
- end
223
-
224
- def transform_table_row(row)
225
- cells = Array(row.columns).map do |cell|
226
- transform_table_cell(cell)
227
- end
228
- Coradoc::CoreModel::TableRow.new(
229
- cells: cells,
230
- header: row.header
231
- )
232
- end
233
-
234
- def transform_table_cell(cell)
235
- children = transform_inline_content(cell.content)
236
-
237
- Coradoc::CoreModel::TableCell.new(
238
- content: extract_text_content(cell.content),
239
- alignment: cell.horizontal_alignment,
240
- vertical_alignment: cell.vertical_alignment,
241
- colspan: cell.colspan,
242
- rowspan: cell.rowspan,
243
- style: cell.style_name,
244
- children: children
245
- )
246
- end
247
-
248
- def list_marker_type(list)
249
- case list
250
- when Coradoc::AsciiDoc::Model::List::Ordered then 'ordered'
251
- when Coradoc::AsciiDoc::Model::List::Unordered then 'unordered'
252
- when Coradoc::AsciiDoc::Model::List::Definition then 'definition'
253
- else 'unordered'
254
- end
255
- end
256
-
257
- def transform_list(list, marker_type)
258
- items = Array(list.items).map do |item|
259
- if item.is_a?(Coradoc::AsciiDoc::Model::List::DefinitionItem)
260
- term_content = item.terms
261
- def_content = item.contents
262
-
263
- # Re-parse raw text through inline parser for structured content
264
- term_parts = term_content.is_a?(Array) ? term_content : [term_content]
265
- parsed_terms = term_parts.flat_map do |part|
266
- parse_inline_text(part)
267
- end
268
-
269
- parsed_defs = parse_inline_text(def_content)
270
-
271
- term_children = transform_inline_content(parsed_terms)
272
- def_children = transform_inline_content(parsed_defs)
273
-
274
- di = Coradoc::CoreModel::DefinitionItem.new(
275
- term: extract_text_content(term_children),
276
- definitions: [extract_text_content(def_children)],
277
- term_children: term_children,
278
- definition_children: def_children
279
- )
280
- di.id = item.id if item.id
281
- di
282
- else
283
- content_val = item.content
284
- children = transform_inline_content(content_val)
285
-
286
- li = Coradoc::CoreModel::ListItem.new(
287
- content: extract_text_content(content_val),
288
- marker: item.marker
289
- )
290
- li.children = children
291
-
292
- if item.nested.is_a?(Coradoc::AsciiDoc::Model::List::Core)
293
- nested_core = transform_list(item.nested, list_marker_type(item.nested))
294
- li.children << nested_core
295
- elsif item.nested.is_a?(Array)
296
- item.nested.each do |n|
297
- next unless n.is_a?(Coradoc::AsciiDoc::Model::List::Core)
298
-
299
- li.children << transform_list(n, list_marker_type(n))
300
- end
301
- end
30
+ def extract_title_text(title)
31
+ return nil if title.nil?
32
+ return title.to_s unless title.is_a?(Coradoc::AsciiDoc::Model::Title)
302
33
 
303
- li
304
- end
305
- end
34
+ content = title.content
35
+ return '' if content.nil?
306
36
 
307
- if marker_type == 'definition'
308
- Coradoc::CoreModel::DefinitionList.new(items: items)
37
+ if content.is_a?(String)
38
+ content
39
+ elsif content.is_a?(Array)
40
+ content.map { |c| extract_text_content(c) }.join
309
41
  else
310
- Coradoc::CoreModel::ListBlock.new(
311
- marker_type: marker_type,
312
- items: items
313
- )
314
- end
315
- end
316
-
317
- def transform_term(term)
318
- Coradoc::CoreModel::Term.new(
319
- text: term.term.to_s,
320
- type: term.type&.to_s || 'preferred',
321
- lang: term.lang&.to_s || 'en'
322
- )
323
- end
324
-
325
- def transform_admonition(admonition)
326
- children = transform_inline_content(admonition.content)
327
- block = Coradoc::CoreModel::AnnotationBlock.new(
328
- annotation_type: admonition.type,
329
- content: extract_text_content(admonition.content)
330
- )
331
- block.children = children
332
- block
333
- end
334
-
335
- def transform_inline(inline, format_type)
336
- klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
337
- klass.new(
338
- content: extract_text_content(inline.content)
339
- )
340
- end
341
-
342
- def transform_inline_text(inline, format_type)
343
- klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
344
- klass.new(
345
- content: inline.text.to_s
346
- )
347
- end
348
-
349
- def transform_inline_footnote(footnote)
350
- parsed_content = parse_and_transform_inline(footnote.text.to_s)
351
- Coradoc::CoreModel::FootnoteElement.new(
352
- target: footnote.id,
353
- content: parsed_content
354
- )
355
- end
356
-
357
- def transform_link(link)
358
- Coradoc::CoreModel::LinkElement.new(
359
- target: link.path,
360
- content: link.name || link.path
361
- )
362
- end
363
-
364
- def transform_cross_reference(xref)
365
- Coradoc::CoreModel::CrossReferenceElement.new(
366
- target: xref.href,
367
- content: xref.args&.first || xref.href
368
- )
369
- end
370
-
371
- def transform_stem(stem)
372
- Coradoc::CoreModel::StemElement.new(
373
- content: stem.content,
374
- stem_type: stem.type || 'stem'
375
- )
376
- end
377
-
378
- def transform_image(image)
379
- Coradoc::CoreModel::Image.new(
380
- src: image.src,
381
- alt: image.title&.to_s,
382
- width: image.attributes&.[]('width'),
383
- height: image.attributes&.[]('height')
384
- )
385
- end
386
-
387
- def transform_bibliography(bib)
388
- entries = Array(bib.entries).map do |entry|
389
- transform_bibliography_entry(entry)
42
+ extract_text_content(content)
390
43
  end
391
-
392
- Coradoc::CoreModel::Bibliography.new(
393
- id: bib.id,
394
- title: bib.title.to_s,
395
- level: nil,
396
- entries: entries
397
- )
398
44
  end
399
45
 
400
- def transform_bibliography_entry(entry)
401
- Coradoc::CoreModel::BibliographyEntry.new(
402
- anchor_name: entry.anchor_name,
403
- document_id: entry.document_id,
404
- ref_text: entry.ref_text.to_s
405
- )
46
+ def extract_text_content(content)
47
+ TextExtractVisitor.new.extract(content)
406
48
  end
407
49
 
408
- private
50
+ def extract_block_language(block)
51
+ lang = block.lang
52
+ return lang if lang.is_a?(String) && !lang.empty?
409
53
 
410
- # AsciiDoc delimiters are any length of the same character (4+ for most, 2+ for open).
411
- # We map by the first character to handle all lengths correctly.
412
- ADOC_DELIMITER_CHAR_TO_SEMANTIC = {
413
- '-' => :source_code,
414
- '=' => :example,
415
- '_' => :quote,
416
- '*' => :sidebar,
417
- '.' => :literal,
418
- '+' => :pass
419
- }.freeze
54
+ attrs = block.attributes
55
+ return nil unless attrs.is_a?(Coradoc::AsciiDoc::Model::AttributeList)
420
56
 
421
- def asciidoc_delimiter_to_semantic(delimiter)
422
- return :open if delimiter && delimiter.length < 4
57
+ named_lang = attrs['language']
58
+ return named_lang.to_s if named_lang
423
59
 
424
- char = delimiter&.[](0)
425
- ADOC_DELIMITER_CHAR_TO_SEMANTIC[char] || :open
60
+ positional = attrs.positional
61
+ positional[1]&.value&.to_s if positional.length > 1
426
62
  end
427
63
 
428
64
  def extract_document_attributes(doc)
@@ -435,23 +71,13 @@ module Coradoc
435
71
  metadata
436
72
  end
437
73
 
438
- def extract_block_language(block)
439
- lang = block.lang
440
- return lang if lang.is_a?(String) && !lang.empty?
441
-
442
- attrs = block.attributes
443
- return nil unless attrs.is_a?(Coradoc::AsciiDoc::Model::AttributeList)
444
-
445
- named_lang = attrs['language']
446
- return named_lang.to_s if named_lang
74
+ def asciidoc_delimiter_to_semantic(delimiter)
75
+ return :open if delimiter && delimiter.length < 4
447
76
 
448
- # For [source,yaml], the language is the second positional attribute
449
- positional = attrs.positional
450
- positional[1]&.value&.to_s if positional.length > 1
77
+ char = delimiter&.[](0)
78
+ DelimiterMapping::CHAR_TO_SEMANTIC[char] || :open
451
79
  end
452
80
 
453
- # Parse raw text through the inline parser to extract inline elements
454
- # (anchors, cross-references, monospace, etc.)
455
81
  def parse_inline_text(raw_text)
456
82
  return [] if raw_text.nil? || raw_text.to_s.strip.empty?
457
83
 
@@ -477,179 +103,26 @@ module Coradoc
477
103
  end
478
104
 
479
105
  def transform_inline_content(content)
480
- return [] if content.nil?
481
-
482
- case content
483
- when Array
484
- result = []
485
- content.each_with_index do |item, idx|
486
- transformed = transform_inline_content(item)
487
- next if transformed.empty?
488
-
489
- needs_space = idx.positive? &&
490
- item.is_a?(Coradoc::AsciiDoc::Model::TextElement) &&
491
- item.line_break != '+'
492
- result << Coradoc::CoreModel::TextContent.new(text: ' ') if needs_space
493
- result.concat(transformed)
494
- end
495
- result
496
- when Coradoc::AsciiDoc::Model::TextElement
497
- transform_inline_content(content.content)
498
- when Coradoc::AsciiDoc::Model::Term
499
- [Coradoc::CoreModel::TermElement.new(
500
- content: content.term.to_s
501
- )]
502
- when String
503
- content.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: content)]
504
- when Coradoc::AsciiDoc::Model::Base
505
- [transform(content)]
506
- else
507
- text = extract_text_content(content)
508
- text.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: text)]
509
- end
510
- end
511
-
512
- def extract_core_model_text(model)
513
- case model
514
- when Coradoc::CoreModel::ListBlock
515
- model.items.map do |item|
516
- item.is_a?(Coradoc::CoreModel::ListItem) ? "* #{item.flat_text}" : item.to_s
517
- end.join("\n")
518
- when Coradoc::CoreModel::AnnotationBlock
519
- "#{model.annotation_type}: #{model.flat_text}"
520
- when Coradoc::CoreModel::Block
521
- model.flat_text
522
- when Coradoc::CoreModel::Image
523
- model.alt || ''
524
- when Coradoc::CoreModel::InlineElement
525
- model.content.to_s
526
- else
527
- ''
528
- end
529
- end
530
-
531
- def extract_title_text(title)
532
- return nil if title.nil?
533
- return title.to_s unless title.is_a?(Coradoc::AsciiDoc::Model::Title)
534
-
535
- content = title.content
536
- return '' if content.nil?
537
-
538
- if content.is_a?(String)
539
- content
540
- elsif content.is_a?(Array)
541
- content.map { |c| extract_text_content(c) }.join
542
- else
543
- extract_text_content(content)
544
- end
545
- end
546
-
547
- def extract_text_content(content)
548
- case content
549
- when nil
550
- ''
551
- when String
552
- content
553
- when Array
554
- result = []
555
- content.each_with_index do |item, idx|
556
- text = extract_text_content(item)
557
- result << text if text && !text.empty?
558
-
559
- next unless idx < content.length - 1 && text && !text.empty?
560
-
561
- result << ' ' if item.is_a?(Coradoc::AsciiDoc::Model::TextElement) && item.line_break != '+'
562
- end
563
- result.join
564
- when Coradoc::AsciiDoc::Model::TextElement
565
- if content.content.is_a?(Array)
566
- extract_text_content(content.content)
567
- else
568
- content.content.to_s
569
- end
570
- when Coradoc::AsciiDoc::Model::Inline::Bold,
571
- Coradoc::AsciiDoc::Model::Inline::Italic,
572
- Coradoc::AsciiDoc::Model::Inline::Monospace,
573
- Coradoc::AsciiDoc::Model::Inline::Highlight,
574
- Coradoc::AsciiDoc::Model::Inline::Strikethrough,
575
- Coradoc::AsciiDoc::Model::Inline::Subscript,
576
- Coradoc::AsciiDoc::Model::Inline::Superscript,
577
- Coradoc::AsciiDoc::Model::Inline::Underline
578
- extract_text_content(content.content)
579
- when Coradoc::AsciiDoc::Model::Inline::Link
580
- content.name || content.path || ''
581
- when Coradoc::AsciiDoc::Model::Inline::CrossReference
582
- content.href || ''
583
- when Coradoc::AsciiDoc::Model::Inline::Stem
584
- content.content.to_s
585
- when Coradoc::AsciiDoc::Model::Inline::Footnote
586
- if content.content
587
- extract_text_content(content.content)
588
- else
589
- ''
590
- end
591
- when Coradoc::AsciiDoc::Model::Inline::AttributeReference
592
- "{#{content.name}}"
593
- when Coradoc::AsciiDoc::Model::Term
594
- content.term.to_s
595
- when Coradoc::CoreModel::TextContent
596
- content.text.to_s
597
- when Coradoc::CoreModel::Image
598
- content.alt || content.src || ''
599
- when Coradoc::AsciiDoc::Model::Image::Core
600
- content.alt || content.src || ''
601
- when Coradoc::AsciiDoc::Model::Base
602
- if content.content
603
- extract_text_content(content.content)
604
- else
605
- ''
606
- end
607
- else
608
- if content.is_a?(String)
609
- content
610
- elsif content.class.name.start_with?('Parslet::')
611
- content.to_s
612
- else
613
- ''
614
- end
615
- end
106
+ InlineTransformVisitor.new(self).transform(content)
616
107
  end
617
108
 
618
109
  def parse_and_transform_inline(text)
619
110
  return text if text.nil? || text.to_s.strip.empty?
620
111
 
621
- inline_patterns = [
622
- /stem:\[/,
623
- /term:\[/,
624
- /footnote:\[/,
625
- /\{[a-zA-Z_]+\}/,
626
- %r{https?://},
627
- /<[^>]+>/
628
- ]
629
-
630
- has_inline_markup = inline_patterns.any? { |pattern| text =~ pattern }
631
- return text unless has_inline_markup
632
-
633
- begin
634
- parsed_elements = Coradoc::AsciiDoc::Transformer.parse_inline_content(text)
635
- content_array = parsed_elements.flat_map do |element|
636
- if element.is_a?(Coradoc::AsciiDoc::Model::TextElement)
637
- element.content
638
- else
639
- element
640
- end
641
- end
112
+ parsed_elements = Coradoc::AsciiDoc::Transformer.parse_inline_content(text)
113
+ content_array = parsed_elements.flat_map do |element|
114
+ element.is_a?(Coradoc::AsciiDoc::Model::TextElement) ? element.content : element
115
+ end
642
116
 
643
- transformed = transform_inline_content(content_array)
117
+ transformed = transform_inline_content(content_array)
644
118
 
645
- if transformed.all?(Coradoc::CoreModel::TextContent)
646
- transformed.map(&:text).join
647
- else
648
- transformed
649
- end
650
- rescue StandardError
651
- text
119
+ if transformed.all?(Coradoc::CoreModel::TextContent)
120
+ transformed.map(&:text).join
121
+ else
122
+ transformed
652
123
  end
124
+ rescue Parslet::ParseFailed
125
+ text
653
126
  end
654
127
  end
655
128
  end