coradoc-adoc 2.0.0 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@ module Coradoc
13
13
 
14
14
  class << self
15
15
  def transform(model)
16
- return model.map { |item| transform(item) }.compact if model.is_a?(Array)
16
+ return model.filter_map { |item| transform(item) } if model.is_a?(Array)
17
17
  return model unless model.is_a?(Coradoc::AsciiDoc::Model::Base)
18
18
 
19
19
  transformer = Registry.lookup(model.class)
@@ -79,8 +79,7 @@ module Coradoc
79
79
  when Coradoc::AsciiDoc::Model::Inline::Stem
80
80
  transform_stem(model)
81
81
  when Coradoc::AsciiDoc::Model::CommentBlock
82
- Coradoc::CoreModel::Block.new(
83
- element_type: 'comment',
82
+ Coradoc::CoreModel::CommentBlock.new(
84
83
  content: model.text.to_s
85
84
  )
86
85
  when Coradoc::AsciiDoc::Model::Bibliography
@@ -99,8 +98,7 @@ module Coradoc
99
98
  def transform_document(doc)
100
99
  title_text = extract_title_text(doc.header&.title)
101
100
  attributes = extract_document_attributes(doc)
102
- Coradoc::CoreModel::StructuralElement.new(
103
- element_type: 'document',
101
+ Coradoc::CoreModel::DocumentElement.new(
104
102
  id: doc.id,
105
103
  title: title_text,
106
104
  attributes: attributes,
@@ -108,14 +106,19 @@ module Coradoc
108
106
  )
109
107
  end
110
108
 
111
- def transform_section(section)
109
+ def transform_section(section, parent_id: nil)
112
110
  title_text = extract_title_text(section.title)
111
+ section_id = section.id || Coradoc::CoreModel::IdGenerator.generate_from_title(
112
+ title_text, parent_id: parent_id
113
+ )
114
+
113
115
  content_children = transform(section.contents || [])
114
- nested_sections = transform(section.sections || [])
116
+ nested_sections = (section.sections || []).map do |child|
117
+ transform_section(child, parent_id: section_id)
118
+ end
115
119
 
116
- Coradoc::CoreModel::StructuralElement.new(
117
- element_type: 'section',
118
- id: section.id,
120
+ Coradoc::CoreModel::SectionElement.new(
121
+ id: section_id,
119
122
  level: section.level,
120
123
  title: title_text,
121
124
  children: content_children + nested_sections
@@ -125,9 +128,7 @@ module Coradoc
125
128
  def transform_paragraph(para)
126
129
  children = transform_inline_content(para.content)
127
130
 
128
- Coradoc::CoreModel::Block.new(
129
- element_type: 'paragraph',
130
- block_semantic_type: :paragraph,
131
+ Coradoc::CoreModel::ParagraphBlock.new(
131
132
  id: para.id,
132
133
  content: extract_text_content(para.content),
133
134
  children: children
@@ -145,7 +146,6 @@ module Coradoc
145
146
  language = extract_block_language(block)
146
147
 
147
148
  Coradoc::CoreModel::SourceBlock.new(
148
- element_type: 'block',
149
149
  id: block.id,
150
150
  title: extract_title_text(block.title),
151
151
  content: content_lines,
@@ -162,7 +162,6 @@ module Coradoc
162
162
  end
163
163
 
164
164
  Coradoc::CoreModel::Block.new(
165
- element_type: 'block',
166
165
  block_semantic_type: semantic_type,
167
166
  delimiter_type: semantic_type_or_delimiter.is_a?(String) ? semantic_type_or_delimiter : nil,
168
167
  id: block.id,
@@ -173,16 +172,32 @@ module Coradoc
173
172
  end
174
173
 
175
174
  def transform_typed_block(block, klass, extra_attrs = {})
176
- content_lines = extract_block_lines(block)
175
+ lines = Array(block.lines).reject do |line|
176
+ line.is_a?(Coradoc::AsciiDoc::Model::LineBreak) ||
177
+ line.is_a?(Coradoc::AsciiDoc::Model::Break::PageBreak)
178
+ end
177
179
 
178
- klass.new(
179
- element_type: 'block',
180
- id: block.id,
181
- title: extract_title_text(block.title),
182
- content: content_lines,
183
- language: extract_block_language(block),
184
- **extra_attrs
185
- )
180
+ has_nested_blocks = lines.any?(Coradoc::AsciiDoc::Model::Block::Core)
181
+
182
+ if has_nested_blocks
183
+ children = lines.map { |line| transform(line) }
184
+ klass.new(
185
+ id: block.id,
186
+ title: extract_title_text(block.title),
187
+ children: children,
188
+ language: extract_block_language(block),
189
+ **extra_attrs
190
+ )
191
+ else
192
+ content_lines = lines.map { |line| extract_text_content(line) }.join("\n")
193
+ klass.new(
194
+ id: block.id,
195
+ title: extract_title_text(block.title),
196
+ content: content_lines,
197
+ language: extract_block_language(block),
198
+ **extra_attrs
199
+ )
200
+ end
186
201
  end
187
202
 
188
203
  def extract_block_lines(block)
@@ -230,16 +245,40 @@ module Coradoc
230
245
  )
231
246
  end
232
247
 
248
+ def list_marker_type(list)
249
+ case list
250
+ when Coradoc::AsciiDoc::Model::List::Ordered then 'ordered'
251
+ when Coradoc::AsciiDoc::Model::List::Unordered then 'unordered'
252
+ when Coradoc::AsciiDoc::Model::List::Definition then 'definition'
253
+ else 'unordered'
254
+ end
255
+ end
256
+
233
257
  def transform_list(list, marker_type)
234
258
  items = Array(list.items).map do |item|
235
259
  if item.is_a?(Coradoc::AsciiDoc::Model::List::DefinitionItem)
236
260
  term_content = item.terms
237
261
  def_content = item.contents
238
262
 
239
- Coradoc::CoreModel::DefinitionItem.new(
240
- term: extract_text_content(term_content),
241
- definitions: [extract_text_content(def_content)]
263
+ # Re-parse raw text through inline parser for structured content
264
+ term_parts = term_content.is_a?(Array) ? term_content : [term_content]
265
+ parsed_terms = term_parts.flat_map do |part|
266
+ parse_inline_text(part)
267
+ end
268
+
269
+ parsed_defs = parse_inline_text(def_content)
270
+
271
+ term_children = transform_inline_content(parsed_terms)
272
+ def_children = transform_inline_content(parsed_defs)
273
+
274
+ di = Coradoc::CoreModel::DefinitionItem.new(
275
+ term: extract_text_content(term_children),
276
+ definitions: [extract_text_content(def_children)],
277
+ term_children: term_children,
278
+ definition_children: def_children
242
279
  )
280
+ di.id = item.id if item.id
281
+ di
243
282
  else
244
283
  content_val = item.content
245
284
  children = transform_inline_content(content_val)
@@ -249,6 +288,18 @@ module Coradoc
249
288
  marker: item.marker
250
289
  )
251
290
  li.children = children
291
+
292
+ if item.nested.is_a?(Coradoc::AsciiDoc::Model::List::Core)
293
+ nested_core = transform_list(item.nested, list_marker_type(item.nested))
294
+ li.children << nested_core
295
+ elsif item.nested.is_a?(Array)
296
+ item.nested.each do |n|
297
+ next unless n.is_a?(Coradoc::AsciiDoc::Model::List::Core)
298
+
299
+ li.children << transform_list(n, list_marker_type(n))
300
+ end
301
+ end
302
+
252
303
  li
253
304
  end
254
305
  end
@@ -272,54 +323,53 @@ module Coradoc
272
323
  end
273
324
 
274
325
  def transform_admonition(admonition)
275
- Coradoc::CoreModel::AnnotationBlock.new(
326
+ children = transform_inline_content(admonition.content)
327
+ block = Coradoc::CoreModel::AnnotationBlock.new(
276
328
  annotation_type: admonition.type,
277
329
  content: extract_text_content(admonition.content)
278
330
  )
331
+ block.children = children
332
+ block
279
333
  end
280
334
 
281
335
  def transform_inline(inline, format_type)
282
- Coradoc::CoreModel::InlineElement.new(
283
- format_type: format_type,
336
+ klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
337
+ klass.new(
284
338
  content: extract_text_content(inline.content)
285
339
  )
286
340
  end
287
341
 
288
342
  def transform_inline_text(inline, format_type)
289
- Coradoc::CoreModel::InlineElement.new(
290
- format_type: format_type,
343
+ klass = Coradoc::CoreModel::InlineElement.format_type_class(format_type)
344
+ klass.new(
291
345
  content: inline.text.to_s
292
346
  )
293
347
  end
294
348
 
295
349
  def transform_inline_footnote(footnote)
296
350
  parsed_content = parse_and_transform_inline(footnote.text.to_s)
297
- Coradoc::CoreModel::InlineElement.new(
298
- format_type: 'footnote',
351
+ Coradoc::CoreModel::FootnoteElement.new(
299
352
  target: footnote.id,
300
353
  content: parsed_content
301
354
  )
302
355
  end
303
356
 
304
357
  def transform_link(link)
305
- Coradoc::CoreModel::InlineElement.new(
306
- format_type: 'link',
358
+ Coradoc::CoreModel::LinkElement.new(
307
359
  target: link.path,
308
360
  content: link.name || link.path
309
361
  )
310
362
  end
311
363
 
312
364
  def transform_cross_reference(xref)
313
- Coradoc::CoreModel::InlineElement.new(
314
- format_type: 'xref',
365
+ Coradoc::CoreModel::CrossReferenceElement.new(
315
366
  target: xref.href,
316
367
  content: xref.args&.first || xref.href
317
368
  )
318
369
  end
319
370
 
320
371
  def transform_stem(stem)
321
- Coradoc::CoreModel::InlineElement.new(
322
- format_type: 'stem',
372
+ Coradoc::CoreModel::StemElement.new(
323
373
  content: stem.content,
324
374
  stem_type: stem.type || 'stem'
325
375
  )
@@ -376,9 +426,13 @@ module Coradoc
376
426
  end
377
427
 
378
428
  def extract_document_attributes(doc)
379
- return {} unless doc.document_attributes
429
+ return nil unless doc.document_attributes
380
430
 
381
- doc.document_attributes.to_hash
431
+ metadata = Coradoc::CoreModel::Metadata.new
432
+ doc.document_attributes.to_hash.each do |key, value|
433
+ metadata[key.to_s] = value.to_s
434
+ end
435
+ metadata
382
436
  end
383
437
 
384
438
  def extract_block_language(block)
@@ -396,26 +450,62 @@ module Coradoc
396
450
  positional[1]&.value&.to_s if positional.length > 1
397
451
  end
398
452
 
453
+ # Parse raw text through the inline parser to extract inline elements
454
+ # (anchors, cross-references, monospace, etc.)
455
+ def parse_inline_text(raw_text)
456
+ return [] if raw_text.nil? || raw_text.to_s.strip.empty?
457
+
458
+ text = raw_text.to_s
459
+ parser = Coradoc::AsciiDoc::Parser::Base.new
460
+ transformer = Coradoc::AsciiDoc::Transformer.new
461
+
462
+ parsed = parser.text_any.parse(text)
463
+ result = transformer.apply({ text: parsed })
464
+
465
+ case result
466
+ when Coradoc::AsciiDoc::Model::TextElement
467
+ result.content.is_a?(Array) ? result.content : [result.content]
468
+ when Array
469
+ result
470
+ when Coradoc::AsciiDoc::Model::Base
471
+ [result]
472
+ else
473
+ [text]
474
+ end
475
+ rescue Parslet::ParseFailed
476
+ [text]
477
+ end
478
+
399
479
  def transform_inline_content(content)
400
480
  return [] if content.nil?
401
481
 
402
482
  case content
403
483
  when Array
404
- content.flat_map { |item| transform_inline_content(item) }
484
+ result = []
485
+ content.each_with_index do |item, idx|
486
+ transformed = transform_inline_content(item)
487
+ next if transformed.empty?
488
+
489
+ needs_space = idx.positive? &&
490
+ item.is_a?(Coradoc::AsciiDoc::Model::TextElement) &&
491
+ item.line_break != '+'
492
+ result << Coradoc::CoreModel::TextContent.new(text: ' ') if needs_space
493
+ result.concat(transformed)
494
+ end
495
+ result
405
496
  when Coradoc::AsciiDoc::Model::TextElement
406
497
  transform_inline_content(content.content)
407
498
  when Coradoc::AsciiDoc::Model::Term
408
- [Coradoc::CoreModel::InlineElement.new(
409
- format_type: 'term',
499
+ [Coradoc::CoreModel::TermElement.new(
410
500
  content: content.term.to_s
411
501
  )]
412
502
  when String
413
- content.empty? ? [] : [content]
503
+ content.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: content)]
414
504
  when Coradoc::AsciiDoc::Model::Base
415
505
  [transform(content)]
416
506
  else
417
507
  text = extract_text_content(content)
418
- text.empty? ? [] : [text]
508
+ text.empty? ? [] : [Coradoc::CoreModel::TextContent.new(text: text)]
419
509
  end
420
510
  end
421
511
 
@@ -502,8 +592,12 @@ module Coradoc
502
592
  "{#{content.name}}"
503
593
  when Coradoc::AsciiDoc::Model::Term
504
594
  content.term.to_s
595
+ when Coradoc::CoreModel::TextContent
596
+ content.text.to_s
505
597
  when Coradoc::CoreModel::Image
506
598
  content.alt || content.src || ''
599
+ when Coradoc::AsciiDoc::Model::Image::Core
600
+ content.alt || content.src || ''
507
601
  when Coradoc::AsciiDoc::Model::Base
508
602
  if content.content
509
603
  extract_text_content(content.content)
@@ -548,8 +642,8 @@ module Coradoc
548
642
 
549
643
  transformed = transform_inline_content(content_array)
550
644
 
551
- if transformed.all? { |item| item.is_a?(String) }
552
- transformed.join
645
+ if transformed.all?(Coradoc::CoreModel::TextContent)
646
+ transformed.map(&:text).join
553
647
  else
554
648
  transformed
555
649
  end
@@ -64,8 +64,7 @@ module Coradoc
64
64
  Registry.register(
65
65
  Coradoc::AsciiDoc::Model::CommentBlock,
66
66
  lambda { |model|
67
- Coradoc::CoreModel::Block.new(
68
- element_type: 'comment',
67
+ Coradoc::CoreModel::CommentBlock.new(
69
68
  content: model.text.to_s
70
69
  )
71
70
  }
@@ -19,11 +19,17 @@ module Coradoc
19
19
 
20
20
  # Convert nested array to proper List object if needed
21
21
  if nested.is_a?(Array) && nested.any?
22
- first_marker = nested.first.is_a?(Model::List::Item) ? nested.first.marker : marker
23
- nested = if first_marker.to_s.start_with?('.', '1', 'a', 'A', 'i', 'I')
24
- Model::List::Ordered.new(items: nested)
22
+ nested = if nested.all?(Model::List::Core)
23
+ nested.first
24
+ elsif nested.all?(Model::List::Item)
25
+ first_marker = nested.first.marker
26
+ if first_marker.to_s.lstrip.start_with?('.', '1', 'a', 'A', 'i', 'I')
27
+ Model::List::Ordered.new(items: nested)
28
+ else
29
+ Model::List::Unordered.new(items: nested)
30
+ end
25
31
  else
26
- Model::List::Unordered.new(items: nested)
32
+ nested
27
33
  end
28
34
  end
29
35
 
@@ -61,9 +67,21 @@ module Coradoc
61
67
  Model::List::Ordered.new(items: list_items, attrs: attribute_list)
62
68
  end
63
69
 
64
- # Definition list term
65
- rule(dlist_term: simple(:term), delimiter: simple(:_delim)) do
66
- term.to_s
70
+ # Definition list term (with optional anchor)
71
+ rule(dlist_term: subtree(:term_data), delimiter: simple(:_delim)) do
72
+ case term_data
73
+ when Hash
74
+ text = term_data[:text]
75
+ text = text.to_s if text.is_a?(Parslet::Slice) || text.is_a?(String)
76
+ text = text.content.to_s if text.is_a?(Model::TextElement)
77
+ id = term_data[:id]
78
+ id = id.to_s if id.is_a?(Parslet::Slice)
79
+ { text: text.to_s, id: id }
80
+ when Model::TextElement
81
+ { text: term_data.content.to_s, id: term_data.id }
82
+ else
83
+ { text: term_data.to_s, id: nil }
84
+ end
67
85
  end
68
86
 
69
87
  # Definition list item
@@ -73,7 +91,17 @@ module Coradoc
73
91
  definition: simple(:contents)
74
92
  }
75
93
  ) do
76
- Model::List::DefinitionItem.new(terms: terms, contents: contents)
94
+ term_strings = terms.map do |t|
95
+ t.is_a?(Hash) ? t[:text].to_s : t.to_s
96
+ end
97
+ item_id = nil
98
+ terms.each do |t|
99
+ next unless t.is_a?(Hash) && t[:id]
100
+
101
+ item_id = t[:id].to_s
102
+ break
103
+ end
104
+ Model::List::DefinitionItem.new(terms: term_strings, contents: contents, id: item_id)
77
105
  end
78
106
 
79
107
  # Definition list item with hash terms (single term case)
@@ -83,11 +111,13 @@ module Coradoc
83
111
  terms_data = item_data[:terms]
84
112
  definition = item_data[:definition]
85
113
 
86
- # Extract terms
114
+ # Extract terms and optional id from structured dlist_term output
115
+ item_id = nil
87
116
  terms = if terms_data.is_a?(Array)
88
117
  terms_data.map do |t|
89
- if t.is_a?(Hash) && t[:dlist_term]
90
- t[:dlist_term].to_s
118
+ if t.is_a?(Hash)
119
+ item_id ||= t[:id].to_s if t[:id]
120
+ t[:text].to_s
91
121
  else
92
122
  t.to_s
93
123
  end
@@ -97,11 +127,9 @@ module Coradoc
97
127
  end
98
128
 
99
129
  # Extract definition
100
- if definition.is_a?(Parslet::Slice)
101
- end
102
130
  contents = definition.to_s
103
131
 
104
- Model::List::DefinitionItem.new(terms: terms, contents: contents)
132
+ Model::List::DefinitionItem.new(terms: terms, contents: contents, id: item_id)
105
133
  end
106
134
 
107
135
  rule(definition_list: sequence(:list_items)) do
@@ -115,14 +143,6 @@ module Coradoc
115
143
  ) do
116
144
  Model::List::Definition.new(items: list_items, attrs: attribute_list)
117
145
  end
118
-
119
- # List containing definition_list
120
- rule(
121
- attribute_list: simple(:attribute_list),
122
- definition_list: sequence(:list_items)
123
- ) do
124
- Model::List::Definition.new(items: list_items, attrs: attribute_list)
125
- end
126
146
  end
127
147
  end
128
148
  end
@@ -223,8 +223,11 @@ module Coradoc
223
223
  cell_opts[:repeat] = true if format_str.include?('*')
224
224
  end
225
225
 
226
+ # Strip escaped delimiters (\| → |) in cell content
227
+ unescaped_content = content.to_s.gsub(/\\([|!,:;])/, '\1')
228
+
226
229
  # Parse content based on style
227
- parsed_content = parse_inline_content(content, style)
230
+ parsed_content = parse_inline_content(unescaped_content, style)
228
231
  cell_opts[:content] = parsed_content
229
232
 
230
233
  Model::TableCell.new(**cell_opts)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Coradoc
4
4
  module AsciiDoc
5
- VERSION = '2.0.0'
5
+ VERSION = '2.0.6'
6
6
  end
7
7
  end
@@ -32,6 +32,7 @@ module Coradoc
32
32
  autoload :Transformer, "#{__dir__}/asciidoc/transformer"
33
33
  autoload :Serializer, "#{__dir__}/asciidoc/serializer"
34
34
  autoload :Transform, "#{__dir__}/asciidoc/transform"
35
+ autoload :Builder, "#{__dir__}/asciidoc/builder"
35
36
  end
36
37
  end
37
38
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc-adoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -13,30 +13,30 @@ dependencies:
13
13
  name: coradoc
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - ">="
16
+ - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '0'
18
+ version: '2.0'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
- - - ">="
23
+ - - "~>"
24
24
  - !ruby/object:Gem::Version
25
- version: '0'
25
+ version: '2.0'
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: lutaml-model
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: '0.7'
32
+ version: 0.8.0
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '0.7'
39
+ version: 0.8.0
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: parslet
42
42
  requirement: !ruby/object:Gem::Requirement
@@ -103,7 +103,14 @@ extensions: []
103
103
  extra_rdoc_files: []
104
104
  files:
105
105
  - ".rspec"
106
+ - Rakefile
106
107
  - lib/coradoc/asciidoc.rb
108
+ - lib/coradoc/asciidoc/builder.rb
109
+ - lib/coradoc/asciidoc/builder/block_builder.rb
110
+ - lib/coradoc/asciidoc/builder/detection.rb
111
+ - lib/coradoc/asciidoc/builder/element_builder.rb
112
+ - lib/coradoc/asciidoc/builder/list_builder.rb
113
+ - lib/coradoc/asciidoc/builder/text_builder.rb
107
114
  - lib/coradoc/asciidoc/model.rb
108
115
  - lib/coradoc/asciidoc/model/admonition.rb
109
116
  - lib/coradoc/asciidoc/model/anchorable.rb
@@ -330,7 +337,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
330
337
  requirements:
331
338
  - - ">="
332
339
  - !ruby/object:Gem::Version
333
- version: 3.0.0
340
+ version: 3.3.0
334
341
  required_rubygems_version: !ruby/object:Gem::Requirement
335
342
  requirements:
336
343
  - - ">="