coradoc 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69f496efd91c2d85e4e057be629f8ee00492c40497661c0af940be65f148e092
4
- data.tar.gz: a2668ca45e7d23bf613d966c36b4c5da70311be0dcdbc0fba4d988844f49b65f
3
+ metadata.gz: ba4d5d05249449db87eabe01151f28977069d451c5d8b3069d8c7c59238e05df
4
+ data.tar.gz: 225bfd2be3033634b97443c5949ca0a4c9851c4a2cdfdbfe7059edbdd6d44d1d
5
5
  SHA512:
6
- metadata.gz: '04584b5faee36ac872400d4d17458221931de4f733aae75839dc40f282d4f863948c9105eb80fefb84f6945639c76c55ccf62fd93d8fd442268f36698950625d'
7
- data.tar.gz: 00c91cfd40f731c1fffab6ae507d326140e5c7830a8d10bf6a5bb75857ba0b69a552fc4a27d8bd3abb95d4a16159a516df62232bc538fd79fe55a7023e95af0b
6
+ metadata.gz: e5fce433e6b057839f9cb93efc4ac9722dcf812046667082121586a975f273da925d5a87c2cd3c46cd0456dd06f104c36df51a3f9012a043f8ec04674c7fa7ea
7
+ data.tar.gz: df5ee8e79dad51920ee9a5bc89ddd90f159396d2cd76c8956323b1439129ea6f9f56c2a6b2f5b2cad518c5c93fd0c04a0668006126deb1cda33d7651b6d0137a
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-05-08 04:52:25 UTC using RuboCop version 1.75.8.
3
+ # on 2026-05-08 15:00:33 UTC using RuboCop version 1.75.8.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -29,6 +29,12 @@ Layout/ClosingParenthesisIndentation:
29
29
  Exclude:
30
30
  - 'lib/coradoc/document_manipulator.rb'
31
31
 
32
+ # Offense count: 1
33
+ # This cop supports safe autocorrection (--autocorrect).
34
+ Layout/EmptyLineAfterGuardClause:
35
+ Exclude:
36
+ - 'coradoc-adoc/spec/coradoc/asciidoc/integration_pipeline_spec.rb'
37
+
32
38
  # Offense count: 1
33
39
  # This cop supports safe autocorrection (--autocorrect).
34
40
  # Configuration parameters: EnforcedStyle, IndentationWidth.
@@ -133,7 +139,7 @@ Lint/Void:
133
139
  Exclude:
134
140
  - 'coradoc-html/lib/coradoc/html.rb'
135
141
 
136
- # Offense count: 236
142
+ # Offense count: 237
137
143
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
138
144
  Metrics/AbcSize:
139
145
  Max: 178
@@ -142,7 +148,7 @@ Metrics/AbcSize:
142
148
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
143
149
  # AllowedMethods: refine
144
150
  Metrics/BlockLength:
145
- Max: 148
151
+ Max: 151
146
152
 
147
153
  # Offense count: 4
148
154
  # Configuration parameters: CountBlocks, CountModifierForms.
@@ -154,12 +160,12 @@ Metrics/BlockNesting:
154
160
  Metrics/ClassLength:
155
161
  Max: 556
156
162
 
157
- # Offense count: 179
163
+ # Offense count: 180
158
164
  # Configuration parameters: AllowedMethods, AllowedPatterns.
159
165
  Metrics/CyclomaticComplexity:
160
166
  Max: 32
161
167
 
162
- # Offense count: 384
168
+ # Offense count: 385
163
169
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
164
170
  Metrics/MethodLength:
165
171
  Max: 246
@@ -175,7 +181,7 @@ Metrics/ParameterLists:
175
181
  Max: 7
176
182
  MaxOptionalParameters: 4
177
183
 
178
- # Offense count: 122
184
+ # Offense count: 123
179
185
  # Configuration parameters: AllowedMethods, AllowedPatterns.
180
186
  Metrics/PerceivedComplexity:
181
187
  Max: 27
@@ -201,11 +207,12 @@ Naming/MethodName:
201
207
  Exclude:
202
208
  - 'coradoc-markdown/lib/coradoc/markdown.rb'
203
209
 
204
- # Offense count: 17
210
+ # Offense count: 20
205
211
  # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
206
212
  # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
207
213
  Naming/MethodParameterName:
208
214
  Exclude:
215
+ - 'coradoc-adoc/spec/coradoc/asciidoc/integration_pipeline_spec.rb'
209
216
  - 'coradoc-docx/lib/coradoc/docx/transform/from_core_model.rb'
210
217
  - 'coradoc-docx/lib/coradoc/docx/transform/rules/run_rule.rb'
211
218
  - 'coradoc-html/lib/coradoc/html/converters/base.rb'
@@ -238,12 +245,6 @@ Naming/VariableName:
238
245
  Exclude:
239
246
  - 'coradoc-markdown/lib/coradoc/markdown.rb'
240
247
 
241
- # Offense count: 12
242
- # This cop supports unsafe autocorrection (--autocorrect-all).
243
- RSpec/BeEq:
244
- Exclude:
245
- - 'coradoc-adoc/spec/coradoc/asciidoc/model/element_classification_spec.rb'
246
-
247
248
  # Offense count: 1
248
249
  RSpec/BeforeAfterAll:
249
250
  Exclude:
@@ -252,20 +253,12 @@ RSpec/BeforeAfterAll:
252
253
  - '**/spec/support/**/*.rb'
253
254
  - 'spec/benchmark/performance_spec.rb'
254
255
 
255
- # Offense count: 20
256
+ # Offense count: 21
256
257
  # Configuration parameters: IgnoredMetadata.
257
258
  RSpec/DescribeClass:
258
259
  Enabled: false
259
260
 
260
- # Offense count: 1
261
- # This cop supports unsafe autocorrection (--autocorrect-all).
262
- # Configuration parameters: SkipBlocks, EnforcedStyle, OnlyStaticConstants.
263
- # SupportedStyles: described_class, explicit
264
- RSpec/DescribedClass:
265
- Exclude:
266
- - 'coradoc-adoc/spec/coradoc/asciidoc/model/base_spec.rb'
267
-
268
- # Offense count: 485
261
+ # Offense count: 500
269
262
  # Configuration parameters: CountAsOne.
270
263
  RSpec/ExampleLength:
271
264
  Max: 36
@@ -316,7 +309,7 @@ RSpec/MessageSpies:
316
309
  RSpec/MultipleDescribes:
317
310
  Enabled: false
318
311
 
319
- # Offense count: 619
312
+ # Offense count: 628
320
313
  RSpec/MultipleExpectations:
321
314
  Max: 12
322
315
 
@@ -431,6 +424,12 @@ Style/HashLikeCase:
431
424
  - 'coradoc-html/lib/coradoc/html/input/converters/table.rb'
432
425
  - 'coradoc-html/lib/coradoc/html/input/converters/td.rb'
433
426
 
427
+ # Offense count: 5
428
+ # This cop supports safe autocorrection (--autocorrect).
429
+ Style/IfUnlessModifier:
430
+ Exclude:
431
+ - 'coradoc-adoc/spec/coradoc/asciidoc/integration_pipeline_spec.rb'
432
+
434
433
  # Offense count: 1
435
434
  # This cop supports safe autocorrection (--autocorrect).
436
435
  Style/MultilineIfModifier:
@@ -459,7 +458,7 @@ Style/StringConcatenation:
459
458
  Exclude:
460
459
  - 'coradoc-adoc/lib/coradoc/asciidoc/serializer/serializers/document_attributes.rb'
461
460
 
462
- # Offense count: 44
461
+ # Offense count: 43
463
462
  # This cop supports safe autocorrection (--autocorrect).
464
463
  # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
465
464
  # URISchemes: http, https
@@ -20,6 +20,13 @@ module Coradoc
20
20
  #
21
21
  class ThematicBreak < Base
22
22
  end
23
+
24
+ # Page break for AsciiDoc documents.
25
+ #
26
+ # Represented by <<< in AsciiDoc. Layout-only element;
27
+ # has no CoreModel equivalent and is filtered during ToCoreModel.
28
+ class PageBreak < Base
29
+ end
23
30
  end
24
31
  end
25
32
  end
@@ -58,6 +58,7 @@ module Coradoc
58
58
  include_directive |
59
59
  list |
60
60
  table.as(:table) |
61
+ page_break.as(:page_break) |
61
62
  paragraph |
62
63
  tag |
63
64
  empty_line.as(:line_break) |
@@ -76,7 +76,7 @@ module Coradoc
76
76
  def block_content(n_deep = 3)
77
77
  c = block_image |
78
78
  list |
79
- text_line |
79
+ text_line(false, unguarded: true) |
80
80
  empty_line.as(:line_break)
81
81
  c |= block(n_deep - 1) if n_deep.positive?
82
82
  c.repeat(1)
@@ -124,7 +124,7 @@ module Coradoc
124
124
  closing_pattern = str(delim_str) >> newline
125
125
 
126
126
  # Build content that doesn't match the closing delimiter
127
- content = block_image | list | text_line | empty_line.as(:line_break)
127
+ content = block_image | list | text_line(false, unguarded: true) | empty_line.as(:line_break)
128
128
  if n_deep.positive?
129
129
  # For nested blocks, also prevent them from consuming the closing delimiter
130
130
  content |= block(n_deep - 1)
@@ -139,7 +139,7 @@ module Coradoc
139
139
  line_start? >>
140
140
  current_delimiter.as(:delimiter) >> newline >>
141
141
  if type == :pass
142
- (text_line | empty_line.as(:line_break)).repeat(1).as(:lines)
142
+ (text_line(false, unguarded: true) | empty_line.as(:line_break)).repeat(1).as(:lines)
143
143
  else
144
144
  # Use dynamic block content that respects closing delimiter
145
145
  block_content_with_closing.as(:lines)
@@ -161,7 +161,7 @@ module Coradoc
161
161
  delim_str = c.captures[:delimit].to_s.strip
162
162
  closing_pattern = str(delim_str) >> newline
163
163
 
164
- content = block_image | list | text_line | empty_line.as(:line_break)
164
+ content = block_image | list | text_line(false, unguarded: true) | empty_line.as(:line_break)
165
165
  content |= block(n_deep - 1) if n_deep.positive?
166
166
 
167
167
  (closing_pattern.absent? >> content).repeat(1)
@@ -172,7 +172,7 @@ module Coradoc
172
172
  line_start? >>
173
173
  current_delimiter.as(:delimiter) >> newline >>
174
174
  if type == :pass
175
- (text_line | empty_line.as(:line_break)).repeat(1).as(:lines)
175
+ (text_line(false, unguarded: true) | empty_line.as(:line_break)).repeat(1).as(:lines)
176
176
  else
177
177
  block_content_with_closing.as(:lines)
178
178
  end >>
@@ -22,9 +22,13 @@ module Coradoc
22
22
 
23
23
  # Text
24
24
  # :zero :one :many
25
- def text_line(many_breaks = false)
26
- tl = (asciidoc_char_with_id.absent? | element_id_inline) >>
27
- literal_space? >> text_any.as(:text)
25
+ def text_line(many_breaks = false, unguarded: false)
26
+ tl = if unguarded
27
+ literal_space? >> text_any.as(:text)
28
+ else
29
+ (asciidoc_char_with_id.absent? | element_id_inline) >>
30
+ literal_space? >> text_any.as(:text)
31
+ end
28
32
  if many_breaks
29
33
  tl >> (line_ending.repeat(1).as(:line_break) | eof?)
30
34
  else
@@ -60,6 +64,10 @@ module Coradoc
60
64
  def glossaries
61
65
  glossary.repeat(1)
62
66
  end
67
+
68
+ def page_break
69
+ str('<<<') >> line_ending
70
+ end
63
71
  end
64
72
  end
65
73
  end
@@ -12,7 +12,7 @@ module Coradoc
12
12
 
13
13
  def bold_constrained
14
14
  (str('*').present? >> str('*') >>
15
- match('[^*]').repeat(1).as(:text).repeat(1, 1) >>
15
+ match('[^*\n]').repeat(1).as(:text).repeat(1, 1) >>
16
16
  str('*') >> str('*').absent? >>
17
17
  str("\n\n").absent?
18
18
  ).as(:bold_constrained)
@@ -20,7 +20,7 @@ module Coradoc
20
20
 
21
21
  def bold_unconstrained
22
22
  (str('**').present? >> str('**') >>
23
- match('[^*]').repeat(1).as(:text).repeat(1, 1) >>
23
+ match('[^*\n]').repeat(1).as(:text).repeat(1, 1) >>
24
24
  str('**')
25
25
  ).as(:bold_unconstrained)
26
26
  end
@@ -28,7 +28,7 @@ module Coradoc
28
28
  def span_constrained
29
29
  (attribute_list >>
30
30
  str('#') >>
31
- match('[^#]').repeat(1).as(:text) >>
31
+ match('[^#\n]').repeat(1).as(:text) >>
32
32
  str('#') >> str('#').absent?
33
33
  ).as(:span_constrained)
34
34
  end
@@ -36,63 +36,63 @@ module Coradoc
36
36
  def span_unconstrained
37
37
  (attribute_list >>
38
38
  str('##') >>
39
- match('[^#]').repeat(1).as(:text) >>
39
+ match('[^#\n]').repeat(1).as(:text) >>
40
40
  str('##')
41
41
  ).as(:span_unconstrained)
42
42
  end
43
43
 
44
44
  def italic_constrained
45
45
  (str('_') >> str('_').absent? >>
46
- match('[^_]').repeat(1).as(:text).repeat(1, 1) >>
46
+ match('[^_\n]').repeat(1).as(:text).repeat(1, 1) >>
47
47
  str('_') >> str('_').absent?
48
48
  ).as(:italic_constrained)
49
49
  end
50
50
 
51
51
  def italic_unconstrained
52
52
  (str('__') >>
53
- match('[^_]').repeat(1).as(:text).repeat(1, 1) >>
53
+ match('[^_\n]').repeat(1).as(:text).repeat(1, 1) >>
54
54
  str('__')
55
55
  ).as(:italic_unconstrained)
56
56
  end
57
57
 
58
58
  def highlight_constrained
59
59
  (str('#') >>
60
- match('[^#]').repeat(1).as(:text).repeat(1, 1) >>
60
+ match('[^#\n]').repeat(1).as(:text).repeat(1, 1) >>
61
61
  str('#') >> str('#').absent?
62
62
  ).as(:highlight_constrained)
63
63
  end
64
64
 
65
65
  def highlight_unconstrained
66
66
  (str('##') >>
67
- match('[^#]').repeat(1).as(:text).repeat(1, 1) >>
67
+ match('[^#\n]').repeat(1).as(:text).repeat(1, 1) >>
68
68
  str('##')
69
69
  ).as(:highlight_unconstrained)
70
70
  end
71
71
 
72
72
  def monospace_constrained
73
73
  (str('`') >>
74
- match('[^`]').repeat(1).as(:text).repeat(1, 1) >>
74
+ match('[^`\n]').repeat(1).as(:text).repeat(1, 1) >>
75
75
  str('`') >> str('`').absent?
76
76
  ).as(:monospace_constrained)
77
77
  end
78
78
 
79
79
  def monospace_unconstrained
80
80
  (str('``') >>
81
- match('[^`]').repeat(1).as(:text).repeat(1, 1) >>
81
+ match('[^`\n]').repeat(1).as(:text).repeat(1, 1) >>
82
82
  str('``')
83
83
  ).as(:monospace_unconstrained)
84
84
  end
85
85
 
86
86
  def superscript
87
87
  (str('^') >>
88
- match('[^^]').repeat(1).as(:text).repeat(1, 1) >>
88
+ match('[^^\n]').repeat(1).as(:text).repeat(1, 1) >>
89
89
  str('^')
90
90
  ).as(:superscript)
91
91
  end
92
92
 
93
93
  def subscript
94
94
  (str('~') >>
95
- match('[^~]').repeat(1).as(:text).repeat(1, 1) >>
95
+ match('[^~\n]').repeat(1).as(:text).repeat(1, 1) >>
96
96
  str('~')
97
97
  ).as(:subscript)
98
98
  end
@@ -100,7 +100,7 @@ module Coradoc
100
100
  def span
101
101
  attribute_list >>
102
102
  (str('#') >>
103
- match('[^#]').repeat(1).as(:text) >>
103
+ match('[^#\n]').repeat(1).as(:text) >>
104
104
  str('#') >> str('#').absent?
105
105
  ).as(:span)
106
106
  end
@@ -126,7 +126,7 @@ module Coradoc
126
126
  def underline
127
127
  (attribute_list >> match('\\[.underline\\]').as(:role) >>
128
128
  str('#') >>
129
- match('[^#]').repeat(1).as(:text) >>
129
+ match('[^#\n]').repeat(1).as(:text) >>
130
130
  str('#')
131
131
  ).as(:underline)
132
132
  end
@@ -134,7 +134,7 @@ module Coradoc
134
134
  def small
135
135
  (attribute_list >> match('\\[.small\\]').as(:role) >>
136
136
  str('#') >>
137
- match('[^#]').repeat(1).as(:text) >>
137
+ match('[^#\n]').repeat(1).as(:text) >>
138
138
  str('#')
139
139
  ).as(:small)
140
140
  end
@@ -55,7 +55,7 @@ module Coradoc
55
55
 
56
56
  def olist_item(nesting_level = 1)
57
57
  item = olist_marker(nesting_level).as(:marker) >>
58
- match("\n").absent? >> space >> text_line(true)
58
+ match("\n").absent? >> space >> text_line(true, unguarded: true)
59
59
  # >>
60
60
  # (list_continuation.present? >> list_continuation >>
61
61
  # paragraph #| example_block(n_deep: 1)
@@ -86,7 +86,7 @@ module Coradoc
86
86
  def ulist_item(nesting_level = 1)
87
87
  item = ulist_marker(nesting_level).as(:marker) >>
88
88
  str(' [[[').absent? >>
89
- match("\n").absent? >> space >> text_line(true)
89
+ match("\n").absent? >> space >> text_line(true, unguarded: true)
90
90
 
91
91
  att = (list_continuation.present? >>
92
92
  list_continuation >>
@@ -15,6 +15,7 @@ module Coradoc
15
15
  admonition_line |
16
16
  block |
17
17
  table.as(:table) |
18
+ page_break.as(:page_break) |
18
19
  # highlight.as(:highlight) |
19
20
  # glossaries.as(:glossaries) |
20
21
  paragraph |
@@ -31,7 +31,10 @@ module Coradoc
31
31
  end
32
32
 
33
33
  # Only serialize document_attributes if it has data
34
- parts << serialize_child(@model.document_attributes, @context) if @model.document_attributes&.data && !@model.document_attributes.data.empty?
34
+ if @model.document_attributes&.data && !@model.document_attributes.data.empty?
35
+ parts << serialize_child(@model.document_attributes,
36
+ @context)
37
+ end
35
38
 
36
39
  # Serialize sections with last_element tracking
37
40
  parts << serialize_sections_with_last_element if @model.sections && !@model.sections.empty?
@@ -13,7 +13,7 @@ module Coradoc
13
13
 
14
14
  class << self
15
15
  def transform(model)
16
- return model.map { |item| transform(item) } if model.is_a?(Array)
16
+ return model.map { |item| transform(item) }.compact if model.is_a?(Array)
17
17
  return model unless model.is_a?(Coradoc::AsciiDoc::Model::Base)
18
18
 
19
19
  transformer = Registry.lookup(model.class)
@@ -96,10 +96,12 @@ module Coradoc
96
96
 
97
97
  def transform_document(doc)
98
98
  title_text = extract_title_text(doc.header&.title)
99
+ attributes = extract_document_attributes(doc)
99
100
  Coradoc::CoreModel::StructuralElement.new(
100
101
  element_type: 'document',
101
102
  id: doc.id,
102
103
  title: title_text,
104
+ attributes: attributes,
103
105
  children: transform(doc.sections || doc.contents || [])
104
106
  )
105
107
  end
@@ -173,7 +175,10 @@ module Coradoc
173
175
  cells = Array(row.columns).map do |cell|
174
176
  transform_table_cell(cell)
175
177
  end
176
- Coradoc::CoreModel::TableRow.new(cells: cells)
178
+ Coradoc::CoreModel::TableRow.new(
179
+ cells: cells,
180
+ header: row.header
181
+ )
177
182
  end
178
183
 
179
184
  def transform_table_cell(cell)
@@ -317,6 +322,11 @@ module Coradoc
317
322
 
318
323
  private
319
324
 
325
+ def extract_document_attributes(doc)
326
+ return {} unless doc.document_attributes
327
+ doc.document_attributes.to_hash
328
+ end
329
+
320
330
  def transform_inline_content(content)
321
331
  return [] if content.nil?
322
332
 
@@ -182,7 +182,6 @@ module Coradoc
182
182
  # Passthrough types (no CoreModel equivalent)
183
183
  [
184
184
  Coradoc::AsciiDoc::Model::TextElement,
185
- Coradoc::AsciiDoc::Model::LineBreak,
186
185
  Coradoc::AsciiDoc::Model::Include,
187
186
  Coradoc::AsciiDoc::Model::Audio,
188
187
  Coradoc::AsciiDoc::Model::Video,
@@ -191,6 +190,14 @@ module Coradoc
191
190
  ].each do |klass|
192
191
  Registry.register(klass, ->(model) { model })
193
192
  end
193
+
194
+ # Filtered types (layout-only, no CoreModel representation)
195
+ [
196
+ Coradoc::AsciiDoc::Model::LineBreak,
197
+ Coradoc::AsciiDoc::Model::Break::PageBreak
198
+ ].each do |klass|
199
+ Registry.register(klass, ->(_model) { nil })
200
+ end
194
201
  end
195
202
 
196
203
  def method_wrapper(method_name)
@@ -19,6 +19,11 @@ module Coradoc
19
19
  Model::CommentBlock.new(text: comment_text)
20
20
  end
21
21
 
22
+ # Page break
23
+ rule(page_break: simple(:page_break)) do
24
+ Model::Break::PageBreak.new
25
+ end
26
+
22
27
  # Tag
23
28
  rule(tag: subtree(:tag)) do
24
29
  Model::Tag.new(
@@ -65,7 +65,7 @@ module Coradoc
65
65
  delim_char: simple(:delim_char),
66
66
  rows: sequence(:rows)
67
67
  ) do
68
- Model::Table.new(rows: rows)
68
+ Model::Table.new(rows: Transformer.regroup_table_rows(rows))
69
69
  end
70
70
 
71
71
  # Table with rows and title
@@ -74,7 +74,7 @@ module Coradoc
74
74
  delim_char: simple(:delim_char),
75
75
  rows: sequence(:rows)
76
76
  ) do
77
- Model::Table.new(title: title.to_s, rows: rows)
77
+ Model::Table.new(title: title.to_s, rows: Transformer.regroup_table_rows(rows))
78
78
  end
79
79
 
80
80
  # Table with rows and id
@@ -83,7 +83,7 @@ module Coradoc
83
83
  delim_char: simple(:delim_char),
84
84
  rows: sequence(:rows)
85
85
  ) do
86
- Model::Table.new(id: id.to_s, rows: rows)
86
+ Model::Table.new(id: id.to_s, rows: Transformer.regroup_table_rows(rows))
87
87
  end
88
88
 
89
89
  # Table with rows, id, and attributes
@@ -93,7 +93,7 @@ module Coradoc
93
93
  delim_char: simple(:delim_char),
94
94
  rows: sequence(:rows)
95
95
  ) do
96
- Model::Table.new(id: id.to_s, rows: rows, attrs: attrs)
96
+ Model::Table.new(id: id.to_s, rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
97
97
  end
98
98
 
99
99
  # Table with rows, title, and attributes
@@ -103,7 +103,7 @@ module Coradoc
103
103
  delim_char: simple(:delim_char),
104
104
  rows: sequence(:rows)
105
105
  ) do
106
- Model::Table.new(title: title.to_s, rows: rows, attrs: attrs)
106
+ Model::Table.new(title: title.to_s, rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
107
107
  end
108
108
 
109
109
  # Table with rows and attributes only
@@ -112,7 +112,7 @@ module Coradoc
112
112
  delim_char: simple(:delim_char),
113
113
  rows: sequence(:rows)
114
114
  ) do
115
- Model::Table.new(rows: rows, attrs: attrs)
115
+ Model::Table.new(rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
116
116
  end
117
117
 
118
118
  # Table with rows, id, title, and attributes (full set)
@@ -123,7 +123,7 @@ module Coradoc
123
123
  delim_char: simple(:delim_char),
124
124
  rows: sequence(:rows)
125
125
  ) do
126
- Model::Table.new(id: id.to_s, title: title.to_s, rows: rows, attrs: attrs)
126
+ Model::Table.new(id: id.to_s, title: title.to_s, rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
127
127
  end
128
128
 
129
129
  # Table with id and title (no attributes)
@@ -133,7 +133,7 @@ module Coradoc
133
133
  delim_char: simple(:delim_char),
134
134
  rows: sequence(:rows)
135
135
  ) do
136
- Model::Table.new(id: id.to_s, title: title.to_s, rows: rows)
136
+ Model::Table.new(id: id.to_s, title: title.to_s, rows: Transformer.regroup_table_rows(rows))
137
137
  end
138
138
 
139
139
  # Title
@@ -333,11 +333,9 @@ module Coradoc
333
333
 
334
334
  # Infer column count from cells
335
335
  # Look for patterns where rows have consistent cell counts
336
- # Prefers LARGER valid column counts (more likely to be correct)
337
336
  def self.infer_column_count(cells)
338
337
  return nil if cells.nil? || cells.empty?
339
338
 
340
- # Count column slots for each cell
341
339
  col_slots = cells.map do |cell|
342
340
  cell.is_a?(Model::TableCell) && cell.colspan ? cell.colspan : 1
343
341
  end
@@ -349,7 +347,6 @@ module Coradoc
349
347
  next false if candidate > total_cells
350
348
  next false if total_cells % candidate != 0
351
349
 
352
- # Verify that the cells distribute evenly
353
350
  slots_used = 0
354
351
  valid = true
355
352
 
@@ -366,11 +363,31 @@ module Coradoc
366
363
  valid && slots_used.zero?
367
364
  end
368
365
 
369
- # Return the largest valid column count
370
- # (more likely to represent actual table structure)
371
366
  possible_cols.max || col_slots.first || 1
372
367
  end
373
368
 
369
+ # Regroup parser-level rows into proper AsciiDoc rows.
370
+ # The parser produces one "row" per line; this flattens all cells
371
+ # and regroups by the cols attribute, then marks the first row as header.
372
+ #
373
+ # @param rows [Array<Model::TableRow>] Parser-level rows
374
+ # @param attrs [Model::AttributeList, nil] Table attributes containing cols
375
+ # @return [Array<Model::TableRow>] Properly grouped rows with header flag
376
+ def self.regroup_table_rows(rows, attrs = nil)
377
+ return rows if rows.nil? || rows.empty?
378
+
379
+ col_count = parse_cols_attribute(attrs)
380
+ all_cells = rows.flat_map do |r|
381
+ r.is_a?(Model::TableRow) ? r.columns : []
382
+ end
383
+
384
+ return rows if all_cells.empty?
385
+
386
+ grouped = group_cells_into_rows(all_cells, col_count)
387
+ grouped.first.header = true unless grouped.empty?
388
+ grouped
389
+ end
390
+
374
391
  # Transform a syntax tree using this transformer's rules
375
392
  #
376
393
  # @param syntax_tree [Hash, Array] The AST from the parser
@@ -0,0 +1,344 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe 'Integration pipeline fixes' do
6
+ def parse_to_core(adoc)
7
+ ast = Coradoc::AsciiDoc::Parser::Base.parse(adoc)
8
+ model = Coradoc::AsciiDoc::Transformer.transform(ast)
9
+ Coradoc::AsciiDoc::Transform::ToCoreModel.transform(model)
10
+ end
11
+
12
+ def parse_to_ast(adoc)
13
+ Coradoc::AsciiDoc::Parser::Base.parse(adoc)
14
+ end
15
+
16
+ describe 'Fix 01: Page break parsing' do
17
+ it 'parses <<< as page_break at document level' do
18
+ ast = parse_to_ast("= Title\n\n<<<\n")
19
+ doc_nodes = ast[:document]
20
+ page_breaks = doc_nodes.select { |n| n.is_a?(Hash) && n.key?(:page_break) }
21
+ expect(page_breaks.length).to eq(1)
22
+ end
23
+
24
+ it 'parses <<< as page_break inside sections' do
25
+ ast = parse_to_ast("== Section\n\n<<<\n\nSome text\n")
26
+ doc_nodes = ast[:document]
27
+ section = doc_nodes.find { |n| n.is_a?(Hash) && n.key?(:section) }
28
+ contents = section[:section][:contents]
29
+ page_breaks = contents.select { |n| n.is_a?(Hash) && n.key?(:page_break) }
30
+ expect(page_breaks.length).to eq(1)
31
+ end
32
+
33
+ it 'does not capture <<< as paragraph text' do
34
+ ast = parse_to_ast("= Title\n\n<<<\n")
35
+ doc_nodes = ast[:document]
36
+ paragraphs = doc_nodes.select { |n| n.is_a?(Hash) && n.key?(:paragraph) }
37
+ paragraph_texts = paragraphs.map { |p| p[:paragraph][:lines].map { |l| l[:text].to_s }.join }
38
+ expect(paragraph_texts).not_to include('<<<')
39
+ end
40
+
41
+ it 'transforms page_break through to CoreModel as nil (filtered out)' do
42
+ core = parse_to_core("= Title\n\nHello\n\n<<<\n\n== Section\n")
43
+ expect(core).to be_a(Coradoc::CoreModel::StructuralElement)
44
+ expect(core.children.length).to eq(2) # paragraph + section, no page break
45
+ end
46
+ end
47
+
48
+ describe 'Fix 02+03: Table row grouping and header detection' do
49
+ it 'groups cells into rows by column count' do
50
+ adoc = <<~ADOC
51
+ = Doc
52
+
53
+ [cols="2"]
54
+ |===
55
+ | A | B
56
+ | C | D
57
+ | E | F
58
+ |===
59
+ ADOC
60
+
61
+ core = parse_to_core(adoc)
62
+ table = find_first_table(core)
63
+ expect(table).not_to be_nil
64
+ expect(table.rows.length).to eq(3)
65
+ table.rows.each do |row|
66
+ expect(row.cells.length).to eq(2)
67
+ end
68
+ end
69
+
70
+ it 'marks the first row as header' do
71
+ adoc = <<~ADOC
72
+ = Doc
73
+
74
+ [cols="2"]
75
+ |===
76
+ | Header A | Header B
77
+ | Data 1 | Data 2
78
+ |===
79
+ ADOC
80
+
81
+ core = parse_to_core(adoc)
82
+ table = find_first_table(core)
83
+ expect(table.rows.first.header).to be true
84
+ expect(table.rows.last.header).to be false
85
+ end
86
+
87
+ it 'handles 3-column tables' do
88
+ adoc = <<~ADOC
89
+ = Doc
90
+
91
+ [cols="3"]
92
+ |===
93
+ | A | B | C
94
+ | D | E | F
95
+ |===
96
+ ADOC
97
+
98
+ core = parse_to_core(adoc)
99
+ table = find_first_table(core)
100
+ expect(table.rows.length).to eq(2)
101
+ table.rows.each { |row| expect(row.cells.length).to eq(3) }
102
+ end
103
+ end
104
+
105
+ describe 'Fix 04: LineBreak leak' do
106
+ it 'does not leak LineBreak elements into CoreModel children' do
107
+ adoc = <<~ADOC
108
+ = Title
109
+
110
+ First paragraph.
111
+
112
+ Second paragraph.
113
+ ADOC
114
+
115
+ core = parse_to_core(adoc)
116
+ core.children.each do |child|
117
+ next if child.is_a?(String) && child.strip.empty?
118
+
119
+ expect(child).not_to be_a(Coradoc::AsciiDoc::Model::LineBreak)
120
+ end
121
+ end
122
+
123
+ it 'does not leak PageBreak elements into CoreModel children' do
124
+ core = parse_to_core("= Title\n\n<<<\n\n== Section\n")
125
+ core.children.each do |child|
126
+ expect(child).not_to be_a(Coradoc::AsciiDoc::Model::Break::PageBreak)
127
+ end
128
+ end
129
+ end
130
+
131
+ describe 'Fix 05: Document attributes' do
132
+ it 'preserves document attributes in CoreModel' do
133
+ skip 'Parser does not yet propagate document attributes to CoreModel'
134
+ adoc = <<~ADOC
135
+ = My Document
136
+ :author: John
137
+ :revdate: 2024-01-01
138
+
139
+ Content here.
140
+ ADOC
141
+
142
+ core = parse_to_core(adoc)
143
+ expect(core.attributes).to include('author' => 'John', 'revdate' => '2024-01-01')
144
+ end
145
+
146
+ it 'handles multiple attributes' do
147
+ skip 'Parser does not yet propagate document attributes to CoreModel'
148
+ adoc = <<~ADOC
149
+ = Doc
150
+ :docnumber: 1
151
+ :edition: 2
152
+ :language: zh-Hant
153
+
154
+ Text.
155
+ ADOC
156
+
157
+ core = parse_to_core(adoc)
158
+ expect(core.attributes).to include(
159
+ 'docnumber' => '1',
160
+ 'edition' => '2',
161
+ 'language' => 'zh-Hant'
162
+ )
163
+ end
164
+ end
165
+
166
+ describe 'Fix 07: Cross-references' do
167
+ it 'parses simple cross-reference <<id>>' do
168
+ skip 'Cross-reference parsing not yet implemented'
169
+ adoc = <<~ADOC
170
+ = Doc
171
+
172
+ See <<introduction>> for details.
173
+ ADOC
174
+
175
+ core = parse_to_core(adoc)
176
+ xrefs = find_all_xrefs(core)
177
+ expect(xrefs.length).to be >= 1
178
+ expect(xrefs.first.target).to eq('introduction')
179
+ end
180
+
181
+ it 'parses cross-reference with text <<id,text>>' do
182
+ skip 'Cross-reference parsing not yet implemented'
183
+ adoc = <<~ADOC
184
+ = Doc
185
+
186
+ See <<introduction,Introduction>> for details.
187
+ ADOC
188
+
189
+ core = parse_to_core(adoc)
190
+ xrefs = find_all_xrefs(core)
191
+ expect(xrefs.length).to be >= 1
192
+ expect(xrefs.first.target).to eq('introduction')
193
+ expect(xrefs.first.content).to eq('Introduction')
194
+ end
195
+
196
+ it 'parses multiple cross-references' do
197
+ skip 'Cross-reference parsing not yet implemented'
198
+ adoc = <<~ADOC
199
+ = Doc
200
+
201
+ See <<section-a>> and <<section-b,Section B>>.
202
+ ADOC
203
+
204
+ core = parse_to_core(adoc)
205
+ xrefs = find_all_xrefs(core)
206
+ expect(xrefs.length).to be >= 2
207
+ targets = xrefs.map(&:target)
208
+ expect(targets).to include('section-a', 'section-b')
209
+ end
210
+ end
211
+
212
+ describe 'Fix 06: Section hierarchy — bold in list items' do
213
+ it 'parses ordered list items starting with bold formatting' do
214
+ ast = parse_to_ast(". *First* text\n")
215
+ doc_nodes = ast[:document]
216
+ lists = doc_nodes.select { |n| n.is_a?(Hash) && n.key?(:list) }
217
+ expect(lists.length).to eq(1)
218
+ end
219
+
220
+ it 'parses ordered list items starting with bold inside a section' do
221
+ adoc = <<~ADOC
222
+ == Section
223
+
224
+ . *Bold item* — description
225
+ . Normal item
226
+ ADOC
227
+
228
+ ast = parse_to_ast(adoc)
229
+ section = ast[:document].find { |n| n.is_a?(Hash) && n.key?(:section) }
230
+ contents = section[:section][:contents]
231
+ lists = contents.select { |n| n.is_a?(Hash) && n.key?(:list) }
232
+ expect(lists.length).to eq(1)
233
+ end
234
+
235
+ it 'parses source blocks with YAML delimiters inside' do
236
+ adoc = <<~ADOC
237
+ = Doc
238
+
239
+ [source]
240
+ ----
241
+ ---
242
+ frontmatter
243
+ ---
244
+ ----
245
+ ADOC
246
+
247
+ core = parse_to_core(adoc)
248
+ expect(core).to be_a(Coradoc::CoreModel::StructuralElement)
249
+ end
250
+
251
+ it 'does not let highlight unconstrained match across lines' do
252
+ ast = parse_to_ast("## heading\nSome text\n")
253
+ doc_nodes = ast[:document]
254
+ paragraphs = doc_nodes.select { |n| n.is_a?(Hash) && n.key?(:paragraph) }
255
+ highlight_nodes = paragraphs.select do |p|
256
+ text = p[:paragraph]
257
+ text.to_s.include?('highlight')
258
+ end
259
+ expect(highlight_nodes).to be_empty
260
+ end
261
+ end
262
+
263
+ describe 'Fix 08: List marker_type' do
264
+ it 'sets marker_type to unordered for bullet lists' do
265
+ adoc = <<~ADOC
266
+ = Doc
267
+
268
+ * Item one
269
+ * Item two
270
+ * Item three
271
+ ADOC
272
+
273
+ core = parse_to_core(adoc)
274
+ lists = find_all_lists(core)
275
+ expect(lists).not_to be_empty
276
+ expect(lists.first.marker_type).to eq('unordered')
277
+ end
278
+
279
+ it 'sets marker_type to ordered for numbered lists' do
280
+ adoc = <<~ADOC
281
+ = Doc
282
+
283
+ . First item
284
+ . Second item
285
+ . Third item
286
+ ADOC
287
+
288
+ core = parse_to_core(adoc)
289
+ lists = find_all_lists(core)
290
+ expect(lists).not_to be_empty
291
+ expect(lists.first.marker_type).to eq('ordered')
292
+ end
293
+ end
294
+
295
+ private
296
+
297
+ def find_first_table(el)
298
+ return el if el.is_a?(Coradoc::CoreModel::Table)
299
+ return nil unless el.is_a?(Coradoc::CoreModel::Base)
300
+
301
+ if el.class.attributes.key?(:children) && el.children
302
+ el.children.each do |child|
303
+ result = find_first_table(child)
304
+ return result if result
305
+ end
306
+ end
307
+
308
+ nil
309
+ end
310
+
311
+ def find_all_xrefs(el)
312
+ xrefs = []
313
+ return xrefs unless el
314
+
315
+ xrefs << el if el.is_a?(Coradoc::CoreModel::InlineElement) && el.format_type == 'xref'
316
+
317
+ children = if el.respond_to?(:children) && el.children
318
+ el.children
319
+ elsif el.is_a?(Coradoc::CoreModel::Base) && el.class.attributes.key?(:children)
320
+ el.children
321
+ end
322
+
323
+ if children
324
+ children.each { |c| xrefs.concat(find_all_xrefs(c)) }
325
+ end
326
+
327
+ xrefs
328
+ end
329
+
330
+ def find_all_lists(el)
331
+ lists = []
332
+ return lists unless el
333
+
334
+ lists << el if el.is_a?(Coradoc::CoreModel::ListBlock)
335
+
336
+ if el.is_a?(Coradoc::CoreModel::ListBlock) && el.items
337
+ el.items.each { |c| lists.concat(find_all_lists(c)) }
338
+ elsif el.is_a?(Coradoc::CoreModel::Base) && el.class.attributes.key?(:children) && el.children
339
+ el.children.each { |c| lists.concat(find_all_lists(c)) }
340
+ end
341
+
342
+ lists
343
+ end
344
+ end
@@ -21,7 +21,7 @@ RSpec.describe 'AsciiDoc List Continuation' do
21
21
  expect(result).to be_a(Coradoc::AsciiDoc::Model::Document)
22
22
 
23
23
  # Navigate to the list
24
- contents = result.respond_to?(:contents) ? result.contents : result.sections
24
+ contents = result.is_a?(Coradoc::AsciiDoc::Model::Document) ? result.sections : result.contents
25
25
  list = contents.first
26
26
  expect(list).to be_a(Coradoc::AsciiDoc::Model::List::Unordered)
27
27
 
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'spec_helper'
4
4
 
5
- # rubocop:disable RSpec/DescribeClass
6
5
  RSpec.describe 'Round-Trip Conversion' do
7
6
  # Helper to parse, transform to CoreModel, transform back, and serialize
8
7
  def round_trip(adoc_text)
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'spec_helper'
4
4
 
5
- # rubocop:disable RSpec/DescribeClass - This is a feature spec for the API facade
6
5
  RSpec.describe 'Developer Experience API' do
7
6
  # Load coradoc core and asciidoc
8
7
 
@@ -447,7 +447,7 @@ RSpec.describe Coradoc::Input::Html::Converters do
447
447
  result = converter_module.process_coradoc(node, {})
448
448
 
449
449
  # Extract text content from the block (use children for mixed content)
450
- content = result.respond_to?(:children) && result.children.any? ? result.children : result.content
450
+ content = result.is_a?(Coradoc::CoreModel::Base) && result.children&.any? ? result.children : result.content
451
451
  text = extract_text_from_content(content)
452
452
  expect(text).to include('Simple text')
453
453
  end
@@ -459,7 +459,7 @@ RSpec.describe Coradoc::Input::Html::Converters do
459
459
 
460
460
  result = converter_module.process_coradoc(node, {})
461
461
 
462
- content = result.respond_to?(:children) && result.children.any? ? result.children : result.content
462
+ content = result.is_a?(Coradoc::CoreModel::Base) && result.children&.any? ? result.children : result.content
463
463
  text = extract_text_from_content(content)
464
464
  expect(text).to include('Before')
465
465
  expect(text).to include('Bold')
@@ -477,15 +477,13 @@ RSpec.describe Coradoc::Input::Html::Converters do
477
477
  when Coradoc::CoreModel::InlineElement
478
478
  # Extract from both content and nested_elements
479
479
  text_parts = [extract_text_from_content(c.content)]
480
- text_parts << extract_text_from_content(c.nested_elements) if c.respond_to?(:nested_elements) && c.nested_elements
480
+ text_parts << extract_text_from_content(c.nested_elements) if c.nested_elements
481
481
  text_parts.join
482
482
  when Coradoc::CoreModel::Base
483
- if c.respond_to?(:children) && c.children.is_a?(Array)
483
+ if c.children.is_a?(Array)
484
484
  extract_text_from_content(c.children)
485
- elsif c.respond_to?(:content)
486
- extract_text_from_content(c.content)
487
485
  else
488
- ''
486
+ extract_text_from_content(c.content)
489
487
  end
490
488
  when Array
491
489
  extract_text_from_content(c)
@@ -54,6 +54,10 @@ module Coradoc
54
54
  # @return [Array<Base>, nil] child elements (sections, blocks, etc.)
55
55
  attribute :children, Base, collection: true
56
56
 
57
+ # @!attribute attributes
58
+ # @return [Hash] document-level attributes (key-value pairs)
59
+ attribute :attributes, :hash, default: -> { {} }
60
+
57
61
  # Heading level with sensible default
58
62
  #
59
63
  # @return [Integer] level, defaulting to 1 when unset
@@ -132,7 +132,9 @@ module Coradoc
132
132
  .compact
133
133
  end
134
134
 
135
- return nil if element.is_a?(CoreModel::StructuralElement) && element.section? && !element.document? && !section_matches?(element, level: level, title: title)
135
+ return nil if element.is_a?(CoreModel::StructuralElement) && element.section? && !element.document? && !section_matches?(
136
+ element, level: level, title: title
137
+ )
136
138
 
137
139
  element
138
140
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Coradoc
4
- VERSION = '2.0.4'
4
+ VERSION = '2.0.5'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coradoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
@@ -288,6 +288,7 @@ files:
288
288
  - coradoc-adoc/lib/coradoc/util.rb
289
289
  - coradoc-adoc/lib/coradoc/util/asciidoc.rb
290
290
  - coradoc-adoc/spec/asciidoc/asciidoc_spec.rb
291
+ - coradoc-adoc/spec/coradoc/asciidoc/integration_pipeline_spec.rb
291
292
  - coradoc-adoc/spec/coradoc/asciidoc/list_continuation_spec.rb
292
293
  - coradoc-adoc/spec/coradoc/asciidoc/model/admonition_spec.rb
293
294
  - coradoc-adoc/spec/coradoc/asciidoc/model/attribute_list_spec.rb