coradoc-adoc 2.0.8 → 2.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/lib/coradoc/asciidoc/model/bibliography_entry.rb +18 -0
  3. data/lib/coradoc/asciidoc/model/document.rb +9 -0
  4. data/lib/coradoc/asciidoc/model/glossaries.rb +1 -1
  5. data/lib/coradoc/asciidoc/model/list/base.rb +41 -0
  6. data/lib/coradoc/asciidoc/model/list/core.rb +4 -24
  7. data/lib/coradoc/asciidoc/model/list/definition.rb +7 -0
  8. data/lib/coradoc/asciidoc/model/list/definition_item.rb +7 -1
  9. data/lib/coradoc/asciidoc/model/list/item.rb +1 -1
  10. data/lib/coradoc/asciidoc/model/list/nestable.rb +7 -3
  11. data/lib/coradoc/asciidoc/model/list.rb +4 -2
  12. data/lib/coradoc/asciidoc/parser/base.rb +10 -70
  13. data/lib/coradoc/asciidoc/parser/block.rb +3 -22
  14. data/lib/coradoc/asciidoc/parser/block_assembler.rb +37 -100
  15. data/lib/coradoc/asciidoc/parser/block_header.rb +55 -0
  16. data/lib/coradoc/asciidoc/parser/frontmatter_parser.rb +24 -0
  17. data/lib/coradoc/asciidoc/parser/list.rb +18 -13
  18. data/lib/coradoc/asciidoc/parser/paragraph.rb +1 -3
  19. data/lib/coradoc/asciidoc/parser/rule_dispatcher.rb +158 -0
  20. data/lib/coradoc/asciidoc/parser/section.rb +1 -3
  21. data/lib/coradoc/asciidoc/parser/table.rb +1 -4
  22. data/lib/coradoc/asciidoc/parser/text.rb +1 -3
  23. data/lib/coradoc/asciidoc/parser.rb +1 -0
  24. data/lib/coradoc/asciidoc/serializer/serializers/base.rb +1 -1
  25. data/lib/coradoc/asciidoc/serializer/serializers/document.rb +7 -0
  26. data/lib/coradoc/asciidoc/serializer/serializers/list/definition.rb +3 -1
  27. data/lib/coradoc/asciidoc/serializer/serializers/list/definition_item.rb +21 -1
  28. data/lib/coradoc/asciidoc/transform/element_transformers/block_transformer.rb +10 -1
  29. data/lib/coradoc/asciidoc/transform/element_transformers/document_transformer.rb +15 -1
  30. data/lib/coradoc/asciidoc/transform/element_transformers/list_transformer.rb +6 -0
  31. data/lib/coradoc/asciidoc/transform/element_transformers/other_transformer.rb +3 -1
  32. data/lib/coradoc/asciidoc/transform/from_core_model.rb +46 -9
  33. data/lib/coradoc/asciidoc/transform/from_core_model_registrations.rb +5 -1
  34. data/lib/coradoc/asciidoc/transform/frontmatter_attribute_map.rb +112 -0
  35. data/lib/coradoc/asciidoc/transform/text_extract_visitor.rb +33 -1
  36. data/lib/coradoc/asciidoc/transform/to_core_model.rb +10 -2
  37. data/lib/coradoc/asciidoc/transform/to_core_model_registrations.rb +15 -10
  38. data/lib/coradoc/asciidoc/transform.rb +1 -0
  39. data/lib/coradoc/asciidoc/transformer/attribute_list_normalizer.rb +69 -0
  40. data/lib/coradoc/asciidoc/transformer/block_rules.rb +4 -42
  41. data/lib/coradoc/asciidoc/transformer/block_type_classifier.rb +56 -0
  42. data/lib/coradoc/asciidoc/transformer/header_rules.rb +15 -53
  43. data/lib/coradoc/asciidoc/transformer/inline_rules.rb +39 -57
  44. data/lib/coradoc/asciidoc/transformer/list_rules.rb +54 -11
  45. data/lib/coradoc/asciidoc/transformer/misc_rules.rb +1 -24
  46. data/lib/coradoc/asciidoc/transformer/structural_rules.rb +18 -81
  47. data/lib/coradoc/asciidoc/transformer/table_cell_builder.rb +161 -0
  48. data/lib/coradoc/asciidoc/transformer/table_layout.rb +135 -0
  49. data/lib/coradoc/asciidoc/transformer/text_rules.rb +1 -25
  50. data/lib/coradoc/asciidoc/transformer.rb +38 -294
  51. data/lib/coradoc/asciidoc/version.rb +1 -1
  52. data/lib/coradoc/asciidoc.rb +6 -3
  53. metadata +10 -1
@@ -157,35 +157,12 @@ module Coradoc
157
157
  end
158
158
  end
159
159
 
160
- content = lines.map do |line|
161
- if line.is_a?(Hash) && line.key?(:text)
162
- text_content = line[:text]
163
- line_break = line[:line_break]
164
-
165
- transformed_text = if text_content.is_a?(Array)
166
- text_content.map do |item|
167
- if item.is_a?(Hash)
168
- Transformer.new.apply(item)
169
- else
170
- item
171
- end
172
- end
173
- else
174
- text_content
175
- end
176
-
177
- Model::TextElement.new(content: transformed_text, line_break: line_break)
178
- else
179
- line
180
- end
181
- end
182
-
183
160
  Model::ReviewerNote.new(
184
161
  reviewer: attrs[:reviewer],
185
162
  date: attrs[:date],
186
163
  from: attrs[:from],
187
164
  to: attrs[:to],
188
- content: content
165
+ content: Transformer.lines_to_text_elements(lines)
189
166
  )
190
167
  end
191
168
  end
@@ -60,81 +60,19 @@ module Coradoc
60
60
  table
61
61
  end
62
62
 
63
- # Table with rows (new parser output - rows captured explicitly)
64
- rule(
65
- delim_char: simple(:delim_char),
66
- rows: sequence(:rows)
67
- ) do
68
- Model::Table.new(rows: Transformer.regroup_table_rows(rows))
69
- end
70
-
71
- # Table with rows and title
72
- rule(
73
- title: simple(:title),
74
- delim_char: simple(:delim_char),
75
- rows: sequence(:rows)
76
- ) do
77
- Model::Table.new(title: title.to_s, rows: Transformer.regroup_table_rows(rows))
78
- end
79
-
80
- # Table with rows and id
81
- rule(
82
- id: simple(:id),
83
- delim_char: simple(:delim_char),
84
- rows: sequence(:rows)
85
- ) do
86
- Model::Table.new(id: id.to_s, rows: Transformer.regroup_table_rows(rows))
87
- end
88
-
89
- # Table with rows, id, and attributes
90
- rule(
91
- id: simple(:id),
92
- attribute_list: simple(:attrs),
93
- delim_char: simple(:delim_char),
94
- rows: sequence(:rows)
95
- ) do
96
- Model::Table.new(id: id.to_s, rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
97
- end
98
-
99
- # Table with rows, title, and attributes
100
- rule(
101
- title: simple(:title),
102
- attribute_list: simple(:attrs),
103
- delim_char: simple(:delim_char),
104
- rows: sequence(:rows)
105
- ) do
106
- Model::Table.new(title: title.to_s, rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
107
- end
108
-
109
- # Table with rows and attributes only
110
- rule(
111
- attribute_list: simple(:attrs),
112
- delim_char: simple(:delim_char),
113
- rows: sequence(:rows)
114
- ) do
115
- Model::Table.new(rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs)
116
- end
117
-
118
- # Table with rows, id, title, and attributes (full set)
119
- rule(
120
- id: simple(:id),
121
- title: simple(:title),
122
- attribute_list: simple(:attrs),
123
- delim_char: simple(:delim_char),
124
- rows: sequence(:rows)
125
- ) do
126
- Model::Table.new(id: id.to_s, title: title.to_s, rows: Transformer.regroup_table_rows(rows, attrs),
127
- attrs: attrs)
128
- end
129
-
130
- # Table with id and title (no attributes)
131
- rule(
132
- id: simple(:id),
133
- title: simple(:title),
134
- delim_char: simple(:delim_char),
135
- rows: sequence(:rows)
136
- ) do
137
- Model::Table.new(id: id.to_s, title: title.to_s, rows: Transformer.regroup_table_rows(rows))
63
+ # Unified Table rule. Every variant (with or without title, id,
64
+ # attributes) flows through here. Parser::BlockHeader always
65
+ # captures attribute_lists as a sequence, so we funnel through
66
+ # AttributeListNormalizer.coerce before constructing the model.
67
+ rule(table: subtree(:table)) do
68
+ id = table[:id]&.to_s
69
+ title = table[:title]&.to_s
70
+ attrs = AttributeListNormalizer.coerce(table[:attribute_list])
71
+ rows = table[:rows]
72
+ opts = { rows: Transformer.regroup_table_rows(rows, attrs), attrs: attrs }
73
+ opts[:id] = id if id
74
+ opts[:title] = title unless title.nil? || title.empty?
75
+ Model::Table.new(**opts)
138
76
  end
139
77
 
140
78
  # Title
@@ -171,7 +109,7 @@ module Coradoc
171
109
 
172
110
  id = title.id if title.is_a?(Model::Title) && title.id && !id
173
111
 
174
- attribute_list = section[:attribute_list] || nil
112
+ attribute_list = AttributeListNormalizer.coerce(section[:attribute_list])
175
113
  contents = section[:contents] || []
176
114
  sections = section[:sections]
177
115
  Model::Section.new(
@@ -200,12 +138,11 @@ module Coradoc
200
138
 
201
139
  # Bibliography entry
202
140
  rule(bibliography_entry: subtree(:bib_entry)) do
203
- anchor_name = bib_entry[:anchor_name]
204
- document_id = bib_entry[:document_id]
205
- ref_text = bib_entry[:ref_text]
206
- line_break = bib_entry[:line_break]
207
141
  Model::BibliographyEntry.new(
208
- anchor_name:, document_id:, ref_text:, line_break:
142
+ anchor_name: bib_entry[:anchor_name],
143
+ document_id: bib_entry[:document_id],
144
+ ref_text: Model::BibliographyEntry.coerce_ref_text(bib_entry[:ref_text]),
145
+ line_break: bib_entry[:line_break]
209
146
  )
210
147
  end
211
148
  end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module AsciiDoc
5
+ class Transformer < Parslet::Transform
6
+ # Pure functions for parsing the cell-format prefix (`2+^.^a`) and
7
+ # building a Model::TableCell from raw parser values.
8
+ #
9
+ # Extracted from the Transformer god class. The format spec can come
10
+ # in two shapes from the parser — a Hash of named captures or a raw
11
+ # String from a capture group — and the layout/alignment/style parsing
12
+ # for each used to be inline in Transformer#build_table_cell.
13
+ module TableCellBuilder
14
+ module_function
15
+
16
+ # @param format [Hash, String, Object, nil] Cell format from parser
17
+ # @param content [Object] Cell content
18
+ # @return [Model::TableCell]
19
+ def build(format, content)
20
+ cell_opts = {}
21
+ style = parse_format(format, cell_opts)
22
+
23
+ unescaped_content = content.to_s.gsub(/\\([|!,:;])/, '\1')
24
+ cell_opts[:content] = parse_inline_content(unescaped_content, style)
25
+
26
+ Model::TableCell.new(**cell_opts)
27
+ end
28
+
29
+ # Coerce a raw parser cell value into a TableCell.
30
+ # Used by TableLayout.group_cells_into_rows when it encounters
31
+ # cells that the parser emitted as Hashes or plain strings.
32
+ # @param cell [Model::TableCell, Hash, Object]
33
+ # @return [Model::TableCell]
34
+ def normalize_cell(cell)
35
+ case cell
36
+ when Model::TableCell then cell
37
+ when Hash
38
+ content = cell[:text] || cell[:content] || ''
39
+ Model::TableCell.new(content: parse_inline_content(content))
40
+ else
41
+ Model::TableCell.new(content: parse_inline_content(cell))
42
+ end
43
+ end
44
+
45
+ # Parse inline content from raw text for a cell.
46
+ # @param text [String, nil]
47
+ # @param style [String, nil] 'a' (AsciiDoc), 'l' (literal), or nil
48
+ # @return [Array<Model::TextElement>]
49
+ def parse_inline_content(text, style = nil)
50
+ return [Model::TextElement.new(content: '')] if text.nil? || text.to_s.strip.empty?
51
+
52
+ return parse_block_content(text) if style == 'a'
53
+ return [Model::TextElement.new(content: text.to_s)] if style == 'l'
54
+
55
+ parser = Coradoc::AsciiDoc::Parser::Base.new
56
+ begin
57
+ ast = parser.text_any.parse(text.to_s)
58
+ transformed = Transformer.new.apply(ast)
59
+ content_array = transformed.is_a?(Array) ? transformed : [transformed]
60
+ [Model::TextElement.new(content: content_array)]
61
+ rescue Parslet::ParseFailed
62
+ [Model::TextElement.new(content: text.to_s)]
63
+ end
64
+ end
65
+
66
+ # Parse block-level AsciiDoc content (for 'a' style cells).
67
+ # @param text [String, nil]
68
+ # @return [Array]
69
+ def parse_block_content(text)
70
+ return [Model::TextElement.new(content: '')] if text.nil? || text.to_s.strip.empty?
71
+
72
+ parser = Coradoc::AsciiDoc::Parser::Base.new
73
+ text_str = text.to_s
74
+
75
+ if /^(\*+|-+|\d+\.)/m.match?(text_str)
76
+ list_match = text_str.match(/\n(\*+|-+|\d+\.)(.*)$/m)
77
+ if list_match
78
+ list_text = list_match[1] + list_match[2]
79
+ begin
80
+ ast = parser.list.parse(list_text)
81
+ transformed = Transformer.new.apply(ast)
82
+
83
+ before_list = text_str[0, list_match.begin(1) - 1].strip
84
+ before_elements = []
85
+ unless before_list.empty?
86
+ begin
87
+ before_ast = parser.text_any.parse(before_list)
88
+ before_transformed = Transformer.new.apply(before_ast)
89
+ before_array = before_transformed.is_a?(Array) ? before_transformed : [before_transformed]
90
+ before_elements = [Model::TextElement.new(content: before_array)]
91
+ rescue Parslet::ParseFailed
92
+ before_elements = [Model::TextElement.new(content: before_list)]
93
+ end
94
+ end
95
+
96
+ return before_elements + [transformed]
97
+ rescue Parslet::ParseFailed
98
+ # fall through to inline parsing
99
+ end
100
+ end
101
+ end
102
+
103
+ begin
104
+ ast = parser.text_any.parse(text_str)
105
+ transformed = Transformer.new.apply(ast)
106
+ content_array = transformed.is_a?(Array) ? transformed : [transformed]
107
+ [Model::TextElement.new(content: content_array)]
108
+ rescue Parslet::ParseFailed
109
+ [Model::TextElement.new(content: text_str)]
110
+ end
111
+ end
112
+
113
+ # Parse the cell-format prefix and populate cell_opts.
114
+ # @param format [Hash, String, Object, nil]
115
+ # @param cell_opts [Hash] Mutated in place
116
+ # @return [String, nil] The parsed style character
117
+ def parse_format(format, cell_opts)
118
+ if format.is_a?(Hash)
119
+ parse_format_hash(format, cell_opts)
120
+ elsif format.is_a?(String)
121
+ parse_format_string(format, cell_opts)
122
+ end
123
+ end
124
+
125
+ def parse_format_hash(format, cell_opts)
126
+ cell_opts[:colspan] = format[:colspan].to_i if format[:colspan]
127
+
128
+ if format[:rowspan]
129
+ rowspan_str = format[:rowspan].to_s.sub(/^\./, '')
130
+ cell_opts[:rowspan] = rowspan_str.to_i if rowspan_str.match?(/^\d+$/)
131
+ end
132
+
133
+ cell_opts[:halign] = format[:halign].to_s if format[:halign]
134
+
135
+ if format[:valign]
136
+ valign_str = format[:valign].to_s.sub(/^\./, '')
137
+ cell_opts[:valign] = valign_str if %w[< ^ >].include?(valign_str)
138
+ end
139
+
140
+ style = format[:style].to_s if format[:style]
141
+ cell_opts[:style] = style
142
+ cell_opts[:repeat] = true if format[:repeat]
143
+ style
144
+ end
145
+
146
+ def parse_format_string(format_str, cell_opts)
147
+ cell_opts[:colspan] = Regexp.last_match(1).to_i if format_str =~ /^(\d+)\+/
148
+ cell_opts[:rowspan] = Regexp.last_match(1).to_i if format_str =~ /\.(\d+)/
149
+ cell_opts[:halign] = Regexp.last_match(0) if format_str =~ /[<>^]/
150
+ cell_opts[:valign] = Regexp.last_match(0)[1] if format_str =~ /\.[.^<>]/
151
+
152
+ style = Regexp.last_match(0) if format_str =~ /[dsemalhv]/
153
+ cell_opts[:style] = style
154
+
155
+ cell_opts[:repeat] = true if format_str.include?('*')
156
+ style
157
+ end
158
+ end
159
+ end
160
+ end
161
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module AsciiDoc
5
+ class Transformer < Parslet::Transform
6
+ # Pure functions for table row/column layout.
7
+ #
8
+ # Extracted from the Transformer god class so that:
9
+ # - the Transformer stays focused on rule wiring
10
+ # - the layout math can be unit-tested in isolation
11
+ # - future formats (Markdown tables, DOCX) can reuse the math
12
+ # without reaching into AsciiDoc::Transformer
13
+ module TableLayout
14
+ module_function
15
+
16
+ # Parse the `cols=` attribute to determine column count.
17
+ # @param attrs [Model::AttributeList, Hash, nil]
18
+ # @return [Integer, nil]
19
+ def parse_cols_attribute(attrs)
20
+ return nil if attrs.nil?
21
+
22
+ cols_value = if attrs.is_a?(Model::AttributeList)
23
+ attrs.named.find { |n| n.name.to_s == 'cols' }&.value
24
+ elsif attrs.is_a?(Hash)
25
+ attrs['cols'] || attrs[:cols]
26
+ end
27
+
28
+ return nil if cols_value.nil?
29
+
30
+ cols_str = cols_value.is_a?(Array) ? cols_value.first.to_s : cols_value.to_s
31
+ cols_str = cols_str.gsub(/^["']|["']$/, '')
32
+
33
+ return Regexp.last_match(1).to_i if cols_str =~ /^(\d+)\*$/
34
+ return cols_str.split(',').size if cols_str.include?(',')
35
+
36
+ cols_str.to_i if /^\d+$/.match?(cols_str)
37
+ end
38
+
39
+ # Group a flat list of cells into rows of `col_count` slots.
40
+ # @param cells [Array<Model::TableCell, Hash, Object>]
41
+ # @param explicit_col_count [Integer, nil]
42
+ # @return [Array<Model::TableRow>]
43
+ def group_cells_into_rows(cells, explicit_col_count = nil)
44
+ return [] if cells.nil? || cells.empty?
45
+
46
+ normalized_cells = cells.map { |cell| TableCellBuilder.normalize_cell(cell) }
47
+
48
+ col_count = explicit_col_count
49
+ if col_count.nil? || col_count.zero?
50
+ col_count = infer_column_count(normalized_cells)
51
+ end
52
+ col_count = normalized_cells.size if col_count.nil? || col_count.zero?
53
+
54
+ rows = []
55
+ current_row = []
56
+ current_slots = 0
57
+
58
+ normalized_cells.each do |cell|
59
+ colspan = cell.is_a?(Model::TableCell) && cell.colspan ? cell.colspan : 1
60
+
61
+ current_row << cell
62
+ current_slots += colspan
63
+ next unless current_slots >= col_count
64
+
65
+ rows << Model::TableRow.new(columns: current_row)
66
+ current_row = []
67
+ current_slots = 0
68
+ end
69
+
70
+ rows << Model::TableRow.new(columns: current_row) if current_row.any?
71
+ rows
72
+ end
73
+
74
+ # Infer a column count that consistently divides the colspan slots.
75
+ # @param cells [Array<Model::TableCell>]
76
+ # @return [Integer, nil]
77
+ def infer_column_count(cells)
78
+ return nil if cells.nil? || cells.empty?
79
+
80
+ col_slots = cells.map do |cell|
81
+ cell.is_a?(Model::TableCell) && cell.colspan ? cell.colspan : 1
82
+ end
83
+ total_cells = col_slots.sum
84
+
85
+ possible_cols = (1..[total_cells, 12].min).select do |candidate|
86
+ next false if candidate > total_cells
87
+ next false if total_cells % candidate != 0
88
+
89
+ slots_used = 0
90
+ valid = true
91
+
92
+ col_slots.each do |slots|
93
+ slots_used += slots
94
+ if slots_used == candidate
95
+ slots_used = 0
96
+ elsif slots_used > candidate
97
+ valid = false
98
+ break
99
+ end
100
+ end
101
+
102
+ valid && slots_used.zero?
103
+ end
104
+
105
+ possible_cols.max || col_slots.first || 1
106
+ end
107
+
108
+ # Regroup parser-level rows into proper AsciiDoc rows.
109
+ # The parser produces one "row" per line; this flattens all cells
110
+ # and regroups by the cols attribute, then marks the first row as header.
111
+ # @param rows [Array<Model::TableRow>]
112
+ # @param attrs [Model::AttributeList, nil]
113
+ # @return [Array<Model::TableRow>]
114
+ def regroup_table_rows(rows, attrs = nil)
115
+ return rows if rows.nil? || rows.empty?
116
+
117
+ col_count = parse_cols_attribute(attrs)
118
+ if col_count.nil? && rows.first.is_a?(Model::TableRow) && rows.first.columns.any?
119
+ col_count = rows.first.columns.sum { |c| (c.colspan || 1).to_i }
120
+ end
121
+
122
+ all_cells = rows.flat_map do |r|
123
+ r.is_a?(Model::TableRow) ? r.columns : []
124
+ end
125
+
126
+ return rows if all_cells.empty?
127
+
128
+ grouped = group_cells_into_rows(all_cells, col_count)
129
+ grouped.first.header = true unless grouped.empty?
130
+ grouped
131
+ end
132
+ end
133
+ end
134
+ end
135
+ end
@@ -68,32 +68,8 @@ module Coradoc
68
68
 
69
69
  # Paragraph
70
70
  rule(paragraph: subtree(:paragraph)) do
71
- lines = paragraph[:lines] || []
72
- content = lines.map do |line|
73
- if line.is_a?(Hash) && line.key?(:text)
74
- text_content = line[:text]
75
- line_break = line[:line_break]
76
-
77
- transformed_text = if text_content.is_a?(Array)
78
- text_content.map do |item|
79
- if item.is_a?(Hash)
80
- Transformer.new.apply(item)
81
- else
82
- item
83
- end
84
- end
85
- else
86
- text_content
87
- end
88
-
89
- Model::TextElement.new(content: transformed_text, line_break: line_break)
90
- else
91
- line
92
- end
93
- end
94
-
95
71
  Model::Paragraph.new(
96
- content: content,
72
+ content: Transformer.lines_to_text_elements(paragraph[:lines]),
97
73
  id: paragraph[:id],
98
74
  attributes: paragraph[:attribute_list],
99
75
  title: paragraph[:title]