coradoc-adoc 2.0.9 → 2.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coradoc/asciidoc/model/bibliography_entry.rb +18 -0
- data/lib/coradoc/asciidoc/model/document.rb +9 -0
- data/lib/coradoc/asciidoc/model/glossaries.rb +1 -1
- data/lib/coradoc/asciidoc/model/list/base.rb +41 -0
- data/lib/coradoc/asciidoc/model/list/core.rb +4 -24
- data/lib/coradoc/asciidoc/model/list/definition.rb +7 -0
- data/lib/coradoc/asciidoc/model/list/definition_item.rb +1 -1
- data/lib/coradoc/asciidoc/model/list/item.rb +1 -1
- data/lib/coradoc/asciidoc/model/list/nestable.rb +7 -3
- data/lib/coradoc/asciidoc/model/list.rb +4 -2
- data/lib/coradoc/asciidoc/parser/base.rb +10 -70
- data/lib/coradoc/asciidoc/parser/block.rb +3 -22
- data/lib/coradoc/asciidoc/parser/block_assembler.rb +37 -100
- data/lib/coradoc/asciidoc/parser/block_header.rb +55 -0
- data/lib/coradoc/asciidoc/parser/frontmatter_parser.rb +24 -0
- data/lib/coradoc/asciidoc/parser/paragraph.rb +1 -3
- data/lib/coradoc/asciidoc/parser/rule_dispatcher.rb +158 -0
- data/lib/coradoc/asciidoc/parser/section.rb +1 -3
- data/lib/coradoc/asciidoc/parser/table.rb +1 -4
- data/lib/coradoc/asciidoc/parser/text.rb +1 -3
- data/lib/coradoc/asciidoc/parser.rb +1 -0
- data/lib/coradoc/asciidoc/serializer/serializers/base.rb +1 -1
- data/lib/coradoc/asciidoc/serializer/serializers/document.rb +7 -0
- data/lib/coradoc/asciidoc/serializer/serializers/list/definition.rb +3 -1
- data/lib/coradoc/asciidoc/transform/element_transformers/block_transformer.rb +10 -1
- data/lib/coradoc/asciidoc/transform/element_transformers/document_transformer.rb +15 -1
- data/lib/coradoc/asciidoc/transform/element_transformers/other_transformer.rb +3 -1
- data/lib/coradoc/asciidoc/transform/from_core_model.rb +33 -3
- data/lib/coradoc/asciidoc/transform/from_core_model_registrations.rb +5 -1
- data/lib/coradoc/asciidoc/transform/frontmatter_attribute_map.rb +112 -0
- data/lib/coradoc/asciidoc/transform/text_extract_visitor.rb +33 -1
- data/lib/coradoc/asciidoc/transform/to_core_model.rb +10 -2
- data/lib/coradoc/asciidoc/transform/to_core_model_registrations.rb +15 -10
- data/lib/coradoc/asciidoc/transform.rb +1 -0
- data/lib/coradoc/asciidoc/transformer/attribute_list_normalizer.rb +69 -0
- data/lib/coradoc/asciidoc/transformer/block_rules.rb +4 -42
- data/lib/coradoc/asciidoc/transformer/block_type_classifier.rb +56 -0
- data/lib/coradoc/asciidoc/transformer/header_rules.rb +15 -53
- data/lib/coradoc/asciidoc/transformer/inline_rules.rb +39 -57
- data/lib/coradoc/asciidoc/transformer/misc_rules.rb +1 -24
- data/lib/coradoc/asciidoc/transformer/structural_rules.rb +18 -81
- data/lib/coradoc/asciidoc/transformer/table_cell_builder.rb +161 -0
- data/lib/coradoc/asciidoc/transformer/table_layout.rb +135 -0
- data/lib/coradoc/asciidoc/transformer/text_rules.rb +1 -25
- data/lib/coradoc/asciidoc/transformer.rb +38 -294
- data/lib/coradoc/asciidoc/version.rb +1 -1
- data/lib/coradoc/asciidoc.rb +6 -3
- metadata +10 -1
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module AsciiDoc
|
|
5
|
+
class Transformer < Parslet::Transform
|
|
6
|
+
# Pure functions for parsing the cell-format prefix (`2+^.^a`) and
|
|
7
|
+
# building a Model::TableCell from raw parser values.
|
|
8
|
+
#
|
|
9
|
+
# Extracted from the Transformer god class. The format spec can come
|
|
10
|
+
# in two shapes from the parser — a Hash of named captures or a raw
|
|
11
|
+
# String from a capture group — and the layout/alignment/style parsing
|
|
12
|
+
# for each used to be inline in Transformer#build_table_cell.
|
|
13
|
+
module TableCellBuilder
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
# Build a table cell from the parser's format prefix and raw content.
#
# The format is handed to parse_format, which fills the option hash and
# returns the style character (if any). Backslash escapes in front of
# `|`, `!`, `,`, `:` and `;` are removed from the content before it is
# parsed according to that style.
#
# @param format [Hash, String, Object, nil] Cell format from parser
# @param content [Object] Cell content; converted with #to_s
# @return [Model::TableCell]
def build(format, content)
  options = {}
  cell_style = parse_format(format, options)

  # Drop the escaping backslash but keep the escaped character itself.
  plain_text = content.to_s.gsub(/\\([|!,:;])/, '\1')
  options[:content] = parse_inline_content(plain_text, cell_style)

  Model::TableCell.new(**options)
end
|
|
28
|
+
|
|
29
|
+
# Turn whatever the parser emitted for a cell into a Model::TableCell.
#
# An existing TableCell is returned untouched. For a Hash the text is
# taken from :text, then :content, then defaults to ''. Any other value
# is passed to parse_inline_content as-is.
#
# @param cell [Model::TableCell, Hash, Object]
# @return [Model::TableCell]
def normalize_cell(cell)
  return cell if cell.is_a?(Model::TableCell)

  raw = cell.is_a?(Hash) ? (cell[:text] || cell[:content] || '') : cell
  Model::TableCell.new(content: parse_inline_content(raw))
end
|
|
44
|
+
|
|
45
|
+
# Convert the raw text of a cell into content elements.
#
# Blank or nil text yields a single empty TextElement. Style 'a' is
# delegated to parse_block_content, style 'l' keeps the text verbatim,
# and everything else goes through the inline grammar (`text_any`).
# When the inline grammar rejects the text it is kept verbatim.
#
# @param text [String, nil]
# @param style [String, nil] 'a' (AsciiDoc), 'l' (literal), or nil
# @return [Array] TextElements, or the result of parse_block_content
def parse_inline_content(text, style = nil)
  raw = text.to_s
  return [Model::TextElement.new(content: '')] if raw.strip.empty?

  if style == 'a'
    return parse_block_content(text)
  elsif style == 'l'
    return [Model::TextElement.new(content: raw)]
  end

  inline_parser = Coradoc::AsciiDoc::Parser::Base.new
  begin
    nodes = Transformer.new.apply(inline_parser.text_any.parse(raw))
    nodes = [nodes] unless nodes.is_a?(Array)
    [Model::TextElement.new(content: nodes)]
  rescue Parslet::ParseFailed
    [Model::TextElement.new(content: raw)]
  end
end
|
|
65
|
+
|
|
66
|
+
# Parse block-level AsciiDoc content (for 'a' style cells).
#
# If the text contains a list marker (`*`, `-` or `1.`) at the very
# start of the cell or directly after a newline, everything from that
# marker to the end is parsed with the list grammar and any text in
# front of it is parsed as inline content. When no marker is found, or
# the list grammar rejects the text, the whole cell is parsed as inline
# content instead.
#
# @param text [String, nil]
# @return [Array] Leading TextElements (if any) followed by the
#   transformed list, or a single TextElement for non-list content
def parse_block_content(text)
  text_str = text.to_s
  return [Model::TextElement.new(content: '')] if text_str.strip.empty?

  parser = Coradoc::AsciiDoc::Parser::Base.new

  # `\A` is needed in addition to `\n`: a cell that opens with a list
  # would otherwise have its first item treated as leading text.
  list_match = text_str.match(/(?:\A|\n)(\*+|-+|\d+\.)(.*)$/m)
  if list_match
    begin
      list_ast = parser.list.parse(list_match[1] + list_match[2])
      list_node = Transformer.new.apply(list_ast)

      # Everything before the marker; the separating newline is stripped.
      leading = text_str[0, list_match.begin(1)].strip
      leading_elements = leading.empty? ? [] : parse_inline_or_raw(parser, leading)

      return leading_elements + [list_node]
    rescue Parslet::ParseFailed
      # Not a list after all: fall through to inline parsing.
    end
  end

  parse_inline_or_raw(parser, text_str)
end

# Internal helper: parse +text+ with the inline grammar and wrap the
# result in a TextElement; keep the text verbatim when parsing fails.
#
# @param parser [Coradoc::AsciiDoc::Parser::Base]
# @param text [String]
# @return [Array<Model::TextElement>]
def parse_inline_or_raw(parser, text)
  nodes = Transformer.new.apply(parser.text_any.parse(text))
  nodes = [nodes] unless nodes.is_a?(Array)
  [Model::TextElement.new(content: nodes)]
rescue Parslet::ParseFailed
  [Model::TextElement.new(content: text)]
end
|
|
112
|
+
|
|
113
|
+
# Dispatch the cell-format prefix to the matching parser and populate
# cell_opts. A Hash goes to parse_format_hash, a String to
# parse_format_string; any other value leaves cell_opts untouched.
#
# @param format [Hash, String, Object, nil]
# @param cell_opts [Hash] Mutated in place
# @return [String, nil] The parsed style character, or nil when the
#   format is neither a Hash nor a String
def parse_format(format, cell_opts)
  case format
  when Hash then parse_format_hash(format, cell_opts)
  when String then parse_format_string(format, cell_opts)
  end
end
|
|
124
|
+
|
|
125
|
+
# Populate cell_opts from a Hash of named format captures.
#
# Reads :colspan, :rowspan, :halign, :valign, :style and :repeat.
# A leading dot is removed from :rowspan and :valign; the rowspan is
# only stored when the remainder is all digits and the valign only when
# it is one of `<`, `^`, `>`. :style is always written (nil if absent).
#
# @param format [Hash]
# @param cell_opts [Hash] Mutated in place
# @return [String, nil] The style as a String, or nil
def parse_format_hash(format, cell_opts)
  colspan = format[:colspan]
  cell_opts[:colspan] = colspan.to_i if colspan

  if (rowspan = format[:rowspan])
    digits = rowspan.to_s.sub(/^\./, '')
    cell_opts[:rowspan] = digits.to_i if digits.match?(/^\d+$/)
  end

  halign = format[:halign]
  cell_opts[:halign] = halign.to_s if halign

  if (valign = format[:valign])
    operator = valign.to_s.sub(/^\./, '')
    cell_opts[:valign] = operator if %w[< ^ >].include?(operator)
  end

  style = format[:style] ? format[:style].to_s : nil
  cell_opts[:style] = style
  cell_opts[:repeat] = true if format[:repeat]
  style
end
|
|
145
|
+
|
|
146
|
+
# Populate cell_opts from a raw cell-format string such as `2+^.^a`.
#
# Recognised parts:
# - colspan: leading digits before `+`, optionally followed by a
#   `.<rowspan>` (so both `2+` and `2.3+` give colspan 2)
# - rowspan: digits directly after a dot
# - halign: the first `<`, `>` or `^` that is NOT preceded by a dot;
#   a dot-prefixed operator is the vertical alignment and must not be
#   reported as horizontal alignment as well
# - valign: `<`, `>` or `^` directly after a dot (same set of values
#   that parse_format_hash accepts)
# - style: the first of the letters d, s, e, m, a, l, h, v
# - repeat: set when the string contains `*`
#
# :style is always written (nil when no style letter is present).
#
# @param format_str [String]
# @param cell_opts [Hash] Mutated in place
# @return [String, nil] The style character, or nil
def parse_format_string(format_str, cell_opts)
  colspan = format_str[/^(\d+)(?:\.\d+)?\+/, 1]
  cell_opts[:colspan] = colspan.to_i if colspan

  rowspan = format_str[/\.(\d+)/, 1]
  cell_opts[:rowspan] = rowspan.to_i if rowspan

  halign = format_str[/(?<!\.)[<>^]/]
  cell_opts[:halign] = halign if halign

  valign = format_str[/\.([<>^])/, 1]
  cell_opts[:valign] = valign if valign

  style = format_str[/[dsemalhv]/]
  cell_opts[:style] = style

  cell_opts[:repeat] = true if format_str.include?('*')
  style
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module AsciiDoc
|
|
5
|
+
class Transformer < Parslet::Transform
|
|
6
|
+
# Pure functions for table row/column layout.
|
|
7
|
+
#
|
|
8
|
+
# Extracted from the Transformer god class so that:
|
|
9
|
+
# - the Transformer stays focused on rule wiring
|
|
10
|
+
# - the layout math can be unit-tested in isolation
|
|
11
|
+
# - future formats (Markdown tables, DOCX) can reuse the math
|
|
12
|
+
# without reaching into AsciiDoc::Transformer
|
|
13
|
+
module TableLayout
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
# Parse the `cols=` attribute to determine column count.
#
# The value is looked up among the named attributes of an AttributeList
# or under 'cols' / :cols of a Hash. One pair of surrounding quotes is
# removed. Supported forms:
# - `3`       -> 3 columns
# - `3*`      -> 3 columns (also with trailing alignment/style, `3*^`)
# - `1,2,1`   -> one column per comma-separated entry
# - `2*,1`    -> an entry with a `<n>*` multiplier counts n times
#
# @param attrs [Model::AttributeList, Hash, nil]
# @return [Integer, nil] nil when there is no cols value or it has none
#   of the forms above
def parse_cols_attribute(attrs)
  return nil if attrs.nil?

  cols_value = if attrs.is_a?(Model::AttributeList)
                 attrs.named.find { |n| n.name.to_s == 'cols' }&.value
               elsif attrs.is_a?(Hash)
                 attrs['cols'] || attrs[:cols]
               end
  return nil if cols_value.nil?

  cols_str = cols_value.is_a?(Array) ? cols_value.first.to_s : cols_value.to_s
  cols_str = cols_str.gsub(/^["']|["']$/, '')

  return cols_str.to_i if /^\d+$/.match?(cols_str)
  return nil unless cols_str.include?(',') || /^\s*\d+\*/.match?(cols_str)

  # Each entry contributes its multiplier, or one column without one.
  cols_str.split(',').sum do |spec|
    multiplier = spec[/\A\s*(\d+)\*/, 1]
    multiplier ? multiplier.to_i : 1
  end
end
|
|
38
|
+
|
|
39
|
+
# Split a flat list of cells into rows that each fill the column count.
#
# Every cell is first normalised through TableCellBuilder.normalize_cell.
# The width is the explicit count when given and non-zero, otherwise the
# result of infer_column_count, otherwise the number of cells. A row is
# closed as soon as the sum of its cells' colspans (default 1) reaches
# the width; leftover cells form a final, shorter row. Only colspan is
# considered here; rowspan is not read.
#
# @param cells [Array<Model::TableCell, Hash, Object>]
# @param explicit_col_count [Integer, nil]
# @return [Array<Model::TableRow>]
def group_cells_into_rows(cells, explicit_col_count = nil)
  return [] if cells.nil? || cells.empty?

  table_cells = cells.map { |raw| TableCellBuilder.normalize_cell(raw) }

  width = explicit_col_count
  width = infer_column_count(table_cells) if width.nil? || width.zero?
  width = table_cells.size if width.nil? || width.zero?

  rows = []
  pending = []
  filled = 0

  table_cells.each do |cell|
    pending << cell
    filled += cell.is_a?(Model::TableCell) && cell.colspan ? cell.colspan : 1

    if filled >= width
      rows << Model::TableRow.new(columns: pending)
      pending = []
      filled = 0
    end
  end

  rows << Model::TableRow.new(columns: pending) if pending.any?
  rows
end
|
|
73
|
+
|
|
74
|
+
# Infer a column count from the cells' colspans.
#
# Each cell occupies `colspan` slots (1 when it is not a TableCell or
# has no colspan). Candidate widths from 1 up to the smaller of the
# total slot count and 12 are accepted when they divide the total and
# the cells, taken in order, fill every row exactly without a cell
# crossing a row boundary. The largest accepted width wins; if none is
# accepted the first cell's slot count is returned.
#
# @param cells [Array<Model::TableCell>]
# @return [Integer, nil] nil for nil/empty input
def infer_column_count(cells)
  return nil if cells.nil? || cells.empty?

  spans = cells.map { |c| c.is_a?(Model::TableCell) && c.colspan ? c.colspan : 1 }
  total = spans.sum
  upper = [total, 12].min

  # Walk downwards so the first hit is the largest valid width.
  best = upper.downto(1).find do |width|
    next false unless (total % width).zero?

    filled = 0
    fits = spans.all? do |span|
      filled += span
      next false if filled > width # a cell would cross the row boundary

      filled = 0 if filled == width
      true
    end

    fits && filled.zero?
  end

  best || spans.first || 1
end
|
|
107
|
+
|
|
108
|
+
# Rebuild table rows from the parser's line-based rows.
#
# The column count comes from the `cols` attribute; when that yields
# nothing, the colspan sum of the first row is used. All cells of the
# TableRow entries are flattened, regrouped with group_cells_into_rows,
# and the first resulting row is flagged as header. The input is
# returned unchanged when it is nil/empty or contains no cells.
#
# @param rows [Array<Model::TableRow>]
# @param attrs [Model::AttributeList, nil]
# @return [Array<Model::TableRow>]
def regroup_table_rows(rows, attrs = nil)
  return rows if rows.nil? || rows.empty?

  width = parse_cols_attribute(attrs)
  head = rows.first
  if width.nil? && head.is_a?(Model::TableRow) && head.columns.any?
    width = head.columns.sum { |cell| (cell.colspan || 1).to_i }
  end

  # Entries that are not TableRows contribute no cells.
  cells = rows.select { |row| row.is_a?(Model::TableRow) }.flat_map(&:columns)
  return rows if cells.empty?

  regrouped = group_cells_into_rows(cells, width)
  regrouped.first.header = true unless regrouped.empty?
  regrouped
end
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|
|
@@ -68,32 +68,8 @@ module Coradoc
|
|
|
68
68
|
|
|
69
69
|
# Paragraph
|
|
70
70
|
rule(paragraph: subtree(:paragraph)) do
|
|
71
|
-
lines = paragraph[:lines] || []
|
|
72
|
-
content = lines.map do |line|
|
|
73
|
-
if line.is_a?(Hash) && line.key?(:text)
|
|
74
|
-
text_content = line[:text]
|
|
75
|
-
line_break = line[:line_break]
|
|
76
|
-
|
|
77
|
-
transformed_text = if text_content.is_a?(Array)
|
|
78
|
-
text_content.map do |item|
|
|
79
|
-
if item.is_a?(Hash)
|
|
80
|
-
Transformer.new.apply(item)
|
|
81
|
-
else
|
|
82
|
-
item
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
else
|
|
86
|
-
text_content
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
Model::TextElement.new(content: transformed_text, line_break: line_break)
|
|
90
|
-
else
|
|
91
|
-
line
|
|
92
|
-
end
|
|
93
|
-
end
|
|
94
|
-
|
|
95
71
|
Model::Paragraph.new(
|
|
96
|
-
content:
|
|
72
|
+
content: Transformer.lines_to_text_elements(paragraph[:lines]),
|
|
97
73
|
id: paragraph[:id],
|
|
98
74
|
attributes: paragraph[:attribute_list],
|
|
99
75
|
title: paragraph[:title]
|