llm-docs-builder 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +13 -0
- data/.github/workflows/docker.yml +2 -2
- data/.github/workflows/push.yml +2 -2
- data/.gitignore +8 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +47 -18
- data/README.md +19 -0
- data/lib/llm_docs_builder/cli.rb +32 -10
- data/lib/llm_docs_builder/comparator.rb +5 -75
- data/lib/llm_docs_builder/config.rb +42 -2
- data/lib/llm_docs_builder/helpers/prune_trailing_unsafe_link_separator.rb +31 -0
- data/lib/llm_docs_builder/helpers/squeeze_blank_lines_outside_fences.rb +71 -0
- data/lib/llm_docs_builder/helpers.rb +9 -0
- data/lib/llm_docs_builder/html_detector.rb +159 -0
- data/lib/llm_docs_builder/html_to_markdown/figure_code_block_renderer.rb +181 -0
- data/lib/llm_docs_builder/html_to_markdown/table_markup_renderer.rb +597 -0
- data/lib/llm_docs_builder/html_to_markdown_converter.rb +792 -0
- data/lib/llm_docs_builder/markdown_transformer.rb +30 -5
- data/lib/llm_docs_builder/output_formatter.rb +1 -1
- data/lib/llm_docs_builder/transformers/base_transformer.rb +13 -1
- data/lib/llm_docs_builder/url_fetcher.rb +138 -0
- data/lib/llm_docs_builder/version.rb +1 -1
- data/lib/llm_docs_builder.rb +11 -0
- data/llm-docs-builder.gemspec +1 -0
- metadata +23 -1
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmDocsBuilder
|
|
4
|
+
module HtmlToMarkdown
|
|
5
|
+
# Handles conversion of HTML table markup to Markdown table format
|
|
6
|
+
class TableMarkupRenderer
|
|
7
|
+
# Build a renderer around the two callables it delegates to
#
# @param inline_collapser [Proc] callable invoked with a node to obtain its inline text
# @param block_renderer [Proc] callable invoked with child nodes and a +depth:+ keyword
def initialize(inline_collapser:, block_renderer:)
  @block_renderer = block_renderer
  @inline_collapser = inline_collapser
end
|
|
15
|
+
|
|
16
|
+
# Convert an HTML table node into a Markdown table
#
# Tables containing nested tables are returned as raw HTML. Tables with
# significant rowspan or colspan attributes are handed to the dedicated
# renderers. Everything else is rendered here: the first header-like row
# (inside a thead, or made only of th cells) becomes the header; when no
# such row exists the first row is used.
#
# @param table_node [Nokogiri::XML::Node] the HTML table element to convert
# @return [String] markdown table, raw HTML for nested tables, or an empty
#   string when the table has no rows with cells
def render_table(table_node)
  return table_node.to_html if table_contains_nested_tables?(table_node)
  return render_table_with_rowspan_cells(table_node) if table_contains_rowspan_cells?(table_node)
  return render_table_with_colspan_cells(table_node) if table_contains_colspan_cells?(table_node)

  caption_text = caption_text_for(table_node)

  rows = table_node.css('tr').filter_map do |row|
    cells = row.element_children.select { |child| %w[th td].include?(child.name.downcase) }
    next if cells.empty?

    in_header = row.ancestors('thead').any? || cells.all? { |cell| cell.name.casecmp('th').zero? }
    { header: in_header, values: cells.map { |cell| render_table_cell(cell) } }
  end
  return '' if rows.empty?

  # Falling back to index 0 is the same as "first row is the header, the rest is data"
  header_index = find_header_index(rows, default: 0)
  header_values = rows[header_index][:values]
  data_values = rows.reject.with_index { |_row, index| index == header_index }.map { |row| row[:values] }

  column_count = [header_values.length, *data_values.map(&:length), 1].max

  header_cells = pad_table_row(header_values, column_count).map { |value| table_cell_data(value) }
  data_cells = data_values.map do |values|
    pad_table_row(values, column_count).map { |value| table_cell_data(value) }
  end

  column_specs = compute_table_column_specs(header_cells, data_cells)

  lines = format_table_row(header_cells, column_specs)
  lines << render_table_separator(column_specs.map { |spec| spec[:width] })
  data_cells.each { |row_cells| lines.concat(format_table_row(row_cells, column_specs)) }

  with_optional_caption(caption_text, lines.join("\n"))
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
# Render a table whose cells carry significant rowspan attributes
#
# Each row is expanded so that columns covered by a rowspan from a previous
# row receive an empty placeholder. Rows whose cells contain line breaks are
# emitted as several bordered lines.
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [String] markdown table, or the table's HTML when no row has cells
def render_table_with_rowspan_cells(table_node)
  caption_text = caption_text_for(table_node)

  # Shared across rows: remaining covered-row count per column
  span_slots = []

  rows = table_node.css('tr').filter_map do |row|
    cells = row.element_children.select { |child| %w[th td].include?(child.name.downcase) }
    next if cells.empty?

    in_header = row.ancestors('thead').any? || cells.all? { |cell| cell.name.casecmp('th').zero? }
    { header: in_header, cells: expand_row_for_rowspans(cells, span_slots) }
  end

  return table_node.to_html if rows.empty?

  column_count = [rows.map { |row| row[:cells].length }.max || 1, 1].max

  # Turns one padded row into its bordered output lines (one per text line)
  bordered_lines_for = lambda do |cell_values|
    text_lines = format_rowspan_row_text(cell_values).to_s.split("\n", -1)
    text_lines = [''] if text_lines.empty?
    text_lines.map { |line| format_bordered_row_content(line.empty? ? ' ' : line) }
  end

  header_index = find_header_index(rows, default: 0)
  header_values = pad_table_row(rows[header_index][:cells], column_count)
  header_cells = header_values.map { |value| table_cell_data(value) }

  lines = bordered_lines_for.call(header_values)
  lines << render_table_separator(column_widths_from_cells(header_cells))

  rows.each_with_index do |row, index|
    next if index == header_index

    lines.concat(bordered_lines_for.call(pad_table_row(row[:cells], column_count)))
  end

  with_optional_caption(caption_text, lines.join("\n"))
end
|
|
137
|
+
|
|
138
|
+
# Render a table whose cells carry significant colspan attributes
#
# Every row is emitted as a single bordered line built from its rendered
# cells joined with ' | '; literal pipes outside code spans are escaped first
# so they cannot create extra columns. The separator width is derived from
# the header row only.
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [String] markdown table, or the table's HTML when no row has cells
def render_table_with_colspan_cells(table_node)
  caption_text = caption_text_for(table_node)

  rows = table_node.css('tr').filter_map do |row|
    cells = row.element_children.select { |child| %w[th td].include?(child.name.downcase) }
    next if cells.empty?

    in_header = row.ancestors('thead').any? || cells.all? { |cell| cell.name.casecmp('th').zero? }
    values = cells.map { |cell| render_table_cell(cell) }
    joined = values.map { |value| sanitize_table_cell_line(value, escape_pipes: true) }.join(' | ').strip

    { header: in_header, values: values, text: joined }
  end
  return table_node.to_html if rows.empty?

  header_index = find_header_index(rows, default: 0)
  header = rows[header_index]
  body_texts = rows.reject.with_index { |_row, index| index == header_index }.map { |row| row[:text] }

  column_count = [rows.map { |row| row[:values].length }.max || 1, 1].max

  header_cells = pad_table_row(header[:values] || [], column_count).map { |value| table_cell_data(value) }
  separator = render_table_separator(column_widths_from_cells(header_cells))

  lines = [header[:text], *body_texts].map { |text| format_bordered_row_content(text) }
  lines.insert(1, separator)

  with_optional_caption(caption_text, lines.join("\n"))
end
|
|
185
|
+
|
|
186
|
+
# Expand one row into per-column values, honouring rowspans from earlier rows
#
# Columns still covered by a rowspan receive an empty string and have their
# remaining count decremented. A cell with colspan N contributes its value
# followed by N - 1 empty strings. Missing, non-integer or non-positive
# span attributes are treated as 1.
#
# @param cells [Array<Nokogiri::XML::Element>] cell elements of the row
# @param span_slots [Array<Integer, nil>] per-column count of further rows
#   still covered by a rowspan; mutated in place
# @return [Array<String>] expanded cell values
def expand_row_for_rowspans(cells, span_slots)
  expanded = []
  position = 0

  # Emit a placeholder for each consecutive column that is still covered from above
  drain_covered = lambda do
    while span_slots[position].to_i.positive?
      expanded << ''
      remaining = span_slots[position].to_i - 1
      span_slots[position] = remaining.positive? ? remaining : nil
      position += 1
    end
  end

  cells.each do |cell|
    drain_covered.call

    text = render_table_cell(cell)
    width = [parse_integer(cell['colspan']) || 1, 1].max
    height = [parse_integer(cell['rowspan']) || 1, 1].max
    carried_rows = height > 1 ? height - 1 : nil

    width.times do |offset|
      expanded << (offset.zero? ? text : '')
      span_slots[position + offset] = carried_rows
    end

    position += width
  end

  drain_covered.call

  expanded
end
|
|
227
|
+
|
|
228
|
+
# Build the (possibly multi-line) text of one row for rowspan tables
#
# Each cell is sanitized with pipe escaping, then split on line breaks. The
# row is emitted as one text line per cell line, with non-empty segments
# left-justified to the widest segment of their column and columns joined
# with ' | '.
#
# @param cells [Array<String>, String] cell values; a non-array is converted
#   to a string and split on ' | '
# @return [String] formatted row text, lines separated by "\n"
def format_rowspan_row_text(cells)
  raw_values = cells.is_a?(Array) ? cells.map(&:to_s) : cells.to_s.split(' | ')

  columns = raw_values.map do |raw|
    pieces = sanitize_table_cell_line(raw, escape_pipes: true).gsub(/\r\n?/, "\n").split("\n")
    pieces.empty? ? [''] : pieces
  end

  # Every column holds at least one segment, so only an empty row has no lines
  return '' if columns.empty?

  height = columns.map(&:length).max
  widths = columns.map { |pieces| pieces.map(&:length).max }

  text_lines = (0...height).map do |line_number|
    columns.zip(widths).map do |pieces, width|
      piece = pieces[line_number] || ''
      piece.empty? ? piece : piece.ljust(width)
    end.join(' | ')
  end

  text_lines.join("\n")
end
|
|
276
|
+
|
|
277
|
+
# Render the content of a single table cell
#
# The cell's children go through the block renderer at depth 0. A nil result
# yields an empty string; a result that is blank after stripping falls back
# to the inline collapser applied to the cell itself.
#
# @param cell [Nokogiri::XML::Element] cell element
# @return [String] rendered cell content
def render_table_cell(cell)
  rendered = @block_renderer.call(cell.children, depth: 0)
  return '' if rendered.nil?

  stripped = rendered.strip
  stripped.empty? ? @inline_collapser.call(cell) : stripped
end
|
|
290
|
+
|
|
291
|
+
# Break a cell value into the lines used for table layout
#
# Line endings are normalised to "\n", each line is passed through
# split_table_cell_line, and blank segments are discarded. A cell with no
# remaining segments is represented by a single empty line.
#
# @param value [String] cell value
# @return [Hash] +:lines+ (Array<String>) and +:pipe_split+ (Boolean)
def table_cell_data(value)
  text = value.to_s
  return { lines: [''], pipe_split: false } if text.empty?

  pipe_split = false
  kept = []

  text.gsub(/\r\n?/, "\n").split("\n").each do |line|
    segments, was_split = split_table_cell_line(line)
    pipe_split ||= was_split
    kept.concat(segments.reject { |segment| segment.strip.empty? })
  end

  kept << '' if kept.empty?

  { lines: kept, pipe_split: pipe_split }
end
|
|
316
|
+
|
|
317
|
+
# Turn one cell line into its segments
#
# A line is never actually split: it is kept as a single segment with literal
# pipes outside code spans escaped, so the split flag is always false.
#
# @param line [String, nil] cell line text
# @return [Array<Array<String>, Boolean>] one-element segment list and the split flag
def split_table_cell_line(line)
  segment = line.nil? || line.empty? ? '' : sanitize_table_cell_line(line, escape_pipes: true)

  [[segment], false]
end
|
|
329
|
+
|
|
330
|
+
# Sanitize cell text so it can be placed inside a Markdown table row
#
# The text is scanned left to right:
# - every backslash is doubled;
# - runs of backticks are copied verbatim and open/close an inline code span
#   (a span is closed only by a run of the same length that opened it);
# - when +escape_pipes+ is true, pipes outside code spans are emitted as "\|".
# The result is stripped of leading and trailing whitespace.
#
# Each character is handled on its own. The character following a backslash
# is no longer copied through blindly, so a backslash directly before a pipe
# cannot leave that pipe unescaped, and a backtick after a backslash still
# takes part in code-span tracking.
#
# NOTE(review): backslashes are doubled inside code spans as well, and pipes
# inside code spans are left unescaped - confirm both are intended.
#
# @param text [String] cell text
# @param escape_pipes [Boolean] whether to escape pipe characters
# @return [String] sanitized text
def sanitize_table_cell_line(text, escape_pipes: false)
  raw = text.to_s
  return '' if raw.empty?

  sanitized = +''
  open_fence = nil # length of the backtick run that opened the current code span

  # Group consecutive backticks into one run; every other character stands alone
  raw.each_char.chunk_while { |left, right| left == '`' && right == '`' }.each do |run|
    case run.first
    when '`'
      if open_fence.nil?
        open_fence = run.length
      elsif open_fence == run.length
        open_fence = nil
      end
      sanitized << run.join
    when '\\'
      sanitized << '\\\\'
    when '|'
      sanitized << (escape_pipes && open_fence.nil? ? '\\|' : '|')
    else
      sanitized << run.first
    end
  end

  sanitized.strip
end
|
|
388
|
+
|
|
389
|
+
# Tell whether any table element is found beneath the given table
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [Boolean] true if nested tables exist
def table_contains_nested_tables?(table_node)
  nested = table_node.css('table')
  !nested.empty?
end
|
|
396
|
+
|
|
397
|
+
# Tell whether any td/th in the table has a rowspan attribute that counts as significant
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [Boolean] true if rowspan cells exist
def table_contains_rowspan_cells?(table_node)
  candidates = table_node.css('td[rowspan], th[rowspan]')
  candidates.any? { |cell| span_value_significant?(cell['rowspan']) }
end
|
|
406
|
+
|
|
407
|
+
# Tell whether any td/th in the table has a colspan attribute that counts as significant
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [Boolean] true if colspan cells exist
def table_contains_colspan_cells?(table_node)
  candidates = table_node.css('td[colspan], th[colspan]')
  candidates.any? { |cell| span_value_significant?(cell['colspan']) }
end
|
|
416
|
+
|
|
417
|
+
# Check whether a rowspan/colspan attribute value requires span-aware rendering
#
# A missing attribute (nil) and a value that is exactly "1" after stripping
# whitespace are insignificant. Every other value is significant, including
# an empty string, zero, negative numbers, non-numeric text and non-canonical
# spellings of one such as "01" or "+1".
#
# @param raw_value [String, nil] span attribute value
# @return [Boolean] true if span is significant
def span_value_significant?(raw_value)
  return false if raw_value.nil?

  raw_value.to_s.strip != '1'
end
|
|
433
|
+
|
|
434
|
+
# Return a copy of the row with exactly +length+ entries
#
# Short rows are extended with empty strings, long rows are truncated, and
# nil is treated as an empty row. The given array is not modified.
#
# @param values [Array<String>, nil] row values
# @param length [Integer] desired length
# @return [Array<String>] padded row
def pad_table_row(values, length)
  row = values.nil? ? [] : values.dup

  missing = length - row.length
  row.concat(Array.new(missing) { '' }) if missing.positive?

  row[0, length]
end
|
|
447
|
+
|
|
448
|
+
# Work out width and padding for every column
#
# A column needs padding when any of its cells (header included) has more
# than one line and was not produced by pipe splitting. Padded columns are
# as wide as their longest line; other columns use the longest header line.
# The width is never below 1. The number of columns follows the header.
#
# @param header_cells [Array<Hash>] header cell data
# @param data_cells [Array<Array<Hash>>] data cell data, one array per row
# @return [Array<Hash>] one +{ width:, pad: }+ hash per column
def compute_table_column_specs(header_cells, data_cells)
  blank = { lines: [''], pipe_split: false }
  longest_line = ->(cell) { cell[:lines].map(&:length).max || 0 }

  header_cells.each_index.map do |index|
    # Header cell first, then that column's cell from each data row
    column = [header_cells[index] || blank] + data_cells.map { |row| row[index] || blank }

    padded = column.any? { |cell| cell[:lines].length > 1 && !cell[:pipe_split] }
    widest = padded ? column.map(&longest_line).max : longest_line.call(column.first)

    { width: [widest, 1].max, pad: padded }
  end
end
|
|
479
|
+
|
|
480
|
+
# Format one logical row as one or more bordered Markdown lines
#
# A row is as tall as its tallest cell. Lines on which every column is blank
# are skipped; if that leaves nothing, a single placeholder line made of
# spaces (sized by the column widths) is returned instead.
#
# @param row_cells [Array<Hash>] cell data
# @param column_specs [Array<Hash>] column specifications
# @return [Array<String>] formatted row lines
def format_table_row(row_cells, column_specs)
  blank = { lines: [''], pipe_split: false }
  height = [row_cells.map { |cell| cell[:lines].length }.max || 0, 1].max

  rendered = (0...height).filter_map do |line_index|
    values = column_specs.each_with_index.map do |spec, column_index|
      line = (row_cells[column_index] || blank)[:lines][line_index] || ''
      spec[:pad] ? pad_table_cell_line(line, spec[:width]) : line.to_s
    end

    "| #{values.join(' | ')} |" unless values.all? { |value| value.strip.empty? }
  end

  return rendered unless rendered.empty?

  ["| #{column_specs.map { |spec| ' ' * spec[:width] }.join(' | ')} |"]
end
|
|
511
|
+
|
|
512
|
+
# Left-justify cell text to the given width
#
# Text that is already at least +width+ long is returned unchanged, as is any
# text when +width+ is zero or negative.
#
# @param text [String] cell text
# @param width [Integer] target width
# @return [String] padded text
def pad_table_cell_line(text, width)
  value = text.to_s
  return value if width <= 0

  value.ljust(width)
end
|
|
521
|
+
|
|
522
|
+
# Build the header separator line of a Markdown table
#
# Each column contributes a run of dashes two longer than its width, with a
# minimum of three dashes.
#
# @param column_widths [Array<Integer>] column widths
# @return [String] separator line
def render_table_separator(column_widths)
  dash_runs = column_widths.map { |width| '-' * [width + 2, 3].max }

  "|#{dash_runs.join('|')}|"
end
|
|
529
|
+
|
|
530
|
+
# Table helpers
|
|
531
|
+
|
|
532
|
+
# Fetch the table's caption as collapsed, stripped inline text
#
# @param table_node [Nokogiri::XML::Element] table element
# @return [String, nil] caption text, or nil when there is no caption or it is blank
def caption_text_for(table_node)
  caption = table_node.at_css('caption')
  return nil unless caption

  text = @inline_collapser.call(caption).strip
  text.empty? ? nil : text
end
|
|
542
|
+
|
|
543
|
+
# Put the caption, followed by a blank line, in front of the table when one is given
#
# @param caption_text [String, nil] caption text
# @param table_markdown [String] table markdown
# @return [String] table with optional caption
def with_optional_caption(caption_text, table_markdown)
  return table_markdown unless caption_text

  "#{caption_text}\n\n#{table_markdown}"
end
|
|
551
|
+
|
|
552
|
+
# Locate the first row flagged as a header
#
# @param rows [Array<Hash>] row data; each row is read through its +:header+ key
# @param default [Integer, nil] value returned when no row is flagged
# @return [Integer, nil] header row index, or +default+
def find_header_index(rows, default: nil)
  # An index of 0 is truthy in Ruby, so `||` only falls through on nil
  rows.find_index { |row| row[:header] } || default
end
|
|
561
|
+
|
|
562
|
+
# Wrap row content in leading and trailing pipe borders
#
# Empty (or nil) content is replaced by a single space so the row is never
# collapsed to bare borders.
#
# @param content [String] row content
# @return [String] bordered row
def format_bordered_row_content(content)
  text = content.to_s

  "| #{text.empty? ? ' ' : text} |"
end
|
|
571
|
+
|
|
572
|
+
# Derive one width per cell from its longest line, never less than 1
#
# @param cells [Array<Hash>] cell data; each cell is read through its +:lines+ key
# @return [Array<Integer>] column widths
def column_widths_from_cells(cells)
  cells.map { |cell| [*cell[:lines].map(&:length), 1].max }
end
|
|
582
|
+
|
|
583
|
+
# Parse a strictly formatted integer
#
# The value is converted to a string and stripped; it must then consist of an
# optional sign followed by digits only.
#
# @param raw [String, nil] raw value
# @return [Integer, nil] parsed integer, or nil for nil and malformed input
def parse_integer(raw)
  return if raw.nil?

  digits = raw.to_s.strip
  digits.match?(/\A[+-]?\d+\z/) ? digits.to_i : nil
end
|
|
595
|
+
end
|
|
596
|
+
end
|
|
597
|
+
end
|