markdown-merge 1.0.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (46)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +251 -0
  4. data/CITATION.cff +20 -0
  5. data/CODE_OF_CONDUCT.md +134 -0
  6. data/CONTRIBUTING.md +227 -0
  7. data/FUNDING.md +74 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +1087 -0
  10. data/REEK +0 -0
  11. data/RUBOCOP.md +71 -0
  12. data/SECURITY.md +21 -0
  13. data/lib/markdown/merge/cleanse/block_spacing.rb +253 -0
  14. data/lib/markdown/merge/cleanse/code_fence_spacing.rb +294 -0
  15. data/lib/markdown/merge/cleanse/condensed_link_refs.rb +405 -0
  16. data/lib/markdown/merge/cleanse.rb +42 -0
  17. data/lib/markdown/merge/code_block_merger.rb +300 -0
  18. data/lib/markdown/merge/conflict_resolver.rb +128 -0
  19. data/lib/markdown/merge/debug_logger.rb +26 -0
  20. data/lib/markdown/merge/document_problems.rb +190 -0
  21. data/lib/markdown/merge/file_aligner.rb +196 -0
  22. data/lib/markdown/merge/file_analysis.rb +353 -0
  23. data/lib/markdown/merge/file_analysis_base.rb +629 -0
  24. data/lib/markdown/merge/freeze_node.rb +93 -0
  25. data/lib/markdown/merge/gap_line_node.rb +136 -0
  26. data/lib/markdown/merge/link_definition_formatter.rb +49 -0
  27. data/lib/markdown/merge/link_definition_node.rb +157 -0
  28. data/lib/markdown/merge/link_parser.rb +421 -0
  29. data/lib/markdown/merge/link_reference_rehydrator.rb +320 -0
  30. data/lib/markdown/merge/markdown_structure.rb +123 -0
  31. data/lib/markdown/merge/merge_result.rb +166 -0
  32. data/lib/markdown/merge/node_type_normalizer.rb +126 -0
  33. data/lib/markdown/merge/output_builder.rb +166 -0
  34. data/lib/markdown/merge/partial_template_merger.rb +334 -0
  35. data/lib/markdown/merge/smart_merger.rb +221 -0
  36. data/lib/markdown/merge/smart_merger_base.rb +621 -0
  37. data/lib/markdown/merge/table_match_algorithm.rb +504 -0
  38. data/lib/markdown/merge/table_match_refiner.rb +136 -0
  39. data/lib/markdown/merge/version.rb +12 -0
  40. data/lib/markdown/merge/whitespace_normalizer.rb +251 -0
  41. data/lib/markdown/merge.rb +149 -0
  42. data/lib/markdown-merge.rb +4 -0
  43. data/sig/markdown/merge.rbs +341 -0
  44. data.tar.gz.sig +0 -0
  45. metadata +365 -0
  46. metadata.gz.sig +0 -0
data/lib/markdown/merge/table_match_algorithm.rb
@@ -0,0 +1,504 @@
+ # frozen_string_literal: true
+
+ module Markdown
+   module Merge
+     # Algorithm for computing match scores between two Markdown tables.
+     #
+     # This algorithm uses multiple factors to determine how well two tables match:
+     # - (A) Percentage of matching header cells (using Levenshtein similarity)
+     # - (B) Percentage of matching cells in the first column (using Levenshtein similarity)
+     # - (C) Average percentage of matching cells in rows with matching first column
+     # - (D) Percentage of matching total cells
+     # - (E) Position distance weight (closer tables score higher)
+     #
+     # Cell comparisons use Levenshtein distance to compute similarity, allowing
+     # partial matches (e.g., "Value" vs "Values" would get a high similarity score).
+     #
+     # The final score is the weighted average of these factors.
+     #
+     # @example Basic usage
+     #   algorithm = TableMatchAlgorithm.new
+     #   score = algorithm.call(table_a, table_b)
+     #
+     # @example With position information
+     #   algorithm = TableMatchAlgorithm.new(
+     #     position_a: 0, # First table in template
+     #     position_b: 2, # Third table in destination
+     #     total_tables_a: 3,
+     #     total_tables_b: 3
+     #   )
+     #   score = algorithm.call(table_a, table_b)
+     class TableMatchAlgorithm
+       # Default weights for each factor in the algorithm
+       DEFAULT_WEIGHTS = {
+         header_match: 0.25, # (A) Header row matching
+         first_column: 0.20, # (B) First column matching
+         row_content: 0.25, # (C) Content in matching rows
+         total_cells: 0.15, # (D) Overall cell matching
+         position: 0.15, # (E) Position distance
+       }.freeze
+
+       # Minimum similarity threshold to consider cells as potentially matching
+       # for first column lookup (used in row content matching)
+       FIRST_COLUMN_SIMILARITY_THRESHOLD = 0.7
+
+       # @return [Integer, nil] Position of table A in its document (0-indexed)
+       attr_reader :position_a
+
+       # @return [Integer, nil] Position of table B in its document (0-indexed)
+       attr_reader :position_b
+
+       # @return [Integer] Total number of tables in document A
+       attr_reader :total_tables_a
+
+       # @return [Integer] Total number of tables in document B
+       attr_reader :total_tables_b
+
+       # @return [Hash] Weights for each scoring factor
+       attr_reader :weights
+
+       # @return [Symbol] The markdown backend being used
+       attr_reader :backend
+
+       # Initialize the table match algorithm.
+       #
+       # @param position_a [Integer, nil] Position of first table in its document
+       # @param position_b [Integer, nil] Position of second table in its document
+       # @param total_tables_a [Integer] Total tables in first document (default: 1)
+       # @param total_tables_b [Integer] Total tables in second document (default: 1)
+       # @param weights [Hash] Custom weights for scoring factors
+       # @param backend [Symbol] Markdown backend for type normalization (default: :commonmarker)
+       def initialize(position_a: nil, position_b: nil, total_tables_a: 1, total_tables_b: 1, weights: {}, backend: :commonmarker)
+         @position_a = position_a
+         @position_b = position_b
+         @total_tables_a = [total_tables_a, 1].max
+         @total_tables_b = [total_tables_b, 1].max
+         @weights = DEFAULT_WEIGHTS.merge(weights)
+         @backend = backend
+       end
+
+       # Compute the match score between two tables.
+       #
+       # @param table_a [Object] First table node
+       # @param table_b [Object] Second table node
+       # @return [Float] Score between 0.0 and 1.0
+       def call(table_a, table_b)
+         rows_a = extract_rows(table_a)
+         rows_b = extract_rows(table_b)
+
+         return 0.0 if rows_a.empty? || rows_b.empty?
+
+         scores = {
+           header_match: compute_header_match(rows_a, rows_b),
+           first_column: compute_first_column_match(rows_a, rows_b),
+           row_content: compute_row_content_match(rows_a, rows_b),
+           total_cells: compute_total_cells_match(rows_a, rows_b),
+           position: compute_position_score,
+         }
+
+         weighted_average(scores)
+       end
+
+       private
+
+       # Compute Levenshtein distance between two strings.
+       #
+       # Uses the Wagner-Fischer algorithm with O(min(m,n)) space.
+       #
+       # @param str_a [String] First string
+       # @param str_b [String] Second string
+       # @return [Integer] Edit distance between the strings
+       def levenshtein_distance(str_a, str_b)
+         return str_b.length if str_a.empty?
+         return str_a.length if str_b.empty?
+
+         # Ensure str_a is the shorter string for space optimization
+         if str_a.length > str_b.length
+           str_a, str_b = str_b, str_a
+         end
+
+         m = str_a.length
+         n = str_b.length
+
+         # Only need two rows at a time
+         prev_row = (0..m).to_a
+         curr_row = Array.new(m + 1, 0)
+
+         (1..n).each do |j|
+           curr_row[0] = j
+
+           (1..m).each do |i|
+             cost = (str_a[i - 1] == str_b[j - 1]) ? 0 : 1
+             curr_row[i] = [
+               curr_row[i - 1] + 1, # insertion
+               prev_row[i] + 1, # deletion
+               prev_row[i - 1] + cost, # substitution
+             ].min
+           end
+
+           prev_row, curr_row = curr_row, prev_row
+         end
+
+         prev_row[m]
+       end
+
+       # Compute similarity between two strings using Levenshtein distance.
+       #
+       # @param str_a [String] First string
+       # @param str_b [String] Second string
+       # @return [Float] Similarity score between 0.0 and 1.0
+       def string_similarity(str_a, str_b)
+         a = normalize(str_a)
+         b = normalize(str_b)
+
+         return 1.0 if a == b
+         return 1.0 if a.empty? && b.empty?
+         return 0.0 if a.empty? || b.empty?
+
+         max_len = [a.length, b.length].max
+         distance = levenshtein_distance(a, b)
+
+         1.0 - (distance.to_f / max_len)
+       end
+
+       # Extract rows from a table node as arrays of cell text.
+       #
+       # Subclasses may override this for parser-specific iteration.
+       #
+       # @param table [Object] Table node
+       # @return [Array<Array<String>>] Array of rows, each row is array of cell texts
+       def extract_rows(table)
+         rows = []
+         child = table.first_child
+         while child
+           if table_row_type?(child)
+             rows << extract_cells(child)
+           end
+           child = next_sibling(child)
+         end
+         rows
+       end
+
+       # Check if a node is a table row type.
+       #
+       # Uses NodeTypeNormalizer to map backend-specific types to canonical types,
+       # enabling portable type checking across different markdown parsers.
+       #
+       # NOTE: We use `type` here instead of `merge_type` because this method operates
+       # on child nodes of tables (table_row, table_header), not top-level statements.
+       # Only top-level statements are wrapped by NodeTypeNormalizer with `merge_type`.
+       # However, we use NodeTypeNormalizer.canonical_type to normalize the raw type.
+       #
+       # @param node [Object] Node to check
+       # @return [Boolean] true if this is a table row
+       def table_row_type?(node)
+         return false unless node.respond_to?(:type)
+
+         # Normalize the type using NodeTypeNormalizer for backend portability
+         canonical = NodeTypeNormalizer.canonical_type(node.type, @backend || :commonmarker)
+         canonical == :table_row || canonical == :table_header
+       end
+
+       # Get the next sibling of a node.
+       #
+       # Different parsers use different methods (next vs next_sibling).
+       #
+       # @param node [Object] Current node
+       # @return [Object, nil] Next sibling or nil
+       def next_sibling(node)
+         if node.respond_to?(:next_sibling)
+           node.next_sibling
+         elsif node.respond_to?(:next)
+           node.next
+         end
+       end
+
+       # Extract cell texts from a table row.
+       #
+       # Uses NodeTypeNormalizer to map backend-specific types to canonical types,
+       # enabling portable type checking across different markdown parsers.
+       #
+       # NOTE: We use `type` here instead of `merge_type` because this method operates
+       # on child nodes of table rows (table_cell), not top-level statements.
+       # Only top-level statements are wrapped by NodeTypeNormalizer with `merge_type`.
+       # However, we use NodeTypeNormalizer.canonical_type to normalize the raw type.
+       #
+       # @param row [Object] Table row node
+       # @return [Array<String>] Array of cell text contents
+       def extract_cells(row)
+         cells = []
+         child = row.first_child
+         while child
+           if child.respond_to?(:type)
+             canonical = NodeTypeNormalizer.canonical_type(child.type, @backend || :commonmarker)
+             if canonical == :table_cell
+               cells << extract_text_content(child)
+             end
+           end
+           child = next_sibling(child)
+         end
+         cells
+       end
+
+       # Extract all text content from a node.
+       #
+       # Uses recursive traversal instead of `walk` for compatibility
+       # with tree_haver nodes which don't have a `walk` method.
+       #
+       # @param node [Object] Node to extract text from
+       # @return [String] Concatenated text content
+       def extract_text_content(node)
+         text_parts = []
+         collect_text_recursive(node, text_parts)
+         text_parts.join.strip
+       end
+
+       # Recursively collect text content from a node and its descendants.
+       #
+       # Uses NodeTypeNormalizer to map backend-specific types to canonical types,
+       # enabling portable type checking across different markdown parsers.
+       #
+       # NOTE: We use `type` here instead of `merge_type` because this method operates
+       # on child nodes (text, code), not top-level statements.
+       # Only top-level statements are wrapped by NodeTypeNormalizer with `merge_type`.
+       # However, we use NodeTypeNormalizer.canonical_type to normalize the raw type.
+       #
+       # @param node [Object] The node to traverse
+       # @param text_parts [Array<String>] Array to accumulate text into
+       # @return [void]
+       def collect_text_recursive(node, text_parts)
+         # Normalize the type using NodeTypeNormalizer for backend portability
+         canonical_type = NodeTypeNormalizer.canonical_type(node.type, @backend || :commonmarker)
+
+         # Collect text from text and code nodes
+         if canonical_type == :text || canonical_type == :code
+           content = if node.respond_to?(:string_content)
+             node.string_content.to_s
+           elsif node.respond_to?(:text)
+             node.text.to_s
+           else
+             ""
+           end
+           text_parts << content unless content.empty?
+         end
+
+         # Recurse into children - support both children array and first_child iteration
+         if node.respond_to?(:children)
+           node.children.each do |child|
+             collect_text_recursive(child, text_parts)
+           end
+         elsif node.respond_to?(:first_child)
+           child = node.first_child
+           while child
+             collect_text_recursive(child, text_parts)
+             child = if child.respond_to?(:next_sibling)
+               child.next_sibling
+             else
+               (child.respond_to?(:next) ? child.next : nil)
+             end
+           end
+         end
+       end
+
+       # (A) Compute header row match percentage using Levenshtein similarity.
+       #
+       # @param rows_a [Array<Array<String>>] Rows from table A
+       # @param rows_b [Array<Array<String>>] Rows from table B
+       # @return [Float] Average similarity of header cells (0.0-1.0)
+       def compute_header_match(rows_a, rows_b)
+         header_a = rows_a.first || []
+         header_b = rows_b.first || []
+
+         return 1.0 if header_a.empty? && header_b.empty?
+         return 0.0 if header_a.empty? || header_b.empty?
+
+         max_cells = [header_a.size, header_b.size].max
+
+         # Compute similarity for each cell pair
+         similarities = header_a.zip(header_b).map do |a, b|
+           next 0.0 if a.nil? || b.nil?
+
+           string_similarity(a, b)
+         end
+
+         # Pad with zeros for missing cells
+         (max_cells - similarities.size).times { similarities << 0.0 }
+
+         similarities.sum / max_cells
+       end
+
+       # (B) Compute first column match percentage using Levenshtein similarity.
+       #
+       # @param rows_a [Array<Array<String>>] Rows from table A
+       # @param rows_b [Array<Array<String>>] Rows from table B
+       # @return [Float] Percentage of matching first column cells (0.0-1.0)
+       def compute_first_column_match(rows_a, rows_b)
+         col_a = rows_a.map { |row| row.first }.compact
+         col_b = rows_b.map { |row| row.first }.compact
+
+         return 1.0 if col_a.empty? && col_b.empty?
+         return 0.0 if col_a.empty? || col_b.empty?
+
+         # For each cell in column A, find best match in column B
+         total_similarity = 0.0
+         col_a.each do |cell_a|
+           best_match = col_b.map { |cell_b| string_similarity(cell_a, cell_b) }.max || 0.0
+           total_similarity += best_match
+         end
+
+         # Also check cells in B that might not have matches in A
+         col_b.each do |cell_b|
+           best_match = col_a.map { |cell_a| string_similarity(cell_a, cell_b) }.max || 0.0
+           total_similarity += best_match
+         end
+
+         # Average over total cells
+         total_cells = col_a.size + col_b.size
+         (total_cells > 0) ? total_similarity / total_cells : 0.0
+       end
+
+       # (C) Compute average match percentage for rows with matching first column.
+       #
+       # Uses Levenshtein similarity to find matching rows by first column.
+       #
+       # @param rows_a [Array<Array<String>>] Rows from table A
+       # @param rows_b [Array<Array<String>>] Rows from table B
+       # @return [Float] Average percentage of matching cells in linked rows (0.0-1.0)
+       def compute_row_content_match(rows_a, rows_b)
+         return 0.0 if rows_a.empty? || rows_b.empty?
+
+         match_scores = []
+
+         rows_a.each do |row_a|
+           first_col_a = row_a.first
+           next if first_col_a.nil?
+
+           # Find best matching row in B based on first column similarity
+           best_row_match = nil
+           best_first_col_similarity = 0.0
+
+           rows_b.each do |row_b|
+             first_col_b = row_b.first
+             next if first_col_b.nil?
+
+             similarity = string_similarity(first_col_a, first_col_b)
+             if similarity > best_first_col_similarity && similarity >= FIRST_COLUMN_SIMILARITY_THRESHOLD
+               best_first_col_similarity = similarity
+               best_row_match = row_b
+             end
+           end
+
+           next unless best_row_match
+
+           # Compute row content similarity
+           match_scores << row_match_score(row_a, best_row_match)
+         end
+
+         return 0.0 if match_scores.empty?
+
+         match_scores.sum / match_scores.size
+       end
+
+       # Compute match score between two rows using Levenshtein similarity.
+       #
+       # @param row_a [Array<String>] First row
+       # @param row_b [Array<String>] Second row
+       # @return [Float] Average similarity of cells (0.0-1.0)
+       def row_match_score(row_a, row_b)
+         max_cells = [row_a.size, row_b.size].max
+         return 1.0 if max_cells == 0
+
+         similarities = row_a.zip(row_b).map do |a, b|
+           next 0.0 if a.nil? || b.nil?
+
+           string_similarity(a, b)
+         end
+
+         # Pad with zeros for missing cells
+         (max_cells - similarities.size).times { similarities << 0.0 }
+
+         similarities.sum / max_cells
+       end
+
+       # (D) Compute total cells match percentage using Levenshtein similarity.
+       #
+       # @param rows_a [Array<Array<String>>] Rows from table A
+       # @param rows_b [Array<Array<String>>] Rows from table B
+       # @return [Float] Percentage of matching total cells (0.0-1.0)
+       def compute_total_cells_match(rows_a, rows_b)
+         cells_a = rows_a.flatten.compact
+         cells_b = rows_b.flatten.compact
+
+         return 1.0 if cells_a.empty? && cells_b.empty?
+         return 0.0 if cells_a.empty? || cells_b.empty?
+
+         # For each cell in A, find best match in B
+         used_b_indices = Set.new
+         total_similarity = 0.0
+
+         cells_a.each do |cell_a|
+           best_similarity = 0.0
+           best_index = nil
+
+           cells_b.each_with_index do |cell_b, idx|
+             next if used_b_indices.include?(idx)
+
+             similarity = string_similarity(cell_a, cell_b)
+             if similarity > best_similarity
+               best_similarity = similarity
+               best_index = idx
+             end
+           end
+
+           if best_index && best_similarity > 0.5
+             used_b_indices << best_index
+             total_similarity += best_similarity
+           end
+         end
+
+         # Calculate score based on how many cells found good matches
+         max_cells = [cells_a.size, cells_b.size].max
+         total_similarity / max_cells
+       end
+
+       # (E) Compute position-based score.
+       #
+       # Tables at similar positions in their documents score higher.
+       #
+       # @return [Float] Position similarity score (0.0-1.0)
+       def compute_position_score
+         return 1.0 if position_a.nil? || position_b.nil?
+
+         # Normalize positions to 0-1 range based on total tables
+         norm_pos_a = position_a.to_f / total_tables_a
+         norm_pos_b = position_b.to_f / total_tables_b
+
+         # Distance is absolute difference in normalized positions
+         distance = (norm_pos_a - norm_pos_b).abs
+
+         # Convert to similarity (1.0 = same position, 0.0 = max distance)
+         1.0 - distance
+       end
+
+       # Normalize a cell value for comparison.
+       #
+       # @param value [String, nil] Cell value
+       # @return [String] Normalized value (downcased, stripped)
+       def normalize(value)
+         value.to_s.strip.downcase
+       end
+
+       # Compute weighted average of scores.
+       #
+       # @param scores [Hash<Symbol, Float>] Individual scores by factor
+       # @return [Float] Weighted average score
+       def weighted_average(scores)
+         total_weight = weights.values.sum
+         return 0.0 if total_weight == 0
+
+         weighted_sum = scores.sum { |key, score| score * weights.fetch(key, 0) }
+         weighted_sum / total_weight
+       end
+     end
+   end
+ end
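
To make the scoring concrete, here is a small standalone sketch (not part of the gem) of the two calculations the class documentation above describes: the Levenshtein-based cell similarity for the "Value" vs "Values" case, and the weighted combination of the five factor scores using DEFAULT_WEIGHTS. The per-factor scores are invented for illustration, and the helper names (levenshtein, similarity) are not gem API; only the weights and the formulas mirror the source.

# Illustrative only: re-implements the similarity and weighting math outside the gem.
def levenshtein(a, b)
  return b.length if a.empty?
  return a.length if b.empty?

  prev = (0..a.length).to_a
  b.each_char.with_index(1) do |ch_b, j|
    curr = [j]
    a.each_char.with_index(1) do |ch_a, i|
      cost = (ch_a == ch_b) ? 0 : 1
      curr << [curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost].min
    end
    prev = curr
  end
  prev.last
end

def similarity(a, b)
  a = a.to_s.strip.downcase
  b = b.to_s.strip.downcase
  return 1.0 if a == b

  1.0 - levenshtein(a, b).to_f / [a.length, b.length].max
end

similarity("Value", "Values") # => ~0.83 (edit distance 1 over 6 characters)

# Weighted average over the five factors, using DEFAULT_WEIGHTS and hypothetical
# per-factor scores.
weights = {header_match: 0.25, first_column: 0.20, row_content: 0.25, total_cells: 0.15, position: 0.15}
scores = {header_match: 0.90, first_column: 0.80, row_content: 0.70, total_cells: 0.60, position: 1.00}
scores.sum { |k, v| v * weights[k] } / weights.values.sum # => 0.8
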
data/lib/markdown/merge/table_match_refiner.rb
@@ -0,0 +1,136 @@
+ # frozen_string_literal: true
+
+ module Markdown
+   module Merge
+     # Match refiner for Markdown tables that didn't match by exact signature.
+     #
+     # This refiner uses the TableMatchAlgorithm to pair tables that have:
+     # - Similar but not identical headers
+     # - Similar structure (row/column counts)
+     # - Similar content in key columns
+     #
+     # Tables are matched using a multi-factor scoring algorithm that considers:
+     # - Header cell similarity
+     # - First column (row label) similarity
+     # - Overall content overlap
+     # - Position in document
+     #
+     # @example Basic usage
+     #   refiner = TableMatchRefiner.new(threshold: 0.5)
+     #   matches = refiner.call(template_nodes, dest_nodes)
+     #
+     # @example With custom algorithm options
+     #   refiner = TableMatchRefiner.new(
+     #     threshold: 0.6,
+     #     algorithm_options: {
+     #       weights: { header_match: 0.4, position: 0.1 }
+     #     }
+     #   )
+     #
+     # @see Ast::Merge::MatchRefinerBase
+     # @see TableMatchAlgorithm
+     class TableMatchRefiner < Ast::Merge::MatchRefinerBase
+       # @return [Hash] Options passed to TableMatchAlgorithm
+       attr_reader :algorithm_options
+
+       # @return [Symbol] The markdown backend being used
+       attr_reader :backend
+
+       # Initialize a table match refiner.
+       #
+       # @param threshold [Float] Minimum score to accept a match (default: 0.5)
+       # @param algorithm_options [Hash] Options for TableMatchAlgorithm
+       # @param backend [Symbol] Markdown backend for type normalization (default: :commonmarker)
+       def initialize(threshold: DEFAULT_THRESHOLD, algorithm_options: {}, backend: :commonmarker, **options)
+         super(threshold: threshold, node_types: [:table], **options)
+         @algorithm_options = algorithm_options
+         @backend = backend
+       end
+
+       # Find matches between unmatched table nodes.
+       #
+       # @param template_nodes [Array] Unmatched nodes from template
+       # @param dest_nodes [Array] Unmatched nodes from destination
+       # @param context [Hash] Additional context (may contain :template_analysis, :dest_analysis)
+       # @return [Array<MatchResult>] Array of table matches
+       def call(template_nodes, dest_nodes, context = {})
+         template_tables = extract_tables(template_nodes)
+         dest_tables = extract_tables(dest_nodes)
+
+         return [] if template_tables.empty? || dest_tables.empty?
+
+         # Build position information for better matching
+         total_template = template_tables.size
+         total_dest = dest_tables.size
+
+         greedy_match(template_tables, dest_tables) do |t_node, d_node|
+           t_idx = template_tables.index(t_node) || 0
+           d_idx = dest_tables.index(d_node) || 0
+
+           compute_table_similarity(t_node, d_node, t_idx, d_idx, total_template, total_dest)
+         end
+       end
+
+       private
+
+       # Extract table nodes from a collection.
+       #
+       # @param nodes [Array] Nodes to filter
+       # @return [Array] Table nodes
+       def extract_tables(nodes)
+         nodes.select { |n| table_node?(n) }
+       end
+
+       # Check if a node is a table.
+       #
+       # Handles wrapped nodes (merge_type is symbol) and raw nodes (type is string).
+       #
+       # @param node [Object] Node to check
+       # @return [Boolean]
+       def table_node?(node)
+         # Check if it's a typed wrapper node first
+         if Ast::Merge::NodeTyping.typed_node?(node)
+           return Ast::Merge::NodeTyping.merge_type_for(node) == :table
+         end
+
+         # Check merge_type directly (wrapped nodes from NodeTypeNormalizer)
+         if node.respond_to?(:merge_type) && node.merge_type
+           return node.merge_type == :table
+         end
+
+         # Check raw type (string comparison for tree_haver nodes)
+         if node.respond_to?(:type)
+           node_type = node.type
+           return node_type == :table || node_type == "table" || node_type.to_s == "table"
+         end
+
+         # Fallback: class name check
+         return true if node.class.name.to_s.include?("Table")
+
+         false
+       end
+
+       # Compute similarity score between two tables.
+       #
+       # @param t_table [Object] Template table
+       # @param d_table [Object] Destination table
+       # @param t_idx [Integer] Template table index
+       # @param d_idx [Integer] Destination table index
+       # @param total_t [Integer] Total template tables
+       # @param total_d [Integer] Total destination tables
+       # @return [Float] Similarity score (0.0-1.0)
+       def compute_table_similarity(t_table, d_table, t_idx, d_idx, total_t, total_d)
+         algorithm = TableMatchAlgorithm.new(
+           position_a: t_idx,
+           position_b: d_idx,
+           total_tables_a: total_t,
+           total_tables_b: total_d,
+           backend: @backend,
+           **algorithm_options,
+         )
+
+         algorithm.call(t_table, d_table)
+       end
+     end
+   end
+ end
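
TableMatchRefiner scores every template/destination table pair with TableMatchAlgorithm and delegates the pairing decision to greedy_match, which is defined in Ast::Merge::MatchRefinerBase and not shown in this diff. As a rough, hypothetical sketch of that style of greedy pairing (highest scores first, each table used at most once, pairs below the threshold dropped):

# Hypothetical illustration of greedy best-score pairing; not the
# Ast::Merge::MatchRefinerBase implementation, which is outside this diff.
def greedy_pairs(templates, destinations, threshold)
  scored = []
  templates.each_with_index do |t, ti|
    destinations.each_with_index do |d, di|
      score = yield(t, d)
      scored << [score, ti, di] if score >= threshold
    end
  end

  used_t = {}
  used_d = {}
  pairs = []
  scored.sort_by { |score, _, _| -score }.each do |score, ti, di|
    next if used_t[ti] || used_d[di]

    used_t[ti] = used_d[di] = true
    pairs << [templates[ti], destinations[di], score]
  end
  pairs
end

# Usage with the refiner's scoring block, assuming t_tables/d_tables are the
# extracted table nodes:
#   greedy_pairs(t_tables, d_tables, 0.5) { |t, d| TableMatchAlgorithm.new.call(t, d) }
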
data/lib/markdown/merge/version.rb
@@ -0,0 +1,12 @@
+ # frozen_string_literal: true
+
+ module Markdown
+   module Merge
+     # Version information for Markdown::Merge
+     module Version
+       # Current version of the markdown-merge gem
+       VERSION = "1.0.0"
+     end
+     VERSION = Version::VERSION # traditional location
+   end
+ end