canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,197 @@
1
+ # Character visualization and categorization map
2
+ # Format:
3
+ # unicode: XXXX (hex code without 0x prefix, e.g., 2005 for U+2005)
4
+ # OR character: "x" (a literal character or a double-quoted escape sequence, e.g. " ", "\t", or "\r\n")
5
+ # visualization: The symbol to display instead
6
+ # category: whitespace, line_endings, zero_width, directional, or control
7
+ # name: Human-readable name (optional, will use Unicode::Name if not provided)
8
+
9
+ characters:
10
+ # Common whitespace characters
11
+ - character: " "
12
+ visualization: "░"
13
+ category: whitespace
14
+ name: "Space"
15
+
16
+ - character: "\t"
17
+ visualization: "⇥"
18
+ category: whitespace
19
+ name: "Tab"
20
+
21
+ - unicode: "00A0"
22
+ visualization: "␣"
23
+ category: whitespace
24
+ name: "No-Break Space"
25
+
26
+ # Line endings
27
+ - character: "\n"
28
+ visualization: "↵"
29
+ category: line_endings
30
+ name: "Line Feed"
31
+
32
+ - character: "\r"
33
+ visualization: "⏎"
34
+ category: line_endings
35
+ name: "Carriage Return"
36
+
37
+ - character: "\r\n"
38
+ visualization: "↵"
39
+ category: line_endings
40
+ name: "CRLF"
41
+
42
+ - unicode: "0085"
43
+ visualization: "⏎"
44
+ category: line_endings
45
+ name: "Next Line"
46
+
47
+ - unicode: "2028"
48
+ visualization: "⤓"
49
+ category: line_endings
50
+ name: "Line Separator"
51
+
52
+ - unicode: "2029"
53
+ visualization: "⤓"
54
+ category: line_endings
55
+ name: "Paragraph Separator"
56
+
57
+ # Unicode spaces
58
+ - unicode: "2002"
59
+ visualization: "▭"
60
+ category: whitespace
61
+ name: "En Space"
62
+
63
+ - unicode: "2003"
64
+ visualization: "▬"
65
+ category: whitespace
66
+ name: "Em Space"
67
+
68
+ - unicode: "2005"
69
+ visualization: "⏓"
70
+ category: whitespace
71
+ name: "Four-Per-Em Space"
72
+
73
+ - unicode: "2006"
74
+ visualization: "⏕"
75
+ category: whitespace
76
+ name: "Six-Per-Em Space"
77
+
78
+ - unicode: "2009"
79
+ visualization: "▯"
80
+ category: whitespace
81
+ name: "Thin Space"
82
+
83
+ - unicode: "200A"
84
+ visualization: "▮"
85
+ category: whitespace
86
+ name: "Hair Space"
87
+
88
+ - unicode: "2007"
89
+ visualization: "□"
90
+ category: whitespace
91
+ name: "Figure Space"
92
+
93
+ - unicode: "202F"
94
+ visualization: "▫"
95
+ category: whitespace
96
+ name: "Narrow No-Break Space"
97
+
98
+ - unicode: "205F"
99
+ visualization: "▭"
100
+ category: whitespace
101
+ name: "Medium Mathematical Space"
102
+
103
+ - unicode: "3000"
104
+ visualization: "⎵"
105
+ category: whitespace
106
+ name: "Ideographic Space"
107
+
108
+ - unicode: "303F"
109
+ visualization: "⏑"
110
+ category: whitespace
111
+ name: "Ideographic Half Fill Space"
112
+
113
+ # Zero-width characters
114
+ - unicode: "200B"
115
+ visualization: "→"
116
+ category: zero_width
117
+ name: "Zero Width Space"
118
+
119
+ - unicode: "200C"
120
+ visualization: "↛"
121
+ category: zero_width
122
+ name: "Zero Width Non-Joiner"
123
+
124
+ - unicode: "200D"
125
+ visualization: "⇢"
126
+ category: zero_width
127
+ name: "Zero Width Joiner"
128
+
129
+ - unicode: "FEFF"
130
+ visualization: "⇨"
131
+ category: zero_width
132
+ name: "Zero Width No-Break Space"
133
+
134
+ # Directional markers
135
+ - unicode: "200E"
136
+ visualization: "⟹"
137
+ category: directional
138
+ name: "Left-To-Right Mark"
139
+
140
+ - unicode: "200F"
141
+ visualization: "⟸"
142
+ category: directional
143
+ name: "Right-To-Left Mark"
144
+
145
+ - unicode: "202A"
146
+ visualization: "⇒"
147
+ category: directional
148
+ name: "Left-To-Right Embedding"
149
+
150
+ - unicode: "202B"
151
+ visualization: "⇐"
152
+ category: directional
153
+ name: "Right-To-Left Embedding"
154
+
155
+ - unicode: "202C"
156
+ visualization: "↔"
157
+ category: directional
158
+ name: "Pop Directional Formatting"
159
+
160
+ - unicode: "202D"
161
+ visualization: "⇉"
162
+ category: directional
163
+ name: "Left-To-Right Override"
164
+
165
+ - unicode: "202E"
166
+ visualization: "⇇"
167
+ category: directional
168
+ name: "Right-To-Left Override"
169
+
170
+ # Control characters
171
+ - unicode: "0000"
172
+ visualization: "␀"
173
+ category: control
174
+ name: "Null"
175
+
176
+ - unicode: "00AD"
177
+ visualization: "­‐"
178
+ category: control
179
+ name: "Soft Hyphen"
180
+
181
+ - unicode: "0008"
182
+ visualization: "␈"
183
+ category: control
184
+ name: "Backspace"
185
+
186
+ - unicode: "007F"
187
+ visualization: "␡"
188
+ category: control
189
+ name: "Delete"
190
+
191
+ # Category display names
192
+ category_names:
193
+ whitespace: "Whitespace"
194
+ line_endings: "Line Endings"
195
+ zero_width: "Zero-Width Characters"
196
+ directional: "Directional Markers"
197
+ control: "Control Characters"
@@ -0,0 +1,431 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "diff_detail_formatter"
4
+
5
module Canon
  class DiffFormatter
    # Verbose diff output helper for CANON_VERBOSE mode.
    #
    # Can be activated by:
    #   1. Environment variable: CANON_VERBOSE=1 (or CANON_VERBOSE=true)
    #   2. Diff option: verbose_diff: true
    #
    # Every public method returns a String; nothing is printed from here.
    # Tables are rendered with the table_tennis gem, which is loaded lazily
    # the first time a table is actually built.
    module DebugOutput
      # Meaning of each behavior of the "preprocessing" dimension.
      PREPROCESSING_DESCRIPTIONS = {
        none: "No preprocessing (compare as-is)",
        c14n: "Canonicalize (XML C14N normalization)",
        normalize: "Normalize (collapse whitespace, trim lines)",
        format: "Pretty-format (consistent indentation)",
        rendered: "As browser-rendered (compacted whitespace, to_html)",
      }.freeze

      # Meaning of each behavior for every other match dimension.
      BEHAVIOR_DESCRIPTIONS = {
        ignore: "Differences IGNORED (informative)",
        normalize: "Normalized then compared (normative if different after normalization)",
        strict: "Must match exactly (normative)",
        strip: "Strip leading/trailing whitespace only",
        compact: "Collapse whitespace runs to single space",
      }.freeze

      # Explanation of each value of the :show_diffs formatter option.
      SHOW_DIFFS_IMPACTS = {
        all: "Show all diffs (normative + informative)",
        normative: "Show only normative (semantic) diffs",
        informative: "Show only informative (textual) diffs",
      }.freeze

      class << self
        # Whether verbose output is switched on.
        #
        # @param verbose_diff_option [Object] value of the verbose_diff diff
        #   option; any truthy value enables verbose mode
        # @return [Object] truthy when the option is set or CANON_VERBOSE is
        #   exactly "1" or "true"
        def enabled?(verbose_diff_option = false)
          verbose_diff_option ||
            ENV["CANON_VERBOSE"] == "1" ||
            ENV["CANON_VERBOSE"] == "true"
        end

        # Return ONLY the CANON VERBOSE tables (not the Semantic Diff Report,
        # which is part of the main diff output).
        #
        # @param comparison_result [Object] normally a
        #   Canon::Comparison::ComparisonResult
        # @param formatter_options [Hash] options given to the formatter;
        #   :verbose_diff is read to decide whether to produce output
        # @return [String] "" when verbose mode is off, otherwise the banner
        #   followed by the match-options, formatter-options and summary
        #   sections
        def verbose_tables_only(comparison_result, formatter_options = {})
          return "" unless enabled?(formatter_options[:verbose_diff] || false)

          rule = "=" * 80

          [
            "",
            rule,
            "CANON VERBOSE MODE - DETAILED OPTIONS",
            rule,
            "",
            format_match_options_table(comparison_result),
            "",
            format_formatter_options_table(formatter_options),
            "",
            format_comparison_summary(comparison_result),
            "",
            rule,
            "",
          ].join("\n")
        end

        # Backward compatibility alias for {verbose_tables_only}.
        def debug_info(comparison_result, formatter_options = {})
          verbose_tables_only(comparison_result, formatter_options)
        end

        # Table of match options (dimension, behavior, meaning).
        #
        # @param comparison_result [Object]
        # @return [String] the rendered table, or a "(not available)" notice
        #   when the argument is not a ComparisonResult or has no match options
        def format_match_options_table(comparison_result)
          unless comparison_result.is_a?(Canon::Comparison::ComparisonResult) &&
              comparison_result.match_options
            return "MATCH OPTIONS: (not available)"
          end

          rows = comparison_result.match_options.map do |dimension, behavior|
            {
              dimension: dimension.to_s,
              behavior: behavior.to_s,
              description: dimension_description(dimension, behavior),
            }
          end

          render_table(
            rows,
            title: "Match Options (#{comparison_result.format.to_s.upcase})",
            columns: %i[dimension behavior description],
            headers: { dimension: "Dimension", behavior: "Behavior",
                       description: "Meaning" },
            zebra: true,
          )
        end

        # Human-readable meaning of a behavior for a match dimension.
        #
        # @param dimension [#to_s] dimension name; "preprocessing" has its
        #   own vocabulary
        # @param behavior [Symbol, Object] configured behavior
        # @return [String] the description, or behavior.to_s when unknown
        def dimension_description(dimension, behavior)
          table = if dimension.to_s == "preprocessing"
                    PREPROCESSING_DESCRIPTIONS
                  else
                    BEHAVIOR_DESCRIPTIONS
                  end

          table.fetch(behavior) { behavior.to_s }
        end

        # Table of formatter options (option, value, impact).
        #
        # @param formatter_options [Hash]
        # @return [String] the rendered table
        def format_formatter_options_table(formatter_options)
          rows = formatter_options.map do |key, value|
            {
              option: key.to_s,
              value: format_value(value),
              impact: option_impact(key, value),
            }
          end

          render_table(
            rows,
            title: "Formatter Options",
            columns: %i[option value impact],
            headers: { option: "Option", value: "Value", impact: "Impact" },
            zebra: true,
          )
        end

        # Compact textual form of an option value.
        #
        # @param value [Object]
        # @return [String] to_s for symbols, integers, strings and booleans;
        #   "(nil)" for nil; the class name for anything else
        def format_value(value)
          case value
          when Symbol, Integer, String, true, false
            value.to_s
          when nil
            "(nil)"
          else
            value.class.name
          end
        end

        # One-line explanation of what a formatter option does.
        #
        # @param key [Symbol] option name
        # @param value [Object] option value
        # @return [String] the explanation, or "-" for options not described
        #   here
        def option_impact(key, value)
          case key
          when :show_diffs
            SHOW_DIFFS_IMPACTS.fetch(value) { value.to_s }
          when :mode
            value == :by_line ? "Line-by-line diff" : "Object tree diff"
          when :context_lines
            "#{value} lines of context around diffs"
          when :diff_grouping_lines
            value ? "Group diffs within #{value} lines" : "No grouping"
          else
            "-"
          end
        end

        # Summary table: equivalence plus normative/informative/total counts.
        #
        # @param comparison_result [Object]
        # @return [String] the rendered table, or a notice when the argument
        #   is not a ComparisonResult
        def format_comparison_summary(comparison_result)
          unless comparison_result.is_a?(Canon::Comparison::ComparisonResult)
            return "COMPARISON RESULT: (not a ComparisonResult object)"
          end

          equivalent = comparison_result.equivalent?
          normative_count = comparison_result.normative_differences.length
          informative_count = comparison_result.informative_differences.length

          rows = [
            {
              metric: "Equivalent?",
              value: equivalent ? "✓ YES" : "✗ NO",
              detail: equivalent ? "Documents are semantically equivalent" : "Documents have semantic differences",
            },
            {
              metric: "Normative Diffs",
              value: normative_count.positive? ? "#{normative_count} diffs" : "0",
              detail: "Semantic differences that matter",
            },
            {
              metric: "Informative Diffs",
              value: informative_count.positive? ? "#{informative_count} diffs" : "0",
              detail: "Textual/formatting differences (ignored)",
            },
            {
              metric: "Total Diffs",
              value: comparison_result.differences.length.to_s,
              detail: "All differences found",
            },
          ]

          render_table(
            rows,
            title: "Comparison Result Summary",
            columns: %i[metric value detail],
            headers: { metric: "Metric", value: "Value",
                       detail: "Description" },
            zebra: true,
          )
        end

        # Table with one row per difference.
        #
        # Canon::Diff::DiffNode entries get their dimension, a marker ("+/-"
        # when normative, "~" otherwise) and a short description of both
        # sides. Hash entries only expose their :dimension. Anything else
        # becomes a row of dashes.
        #
        # @param differences [Array]
        # @return [String] heading followed by the rendered table
        def format_differences_tree(differences)
          rows = differences.each_with_index.map do |diff, index|
            if diff.is_a?(Canon::Diff::DiffNode)
              detail1, detail2 = format_node_diff_detail(diff)

              {
                "#": index + 1,
                dimension: diff.dimension.to_s,
                marker: diff.normative? ? "+/-" : "~",
                diff1: detail1,
                diff2: detail2,
              }
            elsif diff.is_a?(Hash)
              {
                "#": index + 1,
                dimension: diff[:dimension] || "(unknown)",
                marker: "+/-",
                diff1: "(hash)",
                diff2: "(hash)",
              }
            else
              {
                "#": index + 1,
                dimension: "-",
                marker: "-",
                diff1: "-",
                diff2: "-",
              }
            end
          end

          table = render_table(
            rows,
            title: "Differences Detail (#{differences.length} total)",
            columns: %i[# dimension marker diff1 diff2],
            headers: {
              "#": "#",
              dimension: "Dimension",
              marker: "Marker",
              diff1: "Expected (File 1)",
              diff2: "Actual (File 2)",
            },
            zebra: true,
            mark: ->(row) { row[:marker] == "+/-" },
          )

          ["DIFFERENCES TREE:", "", table].join("\n")
        end

        # Very short description of a single node.
        #
        # @param node [Object, nil]
        # @return [String] "(nil)", "<name>", a quoted preview of the node's
        #   content/text (shortened once it exceeds 30 characters), or the
        #   node's class name when it offers none of those
        def format_node_brief(node)
          return "(nil)" if node.nil?
          return "<#{node.name}>" if node.respond_to?(:name)
          return truncate_quoted(node.content.to_s, 30) if node.respond_to?(:content)
          return truncate_quoted(node.text.to_s, 30) if node.respond_to?(:text)

          node.class.name
        end

        # Describe both sides of a difference.
        #
        # @param diff [#node1, #node2, #dimension]
        # @return [Array(String, String)] descriptions of node1 and node2
        def format_node_diff_detail(diff)
          node1 = diff.node1
          node2 = diff.node2

          # For attribute differences, show which attributes differ
          if diff.dimension == :attribute_whitespace &&
              node1.respond_to?(:attributes) && node2.respond_to?(:attributes)
            return ["<#{node1.name}> #{format_attributes(node1)}",
                    "<#{node2.name}> #{format_attributes(node2)}"]
          end

          # For element differences, show element names (identical or not)
          if node1.respond_to?(:name) && node2.respond_to?(:name)
            return ["<#{node1.name}>", "<#{node2.name}>"]
          end

          # For text differences, show content preview
          if %i[text_content structural_whitespace].include?(diff.dimension)
            return [format_content_preview(get_node_content(node1)),
                    format_content_preview(get_node_content(node2))]
          end

          # Fallback to brief format
          [format_node_brief(node1), format_node_brief(node2)]
        end

        # Render a node's attributes as sorted name="value" pairs, limited
        # to the first three (followed by " ..." when more exist).
        #
        # Accepts a Hash-like collection (name => value, or name => object
        # responding to #value) and also a list of attribute objects that
        # carry both #name and #value themselves.
        #
        # @param node [Object]
        # @return [String] "" when the node has no attributes
        def format_attributes(node)
          return "" unless node.respond_to?(:attributes)

          attrs = node.attributes
          return "" if attrs.empty?

          pairs = attrs.map do |key, val|
            name = if key.is_a?(String)
                     key
                   else
                     (key.respond_to?(:name) ? key.name : key.to_s)
                   end
            # A plain list yields one object per entry, leaving val nil; in
            # that case the attribute object itself holds the value.
            holder = val.nil? && key.respond_to?(:value) ? key : val
            value = holder.respond_to?(:value) ? holder.value : holder.to_s
            "#{name}=\"#{value}\""
          end.sort

          # Limit to first 3 attributes
          if pairs.length > 3
            "#{pairs[0..2].join(' ')} ..."
          else
            pairs.join(" ")
          end
        end

        # Textual payload of a node.
        #
        # @param node [Object]
        # @return [String] node.content or node.text as a String, "" when
        #   the node offers neither
        def get_node_content(node)
          if node.respond_to?(:content)
            node.content.to_s
          elsif node.respond_to?(:text)
            node.text.to_s
          else
            ""
          end
        end

        # Quoted preview of a piece of content.
        #
        # @param content [String, nil]
        # @return [String] '""' for nil/empty content; otherwise the quoted
        #   content, cut to its first 38 characters plus "..." once it is
        #   longer than 40 characters
        def format_content_preview(content)
          return '""' if content.nil? || content.empty?

          truncate_quoted(content, 40)
        end

        # Dump the contexts and diff blocks of a diff report.
        #
        # Only honours the CANON_VERBOSE environment variable (enabled? is
        # called without a verbose_diff option).
        #
        # @param diff_report [#contexts, nil]
        # @return [String] "" when verbose mode is off
        def debug_diff_structure(diff_report)
          return "" unless enabled?

          lines = ["", "DIFF STRUCTURE (DiffReport):", ""]

          if diff_report.nil? || diff_report.contexts.empty?
            lines << " (no diff contexts)"
            return lines.join("\n")
          end

          lines << " Total contexts: #{diff_report.contexts.length}"
          lines << ""

          # Show contexts and blocks in table format
          diff_report.contexts.each_with_index do |context, ctx_idx|
            lines << " Context #{ctx_idx + 1}: Lines #{context.start_line}-#{context.end_line}"
            lines << ""

            next unless context.diff_blocks.any?

            block_rows = context.diff_blocks.each_with_index.map do |block, blk_idx|
              {
                "#": blk_idx + 1,
                range: "#{block.start_idx}-#{block.end_idx}",
                size: block.size,
                types: block.types.join(", "),
                normative: block.normative? ? "✓ NORMATIVE" : "✗ informative",
                dimension: block.diff_node&.dimension&.to_s || "-",
                lines: block.diff_lines&.length || 0,
              }
            end

            lines << render_table(
              block_rows,
              title: " Diff Blocks in Context #{ctx_idx + 1}",
              columns: %i[# range size types normative dimension lines],
              headers: {
                "#": "#",
                range: "Line Range",
                size: "Size",
                types: "Types",
                normative: "Normative?",
                dimension: "Dimension",
                lines: "Lines",
              },
              mark: ->(row) { row[:normative] == "✓ NORMATIVE" },
            )
            lines << ""
          end

          lines.join("\n")
        end

        private

        # Build a TableTennis table and return it as a String. The gem is
        # required here so that every table-producing method works when
        # called on its own, and so that code paths which never build a
        # table do not need the gem at all.
        def render_table(rows, **options)
          require "table_tennis"

          TableTennis.new(rows, **options).to_s
        end

        # Wrap text in double quotes; when it is longer than `limit`
        # characters keep only the first `limit - 2` and append "...".
        def truncate_quoted(text, limit)
          if text.length > limit
            "\"#{text[0, limit - 2]}...\""
          else
            "\"#{text}\""
          end
        end
      end
    end
  end
end