canon 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +50 -26
  3. data/README.adoc +8 -3
  4. data/docs/advanced/diff-pipeline.adoc +36 -9
  5. data/docs/features/diff-formatting/colors-and-symbols.adoc +82 -0
  6. data/docs/features/diff-formatting/index.adoc +12 -0
  7. data/docs/features/diff-formatting/themes.adoc +353 -0
  8. data/docs/features/environment-configuration/index.adoc +23 -0
  9. data/docs/internals/diff-char-range-pipeline.adoc +249 -0
  10. data/docs/internals/diffnode-enrichment.adoc +1 -0
  11. data/docs/internals/index.adoc +52 -4
  12. data/docs/reference/environment-variables.adoc +6 -0
  13. data/docs/understanding/architecture.adoc +5 -0
  14. data/examples/show_themes.rb +217 -0
  15. data/lib/canon/comparison/comparison_result.rb +9 -4
  16. data/lib/canon/config/env_schema.rb +3 -1
  17. data/lib/canon/config.rb +11 -0
  18. data/lib/canon/diff/diff_block.rb +7 -0
  19. data/lib/canon/diff/diff_block_builder.rb +2 -2
  20. data/lib/canon/diff/diff_char_range.rb +140 -0
  21. data/lib/canon/diff/diff_line.rb +42 -4
  22. data/lib/canon/diff/diff_line_builder.rb +907 -0
  23. data/lib/canon/diff/diff_node.rb +5 -1
  24. data/lib/canon/diff/diff_node_enricher.rb +1418 -0
  25. data/lib/canon/diff/diff_node_mapper.rb +54 -0
  26. data/lib/canon/diff/source_locator.rb +105 -0
  27. data/lib/canon/diff/text_decomposer.rb +103 -0
  28. data/lib/canon/diff_formatter/by_line/base_formatter.rb +264 -24
  29. data/lib/canon/diff_formatter/by_line/html_formatter.rb +35 -20
  30. data/lib/canon/diff_formatter/by_line/json_formatter.rb +36 -19
  31. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +33 -19
  32. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +583 -98
  33. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +36 -19
  34. data/lib/canon/diff_formatter/by_object/base_formatter.rb +62 -13
  35. data/lib/canon/diff_formatter/by_object/json_formatter.rb +59 -24
  36. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +74 -34
  37. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +4 -5
  38. data/lib/canon/diff_formatter/diff_detail_formatter.rb +1 -1
  39. data/lib/canon/diff_formatter/legend.rb +4 -2
  40. data/lib/canon/diff_formatter/theme.rb +864 -0
  41. data/lib/canon/diff_formatter.rb +11 -6
  42. data/lib/canon/tree_diff/matchers/hash_matcher.rb +16 -1
  43. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +10 -0
  44. data/lib/canon/tree_diff/operations/operation_detector.rb +5 -1
  45. data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
  46. data/lib/canon/version.rb +1 -1
  47. metadata +11 -2
@@ -168,7 +168,8 @@ module Canon
168
168
  character_map_file: nil, character_definitions: nil,
169
169
  show_diffs: :all, verbose_diff: false,
170
170
  show_raw_inputs: false, show_preprocessed_inputs: false,
171
- show_line_numbered_inputs: false)
171
+ show_line_numbered_inputs: false,
172
+ diff_mode: :separate, legacy_terminal: false)
172
173
  # rubocop:enable Metrics/ParameterLists
173
174
  @use_color = use_color
174
175
  @mode = mode
@@ -179,6 +180,8 @@ module Canon
179
180
  @show_raw_inputs = show_raw_inputs
180
181
  @show_preprocessed_inputs = show_preprocessed_inputs
181
182
  @show_line_numbered_inputs = show_line_numbered_inputs
183
+ @diff_mode = legacy_terminal ? :separate : diff_mode
184
+ @legacy_terminal = legacy_terminal
182
185
  @visualization_map = build_visualization_map(
183
186
  visualization_map: visualization_map,
184
187
  character_map_file: character_map_file,
@@ -259,19 +262,16 @@ module Canon
259
262
  # @param html_version [Symbol, nil] HTML version (:html4 or :html5)
260
263
  # @return [String] Formatted output
261
264
  def format(differences, format, doc1: nil, doc2: nil, html_version: nil)
262
- # In by-line mode with doc1/doc2, always perform diff regardless of differences
265
+ # In by-line mode, always use by-line diff
263
266
  if @mode == :by_line && doc1 && doc2
264
267
  return by_line_diff(doc1, doc2, format: format,
265
268
  html_version: html_version,
266
269
  differences: differences)
267
270
  end
268
271
 
269
- # Check if no differences (handle both ComparisonResult and legacy Array)
270
272
  no_diffs = if differences.respond_to?(:equivalent?)
271
- # ComparisonResult object (production path)
272
273
  differences.equivalent?
273
274
  else
274
- # Legacy Array (for low-level tests)
275
275
  differences.empty?
276
276
  end
277
277
  return success_message if no_diffs
@@ -644,10 +644,12 @@ differences: [])
644
644
 
645
645
  return output.join("\n") if doc1.nil? || doc2.nil?
646
646
 
647
- # Extract differences array from ComparisonResult if needed
647
+ # Extract differences array and equivalent status from ComparisonResult if needed
648
648
  diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
649
+ @comparison_equivalent = differences.equivalent?
649
650
  differences.differences
650
651
  else
652
+ @comparison_equivalent = nil
651
653
  differences
652
654
  end
653
655
 
@@ -660,6 +662,9 @@ differences: [])
660
662
  visualization_map: @visualization_map,
661
663
  show_diffs: @show_diffs,
662
664
  differences: diffs_array,
665
+ diff_mode: @legacy_terminal ? :separate : @diff_mode,
666
+ legacy_terminal: @legacy_terminal,
667
+ equivalent: @comparison_equivalent,
663
668
  )
664
669
 
665
670
  output << formatter.format(doc1, doc2)
@@ -88,9 +88,24 @@ module Canon
88
88
 
89
89
  def match_node(node2)
90
90
  sig2 = Core::NodeSignature.for(node2)
91
- candidates = (@signature_map[sig2] || []).reject { |n| @matched_tree1.include?(n) }
91
+ candidates = (@signature_map[sig2] || []).reject do |n|
92
+ @matched_tree1.include?(n)
93
+ end
92
94
  return if candidates.empty?
93
95
 
96
+ # When multiple candidates have identical signatures (common with
97
+ # duplicate subtrees like MathML formulas), sort by sibling position
98
+ # proximity to prefer matching nodes at the same position within
99
+ # their parent. This reduces cross-matching that causes cascading
100
+ # prefix closure failures.
101
+ if candidates.size > 1
102
+ pos2 = node2.position || 0
103
+ candidates = candidates.sort_by do |c|
104
+ pos1 = c.position || 0
105
+ (pos1 - pos2).abs
106
+ end
107
+ end
108
+
94
109
  # Try each candidate until one passes both subtree matching
95
110
  # AND the prefix closure constraint in matching.add.
96
111
  # When multiple candidates have identical subtrees (e.g., labels
@@ -96,6 +96,7 @@ module Canon
96
96
  def match_group(nodes1, nodes2)
97
97
  # Create similarity matrix
98
98
  matches = []
99
+ ambiguous = nodes1.size > 1
99
100
 
100
101
  nodes2.each do |node2|
101
102
  next if @matching.matched2?(node2)
@@ -116,6 +117,15 @@ module Canon
116
117
 
117
118
  similarity = node1.similarity_to(node2)
118
119
 
120
+ # When multiple candidates exist, penalize attribute value
121
+ # mismatches to prevent cross-matching of nodes with same
122
+ # structure but different IDs. Uses TreeNode#attribute_difference
123
+ # for consistent attribute comparison across the tree_diff module.
124
+ if similarity >= @threshold && ambiguous && !node1.attributes.empty?
125
+ attr_diff = node1.attribute_difference(node2)
126
+ similarity *= (1.0 - attr_diff) if attr_diff.positive?
127
+ end
128
+
119
129
  if similarity > best_similarity
120
130
  best_similarity = similarity
121
131
  best_match = node1
@@ -518,7 +518,11 @@ module Canon
518
518
  return false if text1.empty? || text2.empty?
519
519
 
520
520
  similarity = text_similarity(text1, text2)
521
- similarity >= 0.9 # 90% similarity for hierarchy changes
521
+ return false unless similarity >= 0.9
522
+
523
+ # Verify attribute values match to prevent false upgrade/downgrade
524
+ # detection on nodes with same text but different attribute values
525
+ node1.attributes == node2.attributes
522
526
  end
523
527
 
524
528
  # Extract all text content from a node and its descendants
@@ -188,7 +188,7 @@ module Canon
188
188
  # @return [Integer, nil] Max node count
189
189
  def get_max_node_count
190
190
  # Get from options if provided, otherwise use default
191
- @options[:max_node_count] || 10_000
191
+ @options[:max_node_count] || 30_000
192
192
  end
193
193
  end
194
194
  end
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.1.21"
4
+ VERSION = "0.1.23"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: canon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.21
4
+ version: 0.1.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-27 00:00:00.000000000 Z
11
+ date: 2026-04-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diff-lcs
@@ -155,6 +155,7 @@ files:
155
155
  - docs/features/diff-formatting/context-and-grouping.adoc
156
156
  - docs/features/diff-formatting/display-filtering.adoc
157
157
  - docs/features/diff-formatting/index.adoc
158
+ - docs/features/diff-formatting/themes.adoc
158
159
  - docs/features/environment-configuration/index.adoc
159
160
  - docs/features/environment-configuration/override-system.adoc
160
161
  - docs/features/environment-configuration/size-limits.adoc
@@ -172,6 +173,7 @@ files:
172
173
  - docs/interfaces/index.adoc
173
174
  - docs/interfaces/rspec/index.adoc
174
175
  - docs/interfaces/ruby-api/index.adoc
176
+ - docs/internals/diff-char-range-pipeline.adoc
175
177
  - docs/internals/diffnode-enrichment.adoc
176
178
  - docs/internals/index.adoc
177
179
  - docs/lychee.toml
@@ -190,6 +192,7 @@ files:
190
192
  - docs/understanding/formats/xml.adoc
191
193
  - docs/understanding/formats/yaml.adoc
192
194
  - docs/understanding/index.adoc
195
+ - examples/show_themes.rb
193
196
  - exe/canon
194
197
  - lib/canon.rb
195
198
  - lib/canon/cache.rb
@@ -248,17 +251,22 @@ files:
248
251
  - lib/canon/data_model.rb
249
252
  - lib/canon/diff/diff_block.rb
250
253
  - lib/canon/diff/diff_block_builder.rb
254
+ - lib/canon/diff/diff_char_range.rb
251
255
  - lib/canon/diff/diff_classifier.rb
252
256
  - lib/canon/diff/diff_context.rb
253
257
  - lib/canon/diff/diff_context_builder.rb
254
258
  - lib/canon/diff/diff_line.rb
259
+ - lib/canon/diff/diff_line_builder.rb
255
260
  - lib/canon/diff/diff_node.rb
261
+ - lib/canon/diff/diff_node_enricher.rb
256
262
  - lib/canon/diff/diff_node_mapper.rb
257
263
  - lib/canon/diff/diff_report.rb
258
264
  - lib/canon/diff/diff_report_builder.rb
259
265
  - lib/canon/diff/formatting_detector.rb
260
266
  - lib/canon/diff/node_serializer.rb
261
267
  - lib/canon/diff/path_builder.rb
268
+ - lib/canon/diff/source_locator.rb
269
+ - lib/canon/diff/text_decomposer.rb
262
270
  - lib/canon/diff/xml_serialization_formatter.rb
263
271
  - lib/canon/diff_formatter.rb
264
272
  - lib/canon/diff_formatter/by_line/base_formatter.rb
@@ -280,6 +288,7 @@ files:
280
288
  - lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb
281
289
  - lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb
282
290
  - lib/canon/diff_formatter/legend.rb
291
+ - lib/canon/diff_formatter/theme.rb
283
292
  - lib/canon/errors.rb
284
293
  - lib/canon/formatters/html4_formatter.rb
285
294
  - lib/canon/formatters/html5_formatter.rb