canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -15,11 +15,12 @@ module Canon
15
15
  # Wrapper class for resolved match options
16
16
  # Provides convenient methods for accessing behaviors by dimension
17
17
  class ResolvedMatchOptions
18
- attr_reader :options, :format
18
+ attr_reader :options, :format, :compare_profile
19
19
 
20
- def initialize(options, format:)
20
+ def initialize(options, format:, compare_profile: nil)
21
21
  @options = options
22
22
  @format = format
23
+ @compare_profile = compare_profile
23
24
  end
24
25
 
25
26
  # Get the behavior for a specific dimension
@@ -35,6 +36,12 @@ module Canon
35
36
  @options[:preprocessing]
36
37
  end
37
38
 
39
+ # Check if semantic diff is enabled
40
+ # @return [Boolean] true only when the :semantic_diff option is exactly true (other truthy values yield false)
41
+ def semantic_diff?
42
+ @options[:semantic_diff] == true
43
+ end
44
+
38
45
  def to_h
39
46
  @options.dup
40
47
  end
@@ -116,8 +123,13 @@ module Canon
116
123
  text_content
117
124
  structural_whitespace
118
125
  attribute_presence
126
+ attribute_order
119
127
  attribute_values
128
+ element_position
120
129
  comments
130
+ element_structure
131
+ element_position
132
+ element_hierarchy
121
133
  ].freeze
122
134
 
123
135
  # Format-specific defaults
@@ -127,7 +139,9 @@ module Canon
127
139
  text_content: :normalize,
128
140
  structural_whitespace: :normalize,
129
141
  attribute_presence: :strict,
142
+ attribute_order: :ignore,
130
143
  attribute_values: :strict,
144
+ element_position: :ignore,
131
145
  comments: :ignore,
132
146
  },
133
147
  xml: {
@@ -135,7 +149,9 @@ module Canon
135
149
  text_content: :strict,
136
150
  structural_whitespace: :strict,
137
151
  attribute_presence: :strict,
152
+ attribute_order: :ignore,
138
153
  attribute_values: :strict,
154
+ element_position: :strict,
139
155
  comments: :strict,
140
156
  },
141
157
  }.freeze
@@ -148,7 +164,9 @@ module Canon
148
164
  text_content: :strict,
149
165
  structural_whitespace: :strict,
150
166
  attribute_presence: :strict,
167
+ attribute_order: :strict,
151
168
  attribute_values: :strict,
169
+ element_position: :strict,
152
170
  comments: :strict,
153
171
  },
154
172
 
@@ -159,7 +177,9 @@ module Canon
159
177
  text_content: :normalize,
160
178
  structural_whitespace: :normalize,
161
179
  attribute_presence: :strict,
180
+ attribute_order: :strict,
162
181
  attribute_values: :strict,
182
+ element_position: :strict,
163
183
  comments: :ignore,
164
184
  },
165
185
 
@@ -170,7 +190,9 @@ module Canon
170
190
  text_content: :normalize,
171
191
  structural_whitespace: :normalize,
172
192
  attribute_presence: :strict,
193
+ attribute_order: :strict,
173
194
  attribute_values: :normalize,
195
+ element_position: :ignore,
174
196
  comments: :ignore,
175
197
  },
176
198
 
@@ -180,7 +202,9 @@ module Canon
180
202
  text_content: :normalize,
181
203
  structural_whitespace: :normalize,
182
204
  attribute_presence: :strict,
205
+ attribute_order: :strict,
183
206
  attribute_values: :strict,
207
+ element_position: :ignore,
184
208
  comments: :ignore,
185
209
  },
186
210
 
@@ -191,7 +215,9 @@ module Canon
191
215
  text_content: :normalize,
192
216
  structural_whitespace: :ignore,
193
217
  attribute_presence: :strict,
218
+ attribute_order: :ignore,
194
219
  attribute_values: :normalize,
220
+ element_position: :ignore,
195
221
  comments: :ignore,
196
222
  },
197
223
 
@@ -201,7 +227,9 @@ module Canon
201
227
  text_content: :normalize,
202
228
  structural_whitespace: :ignore,
203
229
  attribute_presence: :strict,
230
+ attribute_order: :ignore,
204
231
  attribute_values: :normalize,
232
+ element_position: :ignore,
205
233
  comments: :ignore,
206
234
  },
207
235
  }.freeze
@@ -293,9 +321,19 @@ module Canon
293
321
 
294
322
  # Validate match options
295
323
  def validate_match_options!(match_options)
324
+ # Special options that don't need validation as dimensions
325
+ special_options = %i[
326
+ preprocessing
327
+ semantic_diff
328
+ similarity_threshold
329
+ hash_matching
330
+ similarity_matching
331
+ propagation
332
+ ]
333
+
296
334
  match_options.each do |dimension, behavior|
297
- # Skip preprocessing as it's validated separately
298
- next if dimension == :preprocessing
335
+ # Skip special options (validated elsewhere or passed through)
336
+ next if special_options.include?(dimension)
299
337
 
300
338
  unless MATCH_DIMENSIONS.include?(dimension)
301
339
  raise Canon::Error,
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ module Strategies
6
+ # Abstract base class for match strategies
7
+ #
8
+ # All match strategies must inherit from this class and implement:
9
+ # - match(doc1, doc2) → Array<DiffNode>
10
+ # - preprocess_for_display(doc1, doc2) → [String, String]
11
+ #
12
+ # This provides a common interface for different matching algorithms,
13
+ # enabling the Strategy Pattern for extensible comparison methods.
14
+ #
15
+ # @example Create a custom match strategy
16
+ # class MyMatchStrategy < BaseMatchStrategy
17
+ # def match(doc1, doc2)
18
+ # # Custom matching logic
19
+ # # Must return Array<Canon::Diff::DiffNode>
20
+ # end
21
+ #
22
+ # def preprocess_for_display(doc1, doc2)
23
+ # # Format documents for diff display
24
+ # # Must return [String, String]
25
+ # end
26
+ # end
27
+ #
28
+ class BaseMatchStrategy
29
+ attr_reader :format, :match_options
30
+
31
+ # Initialize strategy
32
+ #
33
+ # @param format [Symbol] Document format (:xml, :html, :json, :yaml)
34
+ # @param match_options [Hash] Match options for comparison
35
+ def initialize(format:, match_options:)
36
+ @format = format
37
+ @match_options = match_options
38
+ end
39
+
40
+ # Perform matching and return DiffNodes
41
+ #
42
+ # This is the core method that implements the matching algorithm.
43
+ # All strategies must implement this to produce DiffNodes that
44
+ # flow through the standard diff rendering pipeline.
45
+ #
46
+ # @param doc1 [Object] First document
47
+ # @param doc2 [Object] Second document
48
+ # @return [Array<Canon::Diff::DiffNode>] Array of differences
49
+ # @raise [NotImplementedError] If not implemented by subclass
50
+ def match(doc1, doc2)
51
+ raise NotImplementedError,
52
+ "#{self.class} must implement #match(doc1, doc2)"
53
+ end
54
+
55
+ # Preprocess documents for display in diff output
56
+ #
57
+ # This method formats the documents into strings suitable for
58
+ # line-by-line diff display. The format must be consistent across
59
+ # all strategies for the same format to ensure the diff rendering
60
+ # pipeline produces correct output.
61
+ #
62
+ # @param doc1 [Object] First document
63
+ # @param doc2 [Object] Second document
64
+ # @return [Array<String>] Preprocessed [doc1_string, doc2_string]
65
+ # @raise [NotImplementedError] If not implemented by subclass
66
+ def preprocess_for_display(doc1, doc2)
67
+ raise NotImplementedError,
68
+ "#{self.class} must implement #preprocess_for_display(doc1, doc2)"
69
+ end
70
+
71
+ # Optional metadata to include in ComparisonResult
72
+ #
73
+ # Subclasses can override this to provide algorithm-specific
74
+ # metadata such as statistics, configuration, etc.
75
+ #
76
+ # @return [Hash] Additional metadata
77
+ def metadata
78
+ {}
79
+ end
80
+
81
+ # Algorithm name derived from class name
82
+ #
83
+ # Automatically generates algorithm identifier from class name.
84
+ # For example:
85
+ # - DomMatchStrategy → :dom
86
+ # - SemanticTreeMatchStrategy → :semantic_tree
87
+ #
88
+ # @return [Symbol] Algorithm identifier
89
+ def algorithm_name
90
+ self.class.name.split("::").last
91
+ .gsub("MatchStrategy", "")
92
+ .gsub(/([A-Z])/, '_\1')
93
+ .downcase[1..]
94
+ .to_sym
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_match_strategy"
4
+
5
+ module Canon
6
+ module Comparison
7
+ module Strategies
8
+ # Factory for creating match strategies
9
+ #
10
+ # Selects the appropriate match strategy based on match options.
11
+ # This provides a single point for strategy instantiation and enables
12
+ # easy extension with new matching algorithms.
13
+ #
14
+ # @example Create a strategy
15
+ # strategy = MatchStrategyFactory.create(
16
+ # format: :xml,
17
+ # match_options: { semantic_diff: true }
18
+ # )
19
+ # differences = strategy.match(doc1, doc2)
20
+ #
21
+ class MatchStrategyFactory
22
+ # Create appropriate match strategy
23
+ #
24
+ # Examines match options to determine which strategy to use:
25
+ # - If semantic_diff is enabled: SemanticTreeMatchStrategy
26
+ # - Otherwise (default): DomMatchStrategy
27
+ #
28
+ # Future strategies can be added here by checking additional
29
+ # options and returning the appropriate strategy class.
30
+ #
31
+ # @param format [Symbol] Document format (:xml, :html, :json, :yaml)
32
+ # @param match_options [Hash] Match options
33
+ # @option match_options [Boolean] :semantic_diff Use semantic tree matching
34
+ # @return [BaseMatchStrategy] Instantiated strategy
35
+ #
36
+ # @example DOM matching (default)
37
+ # strategy = MatchStrategyFactory.create(
38
+ # format: :xml,
39
+ # match_options: {}
40
+ # )
41
+ # # Returns DomMatchStrategy
42
+ #
43
+ # @example Semantic tree matching
44
+ # strategy = MatchStrategyFactory.create(
45
+ # format: :xml,
46
+ # match_options: { semantic_diff: true }
47
+ # )
48
+ # # Returns SemanticTreeMatchStrategy
49
+ #
50
+ def self.create(format:, match_options:)
51
+ # Check for semantic diff option
52
+ if match_options[:semantic_diff]
53
+ require_relative "semantic_tree_match_strategy"
54
+ SemanticTreeMatchStrategy.new(format: format,
55
+ match_options: match_options)
56
+ else
57
+ # Default to DOM matching
58
+ require_relative "dom_match_strategy"
59
+ DomMatchStrategy.new(format: format, match_options: match_options)
60
+ end
61
+
62
+ # Future: add more strategies as elsif branches placed before the else above
63
+ # Example:
64
+ # elsif match_options[:hybrid_diff]
65
+ # require_relative "hybrid_match_strategy"
66
+ # HybridMatchStrategy.new(format: format, match_options: match_options)
67
+ # elsif match_options[:fuzzy_diff]
68
+ # require_relative "fuzzy_match_strategy"
69
+ # FuzzyMatchStrategy.new(format: format, match_options: match_options)
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,220 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_match_strategy"
4
+ require_relative "../../tree_diff/tree_diff_integrator"
5
+ require_relative "../../tree_diff/operation_converter"
6
+
7
+ module Canon
8
+ module Comparison
9
+ module Strategies
10
+ # Semantic tree matching strategy
11
+ #
12
+ # Uses TreeDiffIntegrator for intelligent structure-aware matching.
13
+ # This strategy:
14
+ # 1. Converts documents to tree representation
15
+ # 2. Performs semantic matching via TreeDiffIntegrator
16
+ # 3. Converts Operations to DiffNodes via OperationConverter
17
+ # 4. Returns DiffNodes that flow through standard rendering pipeline
18
+ #
19
+ # Key difference from DOM matching: Uses tree-based structural
20
+ # similarity and edit distance for matching instead of simple
21
+ # node-by-node comparison.
22
+ #
23
+ # @example Use semantic tree matching
24
+ # strategy = SemanticTreeMatchStrategy.new(format: :xml, match_options: match_options)
25
+ # diff_nodes = strategy.match(doc1, doc2)
26
+ #
27
+ class SemanticTreeMatchStrategy < BaseMatchStrategy
28
+ # Perform semantic tree matching
29
+ #
30
+ # @param doc1 [Object] First document (Nokogiri node, Hash, etc.)
31
+ # @param doc2 [Object] Second document
32
+ # @return [Array<Canon::Diff::DiffNode>] Array of differences
33
+ def match(doc1, doc2)
34
+ # Create integrator with format-specific adapter
35
+ integrator = create_integrator
36
+
37
+ # Perform tree diff - returns Operations
38
+ result = integrator.diff(doc1, doc2)
39
+
40
+ # Store statistics for metadata
41
+ @statistics = result[:statistics]
42
+
43
+ # Convert Operations to DiffNodes using OperationConverter
44
+ # This is the KEY FIX - ensures we use proper DiffNodes
45
+ convert_operations_to_diff_nodes(result[:operations])
46
+ end
47
+
48
+ # Preprocess documents for display
49
+ #
50
+ # IMPORTANT: This must use the SAME format as DomMatchStrategy
51
+ # to ensure consistent diff rendering.
52
+ #
53
+ # @param doc1 [Object] First document
54
+ # @param doc2 [Object] Second document
55
+ # @return [Array<String>] Preprocessed [doc1_string, doc2_string]
56
+ def preprocess_for_display(doc1, doc2)
57
+ case @format
58
+ when :xml
59
+ preprocess_xml(doc1, doc2)
60
+ when :html, :html4, :html5
61
+ preprocess_html(doc1, doc2)
62
+ when :json
63
+ preprocess_json(doc1, doc2)
64
+ when :yaml
65
+ preprocess_yaml(doc1, doc2)
66
+ else
67
+ raise ArgumentError, "Unsupported format: #{@format}"
68
+ end
69
+ end
70
+
71
+ # Include tree diff statistics in metadata
72
+ #
73
+ # @return [Hash] Metadata including statistics (:tree_diff_statistics is nil until #match has been called)
74
+ def metadata
75
+ {
76
+ tree_diff_statistics: @statistics,
77
+ tree_diff_enabled: true,
78
+ }
79
+ end
80
+
81
+ private
82
+
83
+ # Create TreeDiffIntegrator with options
84
+ #
85
+ # @return [Canon::TreeDiff::TreeDiffIntegrator] Configured integrator
86
+ def create_integrator
87
+ Canon::TreeDiff::TreeDiffIntegrator.new(
88
+ format: @format,
89
+ options: {
90
+ similarity_threshold: @match_options[:similarity_threshold] || 0.95,
91
+ hash_matching: @match_options.fetch(:hash_matching, true),
92
+ similarity_matching: @match_options.fetch(:similarity_matching,
93
+ true),
94
+ propagation: @match_options.fetch(:propagation, true),
95
+ attribute_order: @match_options[:attribute_order] || :ignore,
96
+ },
97
+ )
98
+ end
99
+
100
+ # Convert Operations to DiffNodes using OperationConverter
101
+ #
102
+ # This is crucial - it ensures we produce proper DiffNodes with:
103
+ # - Correct dimension mapping
104
+ # - Normative/informative classification
105
+ # - Proper node extraction from TreeNodes
106
+ #
107
+ # @param operations [Array<Operation>] Operations from tree diff
108
+ # @return [Array<Canon::Diff::DiffNode>] Converted DiffNodes
109
+ def convert_operations_to_diff_nodes(operations)
110
+ converter = Canon::TreeDiff::OperationConverter.new(
111
+ format: @format,
112
+ match_options: @match_options,
113
+ )
114
+
115
+ converter.convert(operations)
116
+ end
117
+
118
+ # Preprocess XML documents
119
+ #
120
+ # Uses simple line break insertion (same as DOM diff)
121
+ # NOT Canon.format() which adds full indentation
122
+ #
123
+ # @param doc1 [Object] First XML document
124
+ # @param doc2 [Object] Second XML document
125
+ # @return [Array<String>] Preprocessed strings
126
+ def preprocess_xml(doc1, doc2)
127
+ # Serialize XML to string
128
+ # Use XmlComparator's serializer for Canon::Xml::Node
129
+ xml1 = if doc1.is_a?(Canon::Xml::Node)
130
+ require_relative "../xml_comparator"
131
+ XmlComparator.send(:serialize_node_to_xml, doc1)
132
+ elsif doc1.respond_to?(:to_xml)
133
+ doc1.to_xml
134
+ else
135
+ doc1.to_s
136
+ end
137
+
138
+ xml2 = if doc2.is_a?(Canon::Xml::Node)
139
+ require_relative "../xml_comparator"
140
+ XmlComparator.send(:serialize_node_to_xml, doc2)
141
+ elsif doc2.respond_to?(:to_xml)
142
+ doc2.to_xml
143
+ else
144
+ doc2.to_s
145
+ end
146
+
147
+ # MUST match DOM diff preprocessing EXACTLY (xml_comparator.rb:106-109)
148
+ # Simple pattern: add newline between adjacent tags
149
+ [
150
+ xml1.gsub(/></, ">\n<"),
151
+ xml2.gsub(/></, ">\n<"),
152
+ ]
153
+ end
154
+
155
+ # Preprocess HTML documents
156
+ #
157
+ # Uses native HTML serialization with line break insertion
158
+ # (same as DOM diff) to ensure proper line-by-line display
159
+ #
160
+ # @param doc1 [Object] First HTML document
161
+ # @param doc2 [Object] Second HTML document
162
+ # @return [Array<String>] Preprocessed strings
163
+ def preprocess_html(doc1, doc2)
164
+ # For Canon::Xml::Node, use XmlComparator's serializer
165
+ # For Nokogiri::XML::DocumentFragment (from parse_node_as_fragment), use to_s
166
+ # to avoid Nokogiri auto-inserting meta tags during to_html serialization
167
+ html1 = if doc1.is_a?(Canon::Xml::Node)
168
+ require_relative "../xml_comparator"
169
+ XmlComparator.send(:serialize_node_to_xml, doc1)
170
+ elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
171
+ doc1.to_s
172
+ elsif doc1.respond_to?(:to_html)
173
+ doc1.to_html
174
+ else
175
+ doc1.to_s
176
+ end
177
+
178
+ html2 = if doc2.is_a?(Canon::Xml::Node)
179
+ require_relative "../xml_comparator"
180
+ XmlComparator.send(:serialize_node_to_xml, doc2)
181
+ elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
182
+ doc2.to_s
183
+ elsif doc2.respond_to?(:to_html)
184
+ doc2.to_html
185
+ else
186
+ doc2.to_s
187
+ end
188
+
189
+ # KEY FIX: Use simple gsub, NOT Canon.format
190
+ # This ensures proper line-by-line display matching DOM diff format
191
+ [html1.gsub(/></, ">\n<"), html2.gsub(/></, ">\n<")]
192
+ end
193
+
194
+ # Preprocess JSON documents
195
+ #
196
+ # Uses Canon formatter for consistent formatting
197
+ #
198
+ # @param doc1 [Object] First JSON document
199
+ # @param doc2 [Object] Second JSON document
200
+ # @return [Array<String>] Preprocessed strings
201
+ def preprocess_json(doc1, doc2)
202
+ require_relative "../../formatters/json_formatter"
203
+ [Canon.format(doc1, :json), Canon.format(doc2, :json)]
204
+ end
205
+
206
+ # Preprocess YAML documents
207
+ #
208
+ # Uses Canon formatter for consistent formatting
209
+ #
210
+ # @param doc1 [Object] First YAML document
211
+ # @param doc2 [Object] Second YAML document
212
+ # @return [Array<String>] Preprocessed strings
213
+ def preprocess_yaml(doc1, doc2)
214
+ require_relative "../../formatters/yaml_formatter"
215
+ [Canon.format(doc1, :yaml), Canon.format(doc2, :yaml)]
216
+ end
217
+ end
218
+ end
219
+ end
220
+ end