canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "match_options/base_resolver"
4
- require_relative "match_options/xml_resolver"
5
- require_relative "match_options/json_resolver"
6
- require_relative "match_options/yaml_resolver"
7
-
8
3
  module Canon
9
4
  module Comparison
10
5
  # Matching Options for Canon Comparison
@@ -41,12 +36,6 @@ module Canon
41
36
  @options[:preprocessing]
42
37
  end
43
38
 
44
- # Check if semantic diff is enabled
45
- # @return [Boolean] true if semantic diff is enabled
46
- def semantic_diff?
47
- @options[:semantic_diff] == true
48
- end
49
-
50
39
  def to_h
51
40
  @options.dup
52
41
  end
@@ -54,6 +43,11 @@ module Canon
54
43
 
55
44
  # Module containing match option utilities and format-specific modules
56
45
  module MatchOptions
46
+ autoload :BaseResolver, "canon/comparison/match_options/base_resolver"
47
+ autoload :JsonResolver, "canon/comparison/match_options/json_resolver"
48
+ autoload :XmlResolver, "canon/comparison/match_options/xml_resolver"
49
+ autoload :YamlResolver, "canon/comparison/match_options/yaml_resolver"
50
+
57
51
  # Preprocessing options - what to do before comparison
58
52
  PREPROCESSING_OPTIONS = %i[none c14n normalize format rendered].freeze
59
53
 
@@ -90,30 +84,15 @@ module Canon
90
84
 
91
85
  # Normalize text by collapsing whitespace and trimming
92
86
  # Mimics HTML whitespace collapsing
93
- #
94
- # Handles both ASCII and Unicode whitespace characters including:
95
- # - Regular space (U+0020)
96
- # - Non-breaking space (U+00A0)
97
- # - Other Unicode whitespace per \p{Space}
98
- #
99
- # @param text [String] Text to normalize
100
- # @return [String] Normalized text
101
87
  def normalize_text(text)
102
88
  return "" if text.nil?
103
89
 
104
90
  text.to_s
105
- .gsub(/[\p{Space}\u00a0]+/, " ") # Collapse all whitespace to single space
91
+ .gsub(/[\p{Space} ]+/, " ") # Collapse all whitespace to single space
106
92
  .strip # Remove leading/trailing whitespace
107
93
  end
108
94
 
109
95
  # Normalize text preserving Unicode whitespace type distinctions.
110
- #
111
- # Only ASCII whitespace (space, tab, newline, etc.) is collapsed.
112
- # Unicode whitespace (NBSP, ideographic space, etc.) is preserved,
113
- # so different whitespace types remain distinguishable.
114
- #
115
- # @param text [String] Text to normalize
116
- # @return [String] Normalized text with preserved whitespace types
117
96
  def normalize_text_preserving_type(text)
118
97
  return "" if text.nil?
119
98
 
@@ -123,10 +102,6 @@ module Canon
123
102
  end
124
103
 
125
104
  # Process attribute value according to match behavior
126
- #
127
- # @param value [String] Attribute value to process
128
- # @param behavior [Symbol] Match behavior (:strict, :strip, :compact, :normalize, :ignore)
129
- # @return [String] Processed value
130
105
  def process_attribute_value(value, behavior)
131
106
  case behavior
132
107
  when :strict
@@ -134,7 +109,7 @@ module Canon
134
109
  when :strip
135
110
  value.to_s.strip
136
111
  when :compact
137
- value.to_s.gsub(/[\p{Space}\u00a0]+/, " ")
112
+ value.to_s.gsub(/[\p{Space} ]+/, " ")
138
113
  when :normalize
139
114
  normalize_text(value)
140
115
  when :ignore
@@ -147,16 +122,8 @@ module Canon
147
122
 
148
123
  # XML/HTML-specific matching options
149
124
  module Xml
150
- # Matching dimensions for XML/HTML (collectively exhaustive)
151
- MATCH_DIMENSIONS = %i[
152
- text_content
153
- structural_whitespace
154
- attribute_presence
155
- attribute_order
156
- attribute_values
157
- element_position
158
- comments
159
- ].freeze
125
+ # Single source of truth: derived from the DimensionSet in Registry.
126
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:xml).names.freeze
160
127
 
161
128
  # Expose FORMAT_DEFAULTS from XmlResolver (for backward compatibility)
162
129
  FORMAT_DEFAULTS = MatchOptions::XmlResolver.const_get(:FORMAT_DEFAULTS)
@@ -165,27 +132,18 @@ module Canon
165
132
  MATCH_PROFILES = MatchOptions::XmlResolver.const_get(:MATCH_PROFILES)
166
133
 
167
134
  class << self
168
- # Delegate to XmlResolver
169
135
  def resolve(**kwargs)
170
136
  MatchOptions::XmlResolver.resolve(**kwargs)
171
137
  end
172
138
 
173
- # Delegate to XmlResolver
174
139
  def get_profile_options(profile)
175
140
  MatchOptions::XmlResolver.get_profile_options(profile)
176
141
  end
177
142
 
178
- # Get valid match dimensions for XML/HTML
179
- #
180
- # @return [Array<Symbol>] Valid dimensions
181
143
  def match_dimensions
182
144
  MatchOptions::XmlResolver.match_dimensions
183
145
  end
184
146
 
185
- # Get format-specific default options
186
- #
187
- # @param format [Symbol] Format type
188
- # @return [Hash] Default options for the format
189
147
  def format_defaults(format)
190
148
  MatchOptions::XmlResolver.format_defaults(format)
191
149
  end
@@ -194,41 +152,25 @@ module Canon
194
152
 
195
153
  # JSON-specific matching options
196
154
  module Json
197
- # Matching dimensions for JSON (collectively exhaustive)
198
- MATCH_DIMENSIONS = %i[
199
- text_content
200
- structural_whitespace
201
- key_order
202
- ].freeze
203
-
204
- # Expose FORMAT_DEFAULTS from JsonResolver (for backward compatibility)
155
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:json).names.freeze
156
+
205
157
  FORMAT_DEFAULTS = MatchOptions::JsonResolver.const_get(:FORMAT_DEFAULTS)
206
158
 
207
- # Expose MATCH_PROFILES from JsonResolver (for backward compatibility)
208
159
  MATCH_PROFILES = MatchOptions::JsonResolver.const_get(:MATCH_PROFILES)
209
160
 
210
161
  class << self
211
- # Delegate to JsonResolver
212
162
  def resolve(**kwargs)
213
163
  MatchOptions::JsonResolver.resolve(**kwargs)
214
164
  end
215
165
 
216
- # Delegate to JsonResolver
217
166
  def get_profile_options(profile)
218
167
  MatchOptions::JsonResolver.get_profile_options(profile)
219
168
  end
220
169
 
221
- # Get valid match dimensions for JSON
222
- #
223
- # @return [Array<Symbol>] Valid dimensions
224
170
  def match_dimensions
225
171
  MatchOptions::JsonResolver.match_dimensions
226
172
  end
227
173
 
228
- # Get format-specific default options
229
- #
230
- # @param format [Symbol] Format type
231
- # @return [Hash] Default options for the format
232
174
  def format_defaults(format)
233
175
  MatchOptions::JsonResolver.format_defaults(format)
234
176
  end
@@ -237,42 +179,25 @@ module Canon
237
179
 
238
180
  # YAML-specific matching options
239
181
  module Yaml
240
- # Matching dimensions for YAML (collectively exhaustive)
241
- MATCH_DIMENSIONS = %i[
242
- text_content
243
- structural_whitespace
244
- key_order
245
- comments
246
- ].freeze
247
-
248
- # Expose FORMAT_DEFAULTS from YamlResolver (for backward compatibility)
182
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:yaml).names.freeze
183
+
249
184
  FORMAT_DEFAULTS = MatchOptions::YamlResolver.const_get(:FORMAT_DEFAULTS)
250
185
 
251
- # Expose MATCH_PROFILES from YamlResolver (for backward compatibility)
252
186
  MATCH_PROFILES = MatchOptions::YamlResolver.const_get(:MATCH_PROFILES)
253
187
 
254
188
  class << self
255
- # Delegate to YamlResolver
256
189
  def resolve(**kwargs)
257
190
  MatchOptions::YamlResolver.resolve(**kwargs)
258
191
  end
259
192
 
260
- # Delegate to YamlResolver
261
193
  def get_profile_options(profile)
262
194
  MatchOptions::YamlResolver.get_profile_options(profile)
263
195
  end
264
196
 
265
- # Get valid match dimensions for YAML
266
- #
267
- # @return [Array<Symbol>] Valid dimensions
268
197
  def match_dimensions
269
198
  MatchOptions::YamlResolver.match_dimensions
270
199
  end
271
200
 
272
- # Get format-specific default options
273
- #
274
- # @param format [Symbol] Format type
275
- # @return [Hash] Default options for the format
276
201
  def format_defaults(format)
277
202
  MatchOptions::YamlResolver.format_defaults(format)
278
203
  end
@@ -0,0 +1,269 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Shared comparison pipeline helpers used by both algorithms.
6
+ #
7
+ # Both `dom_diff` and `semantic_diff` need to:
8
+ # - detect document format from inputs (with optional hint)
9
+ # - validate that the two formats are comparable
10
+ # - merge global config-sourced profile / options into the opts hash
11
+ # - capture original-string snapshots before parsing mutates inputs
12
+ # - parse both inputs through the format-specific comparator
13
+ #
14
+ # These steps are pure pipeline mechanics — they have nothing to do with
15
+ # the comparison algorithm itself. Keeping them here ensures the two
16
+ # algorithm entrypoints cannot drift out of sync (see lutaml/canon
17
+ # "Two Comparison Algorithms — Distinct by Design" in CLAUDE.md —
18
+ # the algorithm cores stay separate; only shared infrastructure is
19
+ # consolidated).
20
+ module Pipeline
21
+ # Formats whose Canon::Config exposes a match profile / options.
22
+ CONFIG_BACKED_FORMATS = %i[xml html json yaml string].freeze
23
+
24
+ # Cross-format compatibility groups. DOM comparison accepts these
25
+ # pairings because both sides parse to the same Ruby structure.
26
+ # Semantic comparison does not — it requires exact format match.
27
+ COMPATIBLE_FORMAT_GROUPS = [
28
+ %i[json ruby_object].freeze,
29
+ %i[yaml ruby_object].freeze,
30
+ ].freeze
31
+
32
+ class << self
33
+ # Detect formats for both inputs, honouring an explicit hint.
34
+ #
35
+ # @param obj1 [Object] First input
36
+ # @param obj2 [Object] Second input
37
+ # @param format_hint [Symbol, nil] Explicit format override
38
+ # @return [Array(Symbol, Symbol)] Detected or hinted formats
39
+ def detect_formats(obj1, obj2, format_hint)
40
+ return [format_hint, format_hint] if format_hint
41
+
42
+ [FormatDetector.detect(obj1), FormatDetector.detect(obj2)]
43
+ end
44
+
45
+ # True when the two formats can be compared by the DOM algorithm.
46
+ #
47
+ # DOM allows `ruby_object` to be compared against `json` or `yaml`
48
+ # because both sides parse to the same Ruby structure. Semantic
49
+ # comparison does not allow this — it requires exact format match.
50
+ #
51
+ # @param format1 [Symbol]
52
+ # @param format2 [Symbol]
53
+ # @param strict [Boolean] When true, require exact match (semantic)
54
+ # @return [Boolean]
55
+ def formats_compatible?(format1, format2, strict: false)
56
+ return true if format1 == format2
57
+ return false if strict
58
+
59
+ COMPATIBLE_FORMAT_GROUPS.any? do |group|
60
+ group.include?(format1) && group.include?(format2)
61
+ end
62
+ end
63
+
64
+ # Raise a helpful error if formats are incompatible.
65
+ #
66
+ # @param format1 [Symbol]
67
+ # @param format2 [Symbol]
68
+ # @param strict [Boolean] Passed to {formats_compatible?}
69
+ # @raise [Canon::CompareFormatMismatchError]
70
+ # @return [void]
71
+ def validate_compatible!(format1, format2, strict: false)
72
+ return if formats_compatible?(format1, format2, strict: strict)
73
+
74
+ raise Canon::CompareFormatMismatchError.new(format1, format2)
75
+ end
76
+
77
+ # Merge global config-sourced profile and options into `opts`.
78
+ #
79
+ # Reads `Canon::Config.instance.<format>.match` for a global
80
+ # `profile` and `profile_options`, and merges them into a copy of
81
+ # the supplied opts hash. Caller-supplied values always win:
82
+ # config-derived `profile_options` extend rather than replace
83
+ # caller-supplied `global_options`.
84
+ #
85
+ # Returns the original opts hash unchanged when the format is not
86
+ # config-backed (e.g. `:ruby_object`).
87
+ #
88
+ # @param format [Symbol]
89
+ # @param opts [Hash] Caller opts (will not be mutated)
90
+ # @return [Hash] New opts hash with config globals merged in
91
+ def resolve_config(format, opts)
92
+ return opts unless CONFIG_BACKED_FORMATS.include?(format)
93
+
94
+ format_config = Canon::Config.instance.public_send(format)
95
+ match_config = format_config.match
96
+ profile = match_config.profile
97
+ profile_opts = match_config.profile_options
98
+
99
+ resolved = opts.dup
100
+ if resolved[:global_profile].nil? && profile
101
+ resolved[:global_profile] = profile
102
+ end
103
+
104
+ if profile_opts.any?
105
+ resolved[:global_options] = merge_profile_options(
106
+ resolved[:global_options], profile_opts
107
+ )
108
+ end
109
+
110
+ resolved
111
+ end
112
+
113
+ # Capture pre-parse string snapshots for diff display.
114
+ #
115
+ # Parsing (especially HTML) can mutate inputs, so originals must
116
+ # be captured before any parsing happens. Strings pass through
117
+ # unchanged; other inputs are serialized (to_html, XmlParsing.serialize, or to_s).
118
+ #
119
+ # @param obj1 [Object]
120
+ # @param obj2 [Object]
121
+ # @return [Array(String, String)] Captured original strings
122
+ def capture_originals(obj1, obj2)
123
+ [extract_original_string(obj1), extract_original_string(obj2)]
124
+ end
125
+
126
+ # Parse both inputs through the format-specific comparator.
127
+ #
128
+ # Delegates to `XmlComparator`, `HtmlComparator`, `JsonComparator`,
129
+ # or `YamlComparator` based on format. Uses `Cache` so the same
130
+ # string is not re-parsed across runs.
131
+ #
132
+ # @param obj1 [Object]
133
+ # @param obj2 [Object]
134
+ # @param format [Symbol]
135
+ # @param match_opts_hash [Hash] Resolved match options
136
+ # @return [Array(Object, Object)] Parsed documents
137
+ def parse_pair(obj1, obj2, format, match_opts_hash)
138
+ preprocessing = match_opts_hash[:preprocessing] || :none
139
+
140
+ case format
141
+ when :xml
142
+ [
143
+ parse_with_cache(obj1, format, preprocessing) do |doc|
144
+ XmlComparator.parse(doc, preprocessing)
145
+ end,
146
+ parse_with_cache(obj2, format, preprocessing) do |doc|
147
+ XmlComparator.parse(doc, preprocessing)
148
+ end,
149
+ ]
150
+ when :html, :html4, :html5
151
+ [
152
+ parse_with_cache(obj1, format, preprocessing) do |doc|
153
+ HtmlComparator.parse(doc, preprocessing)
154
+ end,
155
+ parse_with_cache(obj2, format, preprocessing) do |doc|
156
+ HtmlComparator.parse(doc, preprocessing)
157
+ end,
158
+ ]
159
+ when :json
160
+ [
161
+ parse_with_cache(obj1, format, :none) do |doc|
162
+ JsonComparator.parse(doc)
163
+ end,
164
+ parse_with_cache(obj2, format, :none) do |doc|
165
+ JsonComparator.parse(doc)
166
+ end,
167
+ ]
168
+ when :yaml
169
+ [
170
+ parse_with_cache(obj1, format, :none) do |doc|
171
+ YamlComparator.parse(doc)
172
+ end,
173
+ parse_with_cache(obj2, format, :none) do |doc|
174
+ YamlComparator.parse(doc)
175
+ end,
176
+ ]
177
+ else
178
+ [obj1, obj2]
179
+ end
180
+ end
181
+
182
+ # Pre-parse HTML strings through `HtmlParser.parse(_, :html5)`.
183
+ #
184
+ # The DOM comparator needs HTML4 and HTML5 inputs to share HTML's
185
+ # whitespace-sensitivity semantics, which means routing both
186
+ # through Nokogiri::HTML5.fragment up front (issue #118).
187
+ # The semantic comparator does not need this — it uses Canon's
188
+ # own HTML data model downstream — so this helper is opt-in.
189
+ #
190
+ # Returns the inputs unchanged if they are not strings.
191
+ #
192
+ # @param obj1 [Object]
193
+ # @param obj2 [Object]
194
+ # @return [Array(Object, Object)] Potentially pre-parsed HTML inputs
195
+ def preparse_html_pair(obj1, obj2)
196
+ [
197
+ html_string?(obj1) ? HtmlParser.parse(obj1, :html5) : obj1,
198
+ html_string?(obj2) ? HtmlParser.parse(obj2, :html5) : obj2,
199
+ ]
200
+ end
201
+
202
+ # True when the input is a String, i.e. still unparsed. No HTML content check is made.
203
+ #
204
+ # @param obj [Object]
205
+ # @return [Boolean]
206
+ def html_string?(obj)
207
+ obj.is_a?(String)
208
+ end
209
+
210
+ private
211
+
212
+ # Merge caller-supplied global_options with config profile_opts.
213
+ #
214
+ # Caller values win on key conflict; profile_opts fill in gaps.
215
+ # `MatchConfig#profile_options` already returns a fresh hash
216
+ # (via `Hash#except`), so we can return it directly without dup.
217
+ #
218
+ # @param existing [Hash, nil] Caller-supplied options
219
+ # @param profile_opts [Hash] Config-sourced options
220
+ # @return [Hash] Merged hash
221
+ def merge_profile_options(existing, profile_opts)
222
+ return profile_opts if existing.nil?
223
+
224
+ profile_opts.merge(existing)
225
+ end
226
+
227
+ # Parse a single document with cache lookup.
228
+ #
229
+ # @param doc [Object] Document (string or already-parsed)
230
+ # @param format [Symbol] Document format
231
+ # @param preprocessing [Symbol] Preprocessing option
232
+ # @yield Block to parse the document if not cached
233
+ # @return [Object] Parsed document
234
+ def parse_with_cache(doc, format, preprocessing)
235
+ return doc unless doc.is_a?(String)
236
+
237
+ Cache.fetch(:document_parse,
238
+ Cache.key_for_document(doc, format, preprocessing)) do # rubocop:disable Lint/UselessDefaultValueArgument
239
+ yield doc
240
+ end
241
+ end
242
+
243
+ # Extract a string snapshot from various input types.
244
+ #
245
+ # Strings pass through; Nokogiri documents use to_html; Canon and
246
+ # other XML nodes go through Canon::XmlParsing.serialize; everything else
247
+ # falls back to to_s.
248
+ #
249
+ # @param obj [Object]
250
+ # @return [String] String snapshot
251
+ def extract_original_string(obj)
252
+ case obj
253
+ when String
254
+ obj
255
+ when Nokogiri::XML::Document, Nokogiri::HTML::Document,
256
+ Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
257
+ obj.to_html
258
+ else
259
+ if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
260
+ Canon::XmlParsing.serialize(obj)
261
+ else
262
+ obj.to_s
263
+ end
264
+ end
265
+ end
266
+ end
267
+ end
268
+ end
269
+ end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "match_options"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  # Profile definition DSL with full validation
@@ -172,7 +172,7 @@ module Canon
172
172
  path: path,
173
173
  value1: obj1,
174
174
  value2: obj2,
175
- difference: diff_code,
175
+ diff_code: diff_code,
176
176
  }
177
177
  end
178
178
  end
@@ -1,72 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "base_match_strategy"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  module Strategies
8
6
  # Factory for creating match strategies
9
7
  #
10
- # Selects the appropriate match strategy based on match options.
11
- # This provides a single point for strategy instantiation and enables
12
- # easy extension with new matching algorithms.
13
- #
14
- # @example Create a strategy
15
- # strategy = MatchStrategyFactory.create(
16
- # format: :xml,
17
- # match_options: { semantic_diff: true }
18
- # )
19
- # differences = strategy.match(doc1, doc2)
20
- #
8
+ # After semantic dispatch normalization, this factory is only called
9
+ # with semantic_diff: true. DOM matching is handled directly by
10
+ # the format comparators (XmlComparator, HtmlComparator, etc.).
21
11
  class MatchStrategyFactory
22
- # Create appropriate match strategy
23
- #
24
- # Examines match options to determine which strategy to use:
25
- # - If semantic_diff is enabled: SemanticTreeMatchStrategy
26
- # - Otherwise (default): DomMatchStrategy
27
- #
28
- # Future strategies can be added here by checking additional
29
- # options and returning the appropriate strategy class.
30
- #
31
- # @param format [Symbol] Document format (:xml, :html, :json, :yaml)
32
- # @param match_options [Hash] Match options
33
- # @option match_options [Boolean] :semantic_diff Use semantic tree matching
34
- # @return [BaseMatchStrategy] Instantiated strategy
35
- #
36
- # @example DOM matching (default)
37
- # strategy = MatchStrategyFactory.create(
38
- # format: :xml,
39
- # match_options: {}
40
- # )
41
- # # Returns DomMatchStrategy
42
- #
43
- # @example Semantic tree matching
44
- # strategy = MatchStrategyFactory.create(
45
- # format: :xml,
46
- # match_options: { semantic_diff: true }
47
- # )
48
- # # Returns SemanticTreeMatchStrategy
49
- #
50
12
  def self.create(format:, match_options:)
51
- # Check for semantic diff option
52
- if match_options[:semantic_diff]
53
- require_relative "semantic_tree_match_strategy"
54
- SemanticTreeMatchStrategy.new(format: format,
55
- match_options: match_options)
56
- else
57
- # Default to DOM matching
58
- require_relative "dom_match_strategy"
59
- DomMatchStrategy.new(format: format, match_options: match_options)
13
+ unless match_options[:semantic_diff]
14
+ raise ArgumentError,
15
+ "MatchStrategyFactory requires semantic_diff: true; " \
16
+ "DOM matching is handled by format comparators directly"
60
17
  end
61
18
 
62
- # Future: Add more strategies here
63
- # Example:
64
- # elsif match_options[:hybrid_diff]
65
- # require_relative "hybrid_match_strategy"
66
- # HybridMatchStrategy.new(format, match_options)
67
- # elsif match_options[:fuzzy_diff]
68
- # require_relative "fuzzy_match_strategy"
69
- # FuzzyMatchStrategy.new(format, match_options)
19
+ SemanticTreeMatchStrategy.new(format: format,
20
+ match_options: match_options)
70
21
  end
71
22
  end
72
23
  end
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "base_match_strategy"
4
- require_relative "../../tree_diff/tree_diff_integrator"
5
- require_relative "../../tree_diff/operation_converter"
6
- require_relative "../xml_node_comparison"
7
-
8
3
  module Canon
9
4
  module Comparison
10
5
  module Strategies
@@ -126,7 +121,7 @@ module Canon
126
121
  # @return [Array<String>] Preprocessed strings
127
122
  def preprocess_xml(doc1, doc2)
128
123
  xml1 = if doc1.is_a?(Canon::Xml::Node)
129
- XmlNodeComparison.serialize_node_to_xml(doc1)
124
+ Canon::Diff::NodeSerializer.serialize(doc1)
130
125
  elsif Canon::XmlParsing.xml_node?(doc1)
131
126
  Canon::XmlParsing.serialize(doc1)
132
127
  else
@@ -134,7 +129,7 @@ module Canon
134
129
  end
135
130
 
136
131
  xml2 = if doc2.is_a?(Canon::Xml::Node)
137
- XmlNodeComparison.serialize_node_to_xml(doc2)
132
+ Canon::Diff::NodeSerializer.serialize(doc2)
138
133
  elsif Canon::XmlParsing.xml_node?(doc2)
139
134
  Canon::XmlParsing.serialize(doc2)
140
135
  else
@@ -162,7 +157,7 @@ module Canon
162
157
  # For XML::DocumentFragment (from parse_node_as_fragment), use to_s
163
158
  # to avoid Nokogiri auto-inserting meta tags during to_html serialization
164
159
  html1 = if doc1.is_a?(Canon::Xml::Node)
165
- XmlNodeComparison.serialize_node_to_xml(doc1)
160
+ Canon::Diff::NodeSerializer.serialize(doc1)
166
161
  elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
167
162
  doc1.to_s
168
163
  elsif Canon::XmlParsing.xml_node?(doc1)
@@ -172,7 +167,7 @@ module Canon
172
167
  end
173
168
 
174
169
  html2 = if doc2.is_a?(Canon::Xml::Node)
175
- XmlNodeComparison.serialize_node_to_xml(doc2)
170
+ Canon::Diff::NodeSerializer.serialize(doc2)
176
171
  elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
177
172
  doc2.to_s
178
173
  elsif Canon::XmlParsing.xml_node?(doc2)
@@ -194,7 +189,6 @@ module Canon
194
189
  # @param doc2 [Object] Second JSON document
195
190
  # @return [Array<String>] Preprocessed strings
196
191
  def preprocess_json(doc1, doc2)
197
- require_relative "../../formatters/json_formatter"
198
192
  [Canon.format(doc1, :json), Canon.format(doc2, :json)]
199
193
  end
200
194
 
@@ -206,7 +200,6 @@ module Canon
206
200
  # @param doc2 [Object] Second YAML document
207
201
  # @return [Array<String>] Preprocessed strings
208
202
  def preprocess_yaml(doc1, doc2)
209
- require_relative "../../formatters/yaml_formatter"
210
203
  [Canon.format(doc1, :yaml), Canon.format(doc2, :yaml)]
211
204
  end
212
205
  end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Match strategy framework. Children are autoloaded — never
6
+ # `require_relative` them.
7
+ module Strategies
8
+ autoload :BaseMatchStrategy,
9
+ "canon/comparison/strategies/base_match_strategy"
10
+ autoload :MatchStrategyFactory,
11
+ "canon/comparison/strategies/match_strategy_factory"
12
+ autoload :SemanticTreeMatchStrategy,
13
+ "canon/comparison/strategies/semantic_tree_match_strategy"
14
+ end
15
+ end
16
+ end
@@ -158,9 +158,6 @@ differences)
158
158
  # @param differences [Array] Array to append difference to
159
159
  def self.add_attribute_difference(n1:, n2:, diff1:, diff2:,
160
160
  dimension:, differences:, **opts)
161
- # Import DiffNodeBuilder to avoid circular dependency
162
- require_relative "diff_node_builder"
163
-
164
161
  diff_node = Canon::Comparison::DiffNodeBuilder.build(
165
162
  node1: n1,
166
163
  node2: n2,
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../match_options"
4
- require_relative "../../xml/namespace_helper"
5
-
6
3
  module Canon
7
4
  module Comparison
8
5
  module XmlComparatorHelpers