canon 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +21 -22
  3. data/Rakefile +25 -2
  4. data/lib/canon/cache.rb +18 -27
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +20 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/node_inspector.rb +13 -48
  28. data/lib/canon/comparison/pipeline.rb +269 -0
  29. data/lib/canon/comparison/profile_definition.rb +0 -2
  30. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  31. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  32. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  33. data/lib/canon/comparison/strategies.rb +16 -0
  34. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
  35. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  36. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  37. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  38. data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
  39. data/lib/canon/comparison/xml_comparator.rb +4 -492
  40. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  41. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  42. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  43. data/lib/canon/comparison.rb +144 -267
  44. data/lib/canon/config/config_dsl.rb +159 -0
  45. data/lib/canon/config/env_provider.rb +0 -3
  46. data/lib/canon/config/env_schema.rb +48 -58
  47. data/lib/canon/config/profile_loader.rb +0 -1
  48. data/lib/canon/config.rb +116 -468
  49. data/lib/canon/diff/diff_block_builder.rb +0 -2
  50. data/lib/canon/diff/diff_classifier.rb +0 -5
  51. data/lib/canon/diff/diff_context.rb +0 -2
  52. data/lib/canon/diff/diff_context_builder.rb +0 -2
  53. data/lib/canon/diff/diff_line_builder.rb +2 -3
  54. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  55. data/lib/canon/diff/diff_node_mapper.rb +10 -12
  56. data/lib/canon/diff/diff_report_builder.rb +0 -4
  57. data/lib/canon/diff/formatting_detector.rb +3 -3
  58. data/lib/canon/diff/node_serializer.rb +0 -7
  59. data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
  60. data/lib/canon/diff.rb +39 -0
  61. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  62. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  63. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  64. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  66. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  67. data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
  68. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
  70. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  71. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
  72. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
  74. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
  75. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  76. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  77. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  78. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  79. data/lib/canon/diff_formatter.rb +26 -20
  80. data/lib/canon/formatters/html4_formatter.rb +0 -2
  81. data/lib/canon/formatters/html5_formatter.rb +0 -2
  82. data/lib/canon/formatters/html_formatter.rb +0 -3
  83. data/lib/canon/formatters/json_formatter.rb +0 -1
  84. data/lib/canon/formatters/xml_formatter.rb +0 -4
  85. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  86. data/lib/canon/formatters.rb +16 -0
  87. data/lib/canon/html/data_model.rb +1 -11
  88. data/lib/canon/html.rb +4 -3
  89. data/lib/canon/options/cli_generator.rb +0 -2
  90. data/lib/canon/options/registry.rb +0 -2
  91. data/lib/canon/options.rb +9 -0
  92. data/lib/canon/pretty_printer/html.rb +0 -1
  93. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  94. data/lib/canon/pretty_printer.rb +12 -0
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  96. data/lib/canon/tree_diff/adapters.rb +14 -0
  97. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  98. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  99. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  100. data/lib/canon/tree_diff/core.rb +17 -0
  101. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  102. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  103. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  104. data/lib/canon/tree_diff/matchers.rb +15 -0
  105. data/lib/canon/tree_diff/operation_converter.rb +7 -15
  106. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  107. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  108. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  109. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  110. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  111. data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
  112. data/lib/canon/tree_diff/operations.rb +13 -0
  113. data/lib/canon/tree_diff.rb +26 -27
  114. data/lib/canon/validators/base_validator.rb +5 -10
  115. data/lib/canon/validators/html_validator.rb +2 -8
  116. data/lib/canon/validators/json_validator.rb +0 -1
  117. data/lib/canon/validators/xml_validator.rb +2 -8
  118. data/lib/canon/validators/yaml_validator.rb +0 -1
  119. data/lib/canon/validators.rb +12 -0
  120. data/lib/canon/version.rb +1 -1
  121. data/lib/canon/xml/c14n.rb +0 -4
  122. data/lib/canon/xml/data_model.rb +5 -15
  123. data/lib/canon/xml/line_range_mapper.rb +0 -2
  124. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  125. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  126. data/lib/canon/xml/nodes/element_node.rb +0 -2
  127. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  128. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  129. data/lib/canon/xml/nodes/root_node.rb +0 -2
  130. data/lib/canon/xml/nodes/text_node.rb +0 -2
  131. data/lib/canon/xml/nodes.rb +19 -0
  132. data/lib/canon/xml/processor.rb +0 -5
  133. data/lib/canon/xml/sax_builder.rb +1 -8
  134. data/lib/canon/xml/whitespace_normalizer.rb +2 -2
  135. data/lib/canon/xml.rb +33 -0
  136. data/lib/canon/xml_backend.rb +50 -14
  137. data/lib/canon/xml_parsing.rb +32 -18
  138. data/lib/canon.rb +25 -15
  139. data/lib/tasks/performance.rake +0 -58
  140. data/lib/tasks/performance_comparator.rb +132 -65
  141. data/lib/tasks/performance_helpers.rb +4 -249
  142. data/lib/tasks/performance_report.rb +309 -0
  143. metadata +28 -15
  144. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  145. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  146. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  147. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  148. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  149. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  150. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  151. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  152. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "match_options/base_resolver"
4
- require_relative "match_options/xml_resolver"
5
- require_relative "match_options/json_resolver"
6
- require_relative "match_options/yaml_resolver"
7
-
8
3
  module Canon
9
4
  module Comparison
10
5
  # Matching Options for Canon Comparison
@@ -41,12 +36,6 @@ module Canon
41
36
  @options[:preprocessing]
42
37
  end
43
38
 
44
- # Check if semantic diff is enabled
45
- # @return [Boolean] true if semantic diff is enabled
46
- def semantic_diff?
47
- @options[:semantic_diff] == true
48
- end
49
-
50
39
  def to_h
51
40
  @options.dup
52
41
  end
@@ -54,6 +43,11 @@ module Canon
54
43
 
55
44
  # Module containing match option utilities and format-specific modules
56
45
  module MatchOptions
46
+ autoload :BaseResolver, "canon/comparison/match_options/base_resolver"
47
+ autoload :JsonResolver, "canon/comparison/match_options/json_resolver"
48
+ autoload :XmlResolver, "canon/comparison/match_options/xml_resolver"
49
+ autoload :YamlResolver, "canon/comparison/match_options/yaml_resolver"
50
+
57
51
  # Preprocessing options - what to do before comparison
58
52
  PREPROCESSING_OPTIONS = %i[none c14n normalize format rendered].freeze
59
53
 
@@ -90,30 +84,15 @@ module Canon
90
84
 
91
85
  # Normalize text by collapsing whitespace and trimming
92
86
  # Mimics HTML whitespace collapsing
93
- #
94
- # Handles both ASCII and Unicode whitespace characters including:
95
- # - Regular space (U+0020)
96
- # - Non-breaking space (U+00A0)
97
- # - Other Unicode whitespace per \p{Space}
98
- #
99
- # @param text [String] Text to normalize
100
- # @return [String] Normalized text
101
87
  def normalize_text(text)
102
88
  return "" if text.nil?
103
89
 
104
90
  text.to_s
105
- .gsub(/[\p{Space}\u00a0]+/, " ") # Collapse all whitespace to single space
91
+ .gsub(/[\p{Space} ]+/, " ") # Collapse all whitespace to single space
106
92
  .strip # Remove leading/trailing whitespace
107
93
  end
108
94
 
109
95
  # Normalize text preserving Unicode whitespace type distinctions.
110
- #
111
- # Only ASCII whitespace (space, tab, newline, etc.) is collapsed.
112
- # Unicode whitespace (NBSP, ideographic space, etc.) is preserved,
113
- # so different whitespace types remain distinguishable.
114
- #
115
- # @param text [String] Text to normalize
116
- # @return [String] Normalized text with preserved whitespace types
117
96
  def normalize_text_preserving_type(text)
118
97
  return "" if text.nil?
119
98
 
@@ -123,10 +102,6 @@ module Canon
123
102
  end
124
103
 
125
104
  # Process attribute value according to match behavior
126
- #
127
- # @param value [String] Attribute value to process
128
- # @param behavior [Symbol] Match behavior (:strict, :strip, :compact, :normalize, :ignore)
129
- # @return [String] Processed value
130
105
  def process_attribute_value(value, behavior)
131
106
  case behavior
132
107
  when :strict
@@ -134,7 +109,7 @@ module Canon
134
109
  when :strip
135
110
  value.to_s.strip
136
111
  when :compact
137
- value.to_s.gsub(/[\p{Space}\u00a0]+/, " ")
112
+ value.to_s.gsub(/[\p{Space} ]+/, " ")
138
113
  when :normalize
139
114
  normalize_text(value)
140
115
  when :ignore
@@ -147,16 +122,8 @@ module Canon
147
122
 
148
123
  # XML/HTML-specific matching options
149
124
  module Xml
150
- # Matching dimensions for XML/HTML (collectively exhaustive)
151
- MATCH_DIMENSIONS = %i[
152
- text_content
153
- structural_whitespace
154
- attribute_presence
155
- attribute_order
156
- attribute_values
157
- element_position
158
- comments
159
- ].freeze
125
+ # Single source of truth: derived from the DimensionSet in Registry.
126
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:xml).names.freeze
160
127
 
161
128
  # Expose FORMAT_DEFAULTS from XmlResolver (for backward compatibility)
162
129
  FORMAT_DEFAULTS = MatchOptions::XmlResolver.const_get(:FORMAT_DEFAULTS)
@@ -165,27 +132,18 @@ module Canon
165
132
  MATCH_PROFILES = MatchOptions::XmlResolver.const_get(:MATCH_PROFILES)
166
133
 
167
134
  class << self
168
- # Delegate to XmlResolver
169
135
  def resolve(**kwargs)
170
136
  MatchOptions::XmlResolver.resolve(**kwargs)
171
137
  end
172
138
 
173
- # Delegate to XmlResolver
174
139
  def get_profile_options(profile)
175
140
  MatchOptions::XmlResolver.get_profile_options(profile)
176
141
  end
177
142
 
178
- # Get valid match dimensions for XML/HTML
179
- #
180
- # @return [Array<Symbol>] Valid dimensions
181
143
  def match_dimensions
182
144
  MatchOptions::XmlResolver.match_dimensions
183
145
  end
184
146
 
185
- # Get format-specific default options
186
- #
187
- # @param format [Symbol] Format type
188
- # @return [Hash] Default options for the format
189
147
  def format_defaults(format)
190
148
  MatchOptions::XmlResolver.format_defaults(format)
191
149
  end
@@ -194,41 +152,25 @@ module Canon
194
152
 
195
153
  # JSON-specific matching options
196
154
  module Json
197
- # Matching dimensions for JSON (collectively exhaustive)
198
- MATCH_DIMENSIONS = %i[
199
- text_content
200
- structural_whitespace
201
- key_order
202
- ].freeze
203
-
204
- # Expose FORMAT_DEFAULTS from JsonResolver (for backward compatibility)
155
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:json).names.freeze
156
+
205
157
  FORMAT_DEFAULTS = MatchOptions::JsonResolver.const_get(:FORMAT_DEFAULTS)
206
158
 
207
- # Expose MATCH_PROFILES from JsonResolver (for backward compatibility)
208
159
  MATCH_PROFILES = MatchOptions::JsonResolver.const_get(:MATCH_PROFILES)
209
160
 
210
161
  class << self
211
- # Delegate to JsonResolver
212
162
  def resolve(**kwargs)
213
163
  MatchOptions::JsonResolver.resolve(**kwargs)
214
164
  end
215
165
 
216
- # Delegate to JsonResolver
217
166
  def get_profile_options(profile)
218
167
  MatchOptions::JsonResolver.get_profile_options(profile)
219
168
  end
220
169
 
221
- # Get valid match dimensions for JSON
222
- #
223
- # @return [Array<Symbol>] Valid dimensions
224
170
  def match_dimensions
225
171
  MatchOptions::JsonResolver.match_dimensions
226
172
  end
227
173
 
228
- # Get format-specific default options
229
- #
230
- # @param format [Symbol] Format type
231
- # @return [Hash] Default options for the format
232
174
  def format_defaults(format)
233
175
  MatchOptions::JsonResolver.format_defaults(format)
234
176
  end
@@ -237,42 +179,25 @@ module Canon
237
179
 
238
180
  # YAML-specific matching options
239
181
  module Yaml
240
- # Matching dimensions for YAML (collectively exhaustive)
241
- MATCH_DIMENSIONS = %i[
242
- text_content
243
- structural_whitespace
244
- key_order
245
- comments
246
- ].freeze
247
-
248
- # Expose FORMAT_DEFAULTS from YamlResolver (for backward compatibility)
182
+ MATCH_DIMENSIONS = Dimensions::Registry.for(:yaml).names.freeze
183
+
249
184
  FORMAT_DEFAULTS = MatchOptions::YamlResolver.const_get(:FORMAT_DEFAULTS)
250
185
 
251
- # Expose MATCH_PROFILES from YamlResolver (for backward compatibility)
252
186
  MATCH_PROFILES = MatchOptions::YamlResolver.const_get(:MATCH_PROFILES)
253
187
 
254
188
  class << self
255
- # Delegate to YamlResolver
256
189
  def resolve(**kwargs)
257
190
  MatchOptions::YamlResolver.resolve(**kwargs)
258
191
  end
259
192
 
260
- # Delegate to YamlResolver
261
193
  def get_profile_options(profile)
262
194
  MatchOptions::YamlResolver.get_profile_options(profile)
263
195
  end
264
196
 
265
- # Get valid match dimensions for YAML
266
- #
267
- # @return [Array<Symbol>] Valid dimensions
268
197
  def match_dimensions
269
198
  MatchOptions::YamlResolver.match_dimensions
270
199
  end
271
200
 
272
- # Get format-specific default options
273
- #
274
- # @param format [Symbol] Format type
275
- # @return [Hash] Default options for the format
276
201
  def format_defaults(format)
277
202
  MatchOptions::YamlResolver.format_defaults(format)
278
203
  end
@@ -10,37 +10,25 @@ module Canon
10
10
  # * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
11
11
  # * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
12
12
  #
13
- # All type dispatch uses backend-branching (`if XmlBackend.nokogiri?`)
14
- # rather than `case/when` with constant references. This prevents
15
- # NameError when Nokogiri constants are undefined under Opal.
16
- #
17
- # Every node query in the codebase should go through this module.
18
- # Do not create private dispatch methods in consumers.
13
+ # Architecture: NodeInspector handles Canon-native types (Canon::Xml::Node,
14
+ # TreeNode) directly, then delegates ALL backend-specific queries to
15
+ # XmlParsing. No Moxml/Nokogiri constants are referenced here — that
16
+ # knowledge lives exclusively in XmlParsing.
19
17
  module NodeInspector
20
- NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
21
-
22
18
  # --- Type predicates ---
23
19
 
24
20
  def self.text_node?(node)
25
21
  return false unless node
26
22
  return node.node_type == :text if node.is_a?(Canon::Xml::Node)
27
23
 
28
- if XmlBackend.nokogiri?
29
- node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
30
- else
31
- node.is_a?(Moxml::Text)
32
- end
24
+ XmlParsing.text_node?(node)
33
25
  end
34
26
 
35
27
  def self.element_node?(node)
36
28
  return false unless node
37
29
  return node.node_type == :element if node.is_a?(Canon::Xml::Node)
38
30
 
39
- if XmlBackend.nokogiri?
40
- node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
41
- else
42
- node.is_a?(Moxml::Element)
43
- end
31
+ XmlParsing.element?(node)
44
32
  end
45
33
 
46
34
  def self.comment_node?(node)
@@ -57,7 +45,7 @@ module Canon
57
45
  end
58
46
  false
59
47
  else
60
- node.is_a?(Moxml::Comment)
48
+ XmlParsing.comment?(node)
61
49
  end
62
50
  end
63
51
 
@@ -100,7 +88,6 @@ module Canon
100
88
 
101
89
  # --- Node queries ---
102
90
 
103
- # Unified node name extraction across all node types.
104
91
  def self.name(node)
105
92
  return nil unless node
106
93
  return node.name if node.is_a?(Canon::Xml::Node)
@@ -109,7 +96,6 @@ module Canon
109
96
  XmlParsing.name(node)
110
97
  end
111
98
 
112
- # Unified parent access across all node types.
113
99
  def self.parent(node)
114
100
  return nil unless node
115
101
  return node.parent if node.is_a?(Canon::Xml::Node)
@@ -118,7 +104,6 @@ module Canon
118
104
  XmlParsing.parent(node)
119
105
  end
120
106
 
121
- # Unified children access across all node types.
122
107
  def self.children(node)
123
108
  return [] unless node
124
109
  return node.children if node.is_a?(Canon::Xml::Node)
@@ -127,34 +112,21 @@ module Canon
127
112
  XmlParsing.children(node)
128
113
  end
129
114
 
130
- # Extract the text content of +node+ as a String.
131
115
  def self.text_content(node)
132
- case node
133
- when Canon::Xml::Nodes::TextNode
134
- node.value.to_s
135
- when Canon::Xml::Node
136
- node.text_content.to_s
137
- when Moxml::Text
138
- node.content.to_s
139
- else
140
- XmlParsing.text_content(node).to_s
141
- end
116
+ return node.value.to_s if node.is_a?(Canon::Xml::Nodes::TextNode)
117
+ return node.text_content.to_s if node.is_a?(Canon::Xml::Node)
118
+
119
+ XmlParsing.text_content(node).to_s
142
120
  end
143
121
 
144
- # Unified node type that always returns a symbol.
145
- # Returns nil for unrecognised nodes.
146
122
  def self.node_type(node)
147
123
  return nil unless node
148
124
  return node.node_type if node.is_a?(Canon::Xml::Node)
125
+ return node.type&.to_sym if node.is_a?(Canon::TreeDiff::Core::TreeNode)
149
126
 
150
- if node.is_a?(Canon::TreeDiff::Core::TreeNode)
151
- node.type&.to_sym
152
- else
153
- XmlParsing.node_type(node)
154
- end
127
+ XmlParsing.node_type(node)
155
128
  end
156
129
 
157
- # Unified attribute value access.
158
130
  def self.attribute_value(node, attr_name)
159
131
  return nil unless node
160
132
 
@@ -168,7 +140,6 @@ module Canon
168
140
  end
169
141
  end
170
142
 
171
- # Unified namespace URI access.
172
143
  def self.namespace_uri(node)
173
144
  return nil unless node
174
145
 
@@ -179,7 +150,6 @@ module Canon
179
150
  end
180
151
  end
181
152
 
182
- # Extract parse-time errors carried on a node or its owning document.
183
153
  def self.parse_errors(node)
184
154
  return [] if node.nil?
185
155
  return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
@@ -194,11 +164,6 @@ module Canon
194
164
  []
195
165
  end
196
166
  end
197
-
198
- # Deprecated: use NodeInspector.parent instead.
199
- def self.parent_of(node)
200
- parent(node)
201
- end
202
167
  end
203
168
  end
204
169
  end
@@ -0,0 +1,269 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Shared comparison pipeline helpers used by both algorithms.
6
+ #
7
+ # Both `dom_diff` and `semantic_diff` need to:
8
+ # - detect document format from inputs (with optional hint)
9
+ # - validate that the two formats are comparable
10
+ # - merge global config-sourced profile / options into the opts hash
11
+ # - capture original-string snapshots before parsing mutates inputs
12
+ # - parse both inputs through the format-specific comparator
13
+ #
14
+ # These steps are pure pipeline mechanics — they have nothing to do with
15
+ # the comparison algorithm itself. Keeping them here ensures the two
16
+ # algorithm entrypoints cannot drift out of sync (see lutaml/canon
17
+ # "Two Comparison Algorithms — Distinct by Design" in CLAUDE.md —
18
+ # the algorithm cores stay separate; only shared infrastructure is
19
+ # consolidated).
20
+ module Pipeline
21
+ # Formats whose Canon::Config exposes a match profile / options.
22
+ CONFIG_BACKED_FORMATS = %i[xml html json yaml string].freeze
23
+
24
+ # Cross-format compatibility groups. DOM comparison accepts these
25
+ # pairings because both sides parse to the same Ruby structure.
26
+ # Semantic comparison does not — it requires exact format match.
27
+ COMPATIBLE_FORMAT_GROUPS = [
28
+ %i[json ruby_object].freeze,
29
+ %i[yaml ruby_object].freeze,
30
+ ].freeze
31
+
32
+ class << self
33
+ # Detect formats for both inputs, honouring an explicit hint.
34
+ #
35
+ # @param obj1 [Object] First input
36
+ # @param obj2 [Object] Second input
37
+ # @param format_hint [Symbol, nil] Explicit format override
38
+ # @return [Array<Symbol, Symbol>] Detected or hinted formats
39
+ def detect_formats(obj1, obj2, format_hint)
40
+ return [format_hint, format_hint] if format_hint
41
+
42
+ [FormatDetector.detect(obj1), FormatDetector.detect(obj2)]
43
+ end
44
+
45
+ # True when the two formats can be compared by the DOM algorithm.
46
+ #
47
+ # DOM allows `ruby_object` to be compared against `json` or `yaml`
48
+ # because both sides parse to the same Ruby structure. Semantic
49
+ # comparison does not allow this — it requires exact format match.
50
+ #
51
+ # @param format1 [Symbol]
52
+ # @param format2 [Symbol]
53
+ # @param strict [Boolean] When true, require exact match (semantic)
54
+ # @return [Boolean]
55
+ def formats_compatible?(format1, format2, strict: false)
56
+ return true if format1 == format2
57
+ return false if strict
58
+
59
+ COMPATIBLE_FORMAT_GROUPS.any? do |group|
60
+ group.include?(format1) && group.include?(format2)
61
+ end
62
+ end
63
+
64
+ # Raise a helpful error if formats are incompatible.
65
+ #
66
+ # @param format1 [Symbol]
67
+ # @param format2 [Symbol]
68
+ # @param strict [Boolean] Passed to {formats_compatible?}
69
+ # @raise [Canon::CompareFormatMismatchError]
70
+ # @return [void]
71
+ def validate_compatible!(format1, format2, strict: false)
72
+ return if formats_compatible?(format1, format2, strict: strict)
73
+
74
+ raise Canon::CompareFormatMismatchError.new(format1, format2)
75
+ end
76
+
77
+ # Merge global config-sourced profile and options into `opts`.
78
+ #
79
+ # Reads `Canon::Config.instance.<format>.match` for a global
80
+ # `profile` and `profile_options`, and merges them into a copy of
81
+ # the supplied opts hash. Caller-supplied values always win:
82
+ # config-derived `profile_options` extend rather than replace
83
+ # caller-supplied `global_options`.
84
+ #
85
+ # Returns the original opts hash unchanged when the format is not
86
+ # config-backed (e.g. `:ruby_object`).
87
+ #
88
+ # @param format [Symbol]
89
+ # @param opts [Hash] Caller opts (will not be mutated)
90
+ # @return [Hash] New opts hash with config globals merged in
91
+ def resolve_config(format, opts)
92
+ return opts unless CONFIG_BACKED_FORMATS.include?(format)
93
+
94
+ format_config = Canon::Config.instance.public_send(format)
95
+ match_config = format_config.match
96
+ profile = match_config.profile
97
+ profile_opts = match_config.profile_options
98
+
99
+ resolved = opts.dup
100
+ if resolved[:global_profile].nil? && profile
101
+ resolved[:global_profile] = profile
102
+ end
103
+
104
+ if profile_opts.any?
105
+ resolved[:global_options] = merge_profile_options(
106
+ resolved[:global_options], profile_opts
107
+ )
108
+ end
109
+
110
+ resolved
111
+ end
112
+
113
+ # Capture pre-parse string snapshots for diff display.
114
+ #
115
+ # Parsing (especially HTML) can mutate inputs, so originals must
116
+ # be captured before any parsing happens. Strings pass through
117
+ # unchanged; parsed nodes are serialized via NodeSerializer.
118
+ #
119
+ # @param obj1 [Object]
120
+ # @param obj2 [Object]
121
+ # @return [Array<String, String>] Captured original strings
122
+ def capture_originals(obj1, obj2)
123
+ [extract_original_string(obj1), extract_original_string(obj2)]
124
+ end
125
+
126
+ # Parse both inputs through the format-specific comparator.
127
+ #
128
+ # Delegates to `XmlComparator`, `HtmlComparator`, `JsonComparator`,
129
+ # or `YamlComparator` based on format. Uses `Cache` so the same
130
+ # string is not re-parsed across runs.
131
+ #
132
+ # @param obj1 [Object]
133
+ # @param obj2 [Object]
134
+ # @param format [Symbol]
135
+ # @param match_opts_hash [Hash] Resolved match options
136
+ # @return [Array<Object, Object>] Parsed documents
137
+ def parse_pair(obj1, obj2, format, match_opts_hash)
138
+ preprocessing = match_opts_hash[:preprocessing] || :none
139
+
140
+ case format
141
+ when :xml
142
+ [
143
+ parse_with_cache(obj1, format, preprocessing) do |doc|
144
+ XmlComparator.parse(doc, preprocessing)
145
+ end,
146
+ parse_with_cache(obj2, format, preprocessing) do |doc|
147
+ XmlComparator.parse(doc, preprocessing)
148
+ end,
149
+ ]
150
+ when :html, :html4, :html5
151
+ [
152
+ parse_with_cache(obj1, format, preprocessing) do |doc|
153
+ HtmlComparator.parse(doc, preprocessing)
154
+ end,
155
+ parse_with_cache(obj2, format, preprocessing) do |doc|
156
+ HtmlComparator.parse(doc, preprocessing)
157
+ end,
158
+ ]
159
+ when :json
160
+ [
161
+ parse_with_cache(obj1, format, :none) do |doc|
162
+ JsonComparator.parse(doc)
163
+ end,
164
+ parse_with_cache(obj2, format, :none) do |doc|
165
+ JsonComparator.parse(doc)
166
+ end,
167
+ ]
168
+ when :yaml
169
+ [
170
+ parse_with_cache(obj1, format, :none) do |doc|
171
+ YamlComparator.parse(doc)
172
+ end,
173
+ parse_with_cache(obj2, format, :none) do |doc|
174
+ YamlComparator.parse(doc)
175
+ end,
176
+ ]
177
+ else
178
+ [obj1, obj2]
179
+ end
180
+ end
181
+
182
+ # Pre-parse HTML strings through `HtmlParser.parse(_, :html5)`.
183
+ #
184
+ # The DOM comparator needs HTML4 and HTML5 inputs to share HTML's
185
+ # whitespace-sensitivity semantics, which means routing both
186
+ # through Nokogiri::HTML5.fragment up front (issue #118).
187
+ # The semantic comparator does not need this — it uses Canon's
188
+ # own HTML data model downstream — so this helper is opt-in.
189
+ #
190
+ # Returns the inputs unchanged if they are not strings.
191
+ #
192
+ # @param obj1 [Object]
193
+ # @param obj2 [Object]
194
+ # @return [Array<Object, Object>] Potentially pre-parsed HTML inputs
195
+ def preparse_html_pair(obj1, obj2)
196
+ [
197
+ html_string?(obj1) ? HtmlParser.parse(obj1, :html5) : obj1,
198
+ html_string?(obj2) ? HtmlParser.parse(obj2, :html5) : obj2,
199
+ ]
200
+ end
201
+
202
+ # True when the input is a String AND should be treated as HTML.
203
+ #
204
+ # @param obj [Object]
205
+ # @return [Boolean]
206
+ def html_string?(obj)
207
+ obj.is_a?(String)
208
+ end
209
+
210
+ private
211
+
212
+ # Merge caller-supplied global_options with config profile_opts.
213
+ #
214
+ # Caller values win on key conflict; profile_opts fill in gaps.
215
+ # `MatchConfig#profile_options` already returns a fresh hash
216
+ # (via `Hash#except`), so we can return it directly without dup.
217
+ #
218
+ # @param existing [Hash, nil] Caller-supplied options
219
+ # @param profile_opts [Hash] Config-sourced options
220
+ # @return [Hash] Merged hash
221
+ def merge_profile_options(existing, profile_opts)
222
+ return profile_opts if existing.nil?
223
+
224
+ profile_opts.merge(existing)
225
+ end
226
+
227
+ # Parse a single document with cache lookup.
228
+ #
229
+ # @param doc [Object] Document (string or already-parsed)
230
+ # @param format [Symbol] Document format
231
+ # @param preprocessing [Symbol] Preprocessing option
232
+ # @yield Block to parse the document if not cached
233
+ # @return [Object] Parsed document
234
+ def parse_with_cache(doc, format, preprocessing)
235
+ return doc unless doc.is_a?(String)
236
+
237
+ Cache.fetch(:document_parse,
238
+ Cache.key_for_document(doc, format, preprocessing)) do # rubocop:disable Lint/UselessDefaultValueArgument
239
+ yield doc
240
+ end
241
+ end
242
+
243
+ # Extract a string snapshot from various input types.
244
+ #
245
+ # Strings pass through; Nokogiri documents use to_html; Canon and
246
+ # other XML nodes go through NodeSerializer; everything else
247
+ # falls back to to_s.
248
+ #
249
+ # @param obj [Object]
250
+ # @return [String] String snapshot
251
+ def extract_original_string(obj)
252
+ case obj
253
+ when String
254
+ obj
255
+ when Nokogiri::XML::Document, Nokogiri::HTML::Document,
256
+ Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
257
+ obj.to_html
258
+ else
259
+ if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
260
+ Canon::XmlParsing.serialize(obj)
261
+ else
262
+ obj.to_s
263
+ end
264
+ end
265
+ end
266
+ end
267
+ end
268
+ end
269
+ end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "match_options"
4
-
5
3
  module Canon
6
4
  module Comparison
7
5
  # Profile definition DSL with full validation
@@ -172,7 +172,7 @@ module Canon
172
172
  path: path,
173
173
  value1: obj1,
174
174
  value2: obj2,
175
- difference: diff_code,
175
+ diff_code: diff_code,
176
176
  }
177
177
  end
178
178
  end