canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,54 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Comparison dimensions
4
- #
5
- # Provides dimension classes for comparing specific aspects of documents.
6
- # Each dimension knows how to extract and compare data according to different behaviors.
7
- #
8
- # == Architecture
9
- #
10
- # Dimensions represent "WHAT to compare" - specific aspects of a document that can be compared:
11
- # - Text content
12
- # - Comments
13
- # - Attribute values
14
- # - Attribute presence
15
- # - Attribute order
16
- # - Element position
17
- # - Structural whitespace
18
- #
19
- # == Behaviors
20
- #
21
- # Each dimension supports comparison behaviors:
22
- # - :strict - Exact comparison
23
- # - :normalize - Normalized comparison (e.g., collapse whitespace)
24
- # - :ignore - Skip comparison
25
- #
26
- # == Usage
27
- #
28
- # # Get a dimension instance
29
- # dimension = Canon::Comparison::Dimensions::Registry.get(:text_content)
30
- #
31
- # # Compare two nodes
32
- # dimension.equivalent?(node1, node2, :normalize)
33
- #
34
- # # Or use the registry directly
35
- # Canon::Comparison::Dimensions::Registry.compare(:text_content, node1, node2, :normalize)
36
-
37
- require_relative "dimensions/base_dimension"
38
- require_relative "dimensions/registry"
39
- require_relative "dimensions/text_content_dimension"
40
- require_relative "dimensions/comments_dimension"
41
- require_relative "dimensions/attribute_values_dimension"
42
- require_relative "dimensions/attribute_presence_dimension"
43
- require_relative "dimensions/attribute_order_dimension"
44
- require_relative "dimensions/element_position_dimension"
45
- require_relative "dimensions/structural_whitespace_dimension"
46
-
47
3
  module Canon
48
4
  module Comparison
5
+ # Dimension value objects for comparison aspects.
6
+ #
7
+ # Each format (XML/HTML, JSON, YAML) has a distinct set of dimensions —
8
+ # specific aspects of a document that can be compared with different
9
+ # behaviors (:strict, :normalize, :ignore).
10
+ #
11
+ # A Dimension knows its metadata (name, valid behaviors, normative
12
+ # classification rule). Comparison logic stays in the comparators where
13
+ # it has full node context.
14
+ #
15
+ # DimensionSet groups dimensions per format. Registry provides pre-built
16
+ # sets with format lookup (html/html4/html5 all resolve to the XML set).
49
17
  module Dimensions
50
- # Version constant for the dimensions module
51
- VERSION = "1.0.0"
18
+ autoload :Dimension, "canon/comparison/dimensions/dimension"
19
+ autoload :DimensionSet, "canon/comparison/dimensions/dimension_set"
20
+ autoload :Registry, "canon/comparison/dimensions/registry"
52
21
  end
53
22
  end
54
23
  end
@@ -1,21 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "../comparison" # Load base module with constants first
5
- require_relative "markup_comparator"
6
- require_relative "xml_comparator"
7
- require_relative "match_options"
8
- require_relative "comparison_result"
9
- require_relative "compare_profile"
10
- require_relative "html_compare_profile"
11
- require_relative "../diff/diff_node"
12
- require_relative "../diff/diff_classifier"
13
- require_relative "strategies/match_strategy_factory"
14
- require_relative "../html/data_model"
15
- require_relative "xml_node_comparison"
16
- require_relative "xml_comparator/diff_node_builder"
17
- # Whitespace sensitivity module (single source of truth for sensitive elements)
18
- require_relative "whitespace_sensitivity"
19
4
 
20
5
  module Canon
21
6
  module Comparison
@@ -106,12 +91,6 @@ module Canon
106
91
  # Store resolved match options hash for use in comparison logic
107
92
  opts[:match_opts] = match_opts_hash
108
93
 
109
- # Use tree diff if semantic_diff option is enabled
110
- if match_opts.semantic_diff?
111
- return perform_semantic_tree_diff(html1, html2, opts,
112
- match_opts_hash)
113
- end
114
-
115
94
  # Create child_opts with resolved options
116
95
  child_opts = opts.merge(child_opts)
117
96
 
@@ -188,9 +167,7 @@ module Canon
188
167
  # accepted: dom_diff routes html/html4/html5 input through
189
168
  # Nokogiri::HTML5.fragment per #118.
190
169
  def fragment_node?(node)
191
- node.is_a?(Nokogiri::XML::DocumentFragment) ||
192
- node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
193
- node.is_a?(Nokogiri::HTML5::DocumentFragment)
170
+ XmlBackend.document_fragment?(node)
194
171
  end
195
172
 
196
173
  # Compare children of document fragments using the shared
@@ -232,62 +209,6 @@ module Canon
232
209
  end
233
210
  end
234
211
 
235
- # Perform semantic tree diff using SemanticTreeMatchStrategy
236
- #
237
- # @param html1 [String, Nokogiri::HTML::Document] First HTML
238
- # @param html2 [String, Nokogiri::HTML::Document] Second HTML
239
- # @param opts [Hash] Comparison options
240
- # @param match_opts_hash [Hash] Resolved match options
241
- # @return [Boolean, ComparisonResult] Result of tree diff comparison
242
- def perform_semantic_tree_diff(html1, html2, opts, match_opts_hash)
243
- # Capture original HTML strings for display (see equivalent? for details).
244
- original_str1 = opts.delete(:_original_str1) ||
245
- extract_original_string(html1)
246
- original_str2 = opts.delete(:_original_str2) ||
247
- extract_original_string(html2)
248
-
249
- # Parse to Canon::Xml::Node (preserves preprocessing)
250
- # For HTML, we parse as XML to get Canon::Xml::Node structure
251
- node1 = parse_node_for_semantic(html1,
252
- match_opts_hash[:preprocessing])
253
- node2 = parse_node_for_semantic(html2,
254
- match_opts_hash[:preprocessing])
255
-
256
- # Create strategy using factory
257
- strategy = Strategies::MatchStrategyFactory.create(
258
- format: :html,
259
- match_options: match_opts_hash,
260
- )
261
-
262
- # Pass Canon::Xml::Node directly - adapter now handles it
263
- differences = strategy.match(node1, node2)
264
-
265
- # Return based on verbose mode
266
- if opts[:verbose]
267
- # Get preprocessed strings for display
268
- preprocessed = strategy.preprocess_for_display(node1, node2)
269
-
270
- # Detect HTML version (default to HTML5 for Canon nodes)
271
- html_version = :html5
272
-
273
- # Return ComparisonResult with strategy metadata
274
- ComparisonResult.new(
275
- differences: differences,
276
- preprocessed_strings: preprocessed,
277
- original_strings: [original_str1, original_str2],
278
- format: :html,
279
- html_version: html_version,
280
- match_options: match_opts_hash.merge(strategy.metadata),
281
- algorithm: :semantic,
282
- parse_errors_expected: Comparison.parse_errors_for(node1),
283
- parse_errors_received: Comparison.parse_errors_for(node2),
284
- )
285
- else
286
- # Simple boolean result - equivalent if no normative differences
287
- differences.none?(&:normative?)
288
- end
289
- end
290
-
291
212
  # Parse node as fragment to preserve actual content
292
213
  # Uses HTML4.fragment or HTML5.fragment based on content detection
293
214
  #
@@ -296,15 +217,11 @@ module Canon
296
217
  # @param match_opts [Hash] Match options
297
218
  # @return [Nokogiri::HTML::DocumentFragment] Parsed fragment
298
219
  def parse_node_as_fragment(node, preprocessing = :none, match_opts = {})
299
- # If already an XML fragment (no meta tags), return it
300
- if node.is_a?(Nokogiri::XML::DocumentFragment)
220
+ if XmlBackend.document_fragment?(node)
301
221
  return node
302
222
  end
303
223
 
304
- # Convert HTML fragments to string and re-parse as XML to remove phantom tags
305
- # This handles cases where pre-parsed HTML4/HTML5 fragments have auto-inserted meta
306
- html_string = if node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
307
- node.is_a?(Nokogiri::HTML5::DocumentFragment)
224
+ html_string = if XmlBackend.document_fragment?(node)
308
225
  node.to_s # Use to_s to avoid re-inserting meta tags
309
226
  elsif node.is_a?(String)
310
227
  node
@@ -312,10 +229,7 @@ module Canon
312
229
  node.to_html
313
230
  end
314
231
 
315
- # Use XML fragment parser to preserve structure without auto-generated elements.
316
- # Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since XML
317
- # parser only understands the five XML entities.
318
- frag = Nokogiri::XML.fragment(
232
+ frag = XmlBackend.xml_fragment(
319
233
  decode_html_named_entities(html_string),
320
234
  )
321
235
 
@@ -402,24 +316,15 @@ module Canon
402
316
 
403
317
  # Normalize HTML documents to fragments to avoid DTD differences
404
318
  # This ensures comparing string with document works correctly
405
- if node.is_a?(Nokogiri::HTML::Document) ||
406
- node.is_a?(Nokogiri::HTML4::Document) ||
407
- node.is_a?(Nokogiri::HTML5::Document)
408
- # Get root element and create fragment from its outer HTML
409
- # This avoids DOCTYPE and other document-level nodes
319
+ if XmlBackend.html_document?(node)
410
320
  root = node.at_css("html") || node.root
411
321
  if root
412
- node = Nokogiri::XML.fragment(root.to_html)
322
+ node = XmlBackend.xml_fragment(root.to_html)
413
323
  end
414
324
  end
415
325
 
416
- # For preprocessing modes that require whitespace filtering,
417
- # apply the same post-parsing normalization used for string inputs.
418
- # This is needed because dom_diff() pre-parses HTML5 strings into
419
- # Nokogiri fragments before calling HtmlComparator, bypassing the
420
- # string-input path where these filters are normally applied.
421
326
  if %i[normalize format rendered].include?(preprocessing)
422
- frag = node.is_a?(Nokogiri::XML::DocumentFragment) ? node : Nokogiri::XML.fragment(node.to_html)
327
+ frag = XmlBackend.document_fragment?(node) ? node : XmlBackend.xml_fragment(node.to_html)
423
328
  normalize_html_style_script_comments(frag)
424
329
  if preprocessing == :rendered
425
330
  normalize_rendered_whitespace(frag, match_opts)
@@ -467,11 +372,7 @@ module Canon
467
372
  node
468
373
  end
469
374
 
470
- # Parse as Nokogiri fragment for DOM comparison
471
- # Use XML fragment parser to avoid auto-inserted meta tags.
472
- # Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since
473
- # XML parser only understands the five XML entities.
474
- frag = Nokogiri::XML.fragment(
375
+ frag = XmlBackend.xml_fragment(
475
376
  decode_html_named_entities(html_string),
476
377
  )
477
378
 
@@ -565,17 +466,7 @@ module Canon
565
466
  # @param node [Canon::Xml::Node, Nokogiri::XML::Node] HTML node
566
467
  # @return [Symbol] :html5 or :html4
567
468
  def detect_html_version_from_node(node)
568
- # Check node type for Nokogiri
569
- if node.is_a?(Nokogiri::HTML5::Document) ||
570
- node.is_a?(Nokogiri::HTML5::DocumentFragment)
571
- :html5
572
- elsif node.is_a?(Nokogiri::HTML4::Document) ||
573
- node.is_a?(Nokogiri::HTML4::DocumentFragment)
574
- :html4
575
- else
576
- # Default to HTML5 for Canon::Xml::Node and unknown types
577
- :html5
578
- end
469
+ XmlBackend.html_version_from_node(node)
579
470
  end
580
471
 
581
472
  # Serialize node to string for diff display
@@ -585,7 +476,7 @@ module Canon
585
476
  # @return [String] Serialized HTML string
586
477
  def serialize_for_display(node)
587
478
  if node.is_a?(Canon::Xml::Node)
588
- XmlNodeComparison.serialize_node_to_xml(node)
479
+ Canon::Diff::NodeSerializer.serialize(node)
589
480
  elsif Canon::XmlParsing.xml_node?(node)
590
481
  Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
591
482
  else
@@ -780,32 +671,18 @@ compare_profile = nil)
780
671
  # XML documents typically have XML processing instructions or are
781
672
  # instances of Nokogiri::XML::Document (not HTML variants)
782
673
  def xml_document?(node)
783
- # Check if it's a pure XML document (not HTML4/HTML5 which also
784
- # inherit from XML::Document)
785
- # Check both Document and DocumentFragment variants
786
- return false if node.is_a?(Nokogiri::HTML4::Document) ||
787
- node.is_a?(Nokogiri::HTML5::Document) ||
788
- node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
789
- node.is_a?(Nokogiri::HTML5::DocumentFragment)
790
-
791
- # If it's an XML document, check for XML processing instruction
792
- if node.is_a?(Nokogiri::XML::Document) && node.children.any? do |child|
793
- child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
794
- child.name == "xml"
674
+ return false if XmlBackend.html_document?(node) || XmlBackend.document_fragment?(node)
675
+
676
+ if XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Document) && node.children.any? do |child|
677
+ child.is_a?(Nokogiri::XML::ProcessingInstruction) && child.name == "xml"
795
678
  end
796
- # XML documents often start with <?xml ...?> processing instruction
797
679
  return true
798
-
799
- # Note: We don't blindly return true here because HTML documents
800
- # also inherit from XML::Document. We only return true if there's
801
- # an XML processing instruction above.
802
680
  end
803
681
 
804
- # Check if it's a fragment that contains XML processing instructions
805
- if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) && node.children.any? do |child|
806
- child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
807
- child.name == "xml"
808
- end
682
+ if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) &&
683
+ XmlBackend.nokogiri? && node.children.any? do |child|
684
+ child.is_a?(Nokogiri::XML::ProcessingInstruction) && child.name == "xml"
685
+ end
809
686
  return true
810
687
  end
811
688
 
@@ -1,9 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "compare_profile"
4
- # Whitespace sensitivity module (single source of truth for sensitive elements)
5
- require_relative "whitespace_sensitivity"
6
-
7
3
  module Canon
8
4
  module Comparison
9
5
  # HtmlCompareProfile extends CompareProfile with HTML-specific comparison policies
@@ -29,34 +25,35 @@ module Canon
29
25
  # Override for HTML-specific comment handling
30
26
  #
31
27
  # In HTML, comments are presentational content (not part of the DOM semantics)
32
- # unless explicitly set to :strict. This differs from XML where comments
33
- # may carry semantic meaning.
34
- #
35
- # HTML default for comments is :ignore, so comments don't affect equivalence
36
- # unless the user explicitly sets comments: :strict
28
+ # unless explicitly set to :strict.
37
29
  #
38
30
  # @param dimension [Symbol] The match dimension to check
39
31
  # @return [Boolean] true if differences affect equivalence
40
32
  def affects_equivalence?(dimension)
41
- # Comments in HTML: default is :ignore (presentational)
42
- # Only affect equivalence if explicitly set to :strict
43
33
  if dimension == :comments
44
- # Check if comments key exists in options
45
34
  if match_options.is_a?(Hash)
46
- # If comments key doesn't exist, default to false (HTML default: ignore)
47
35
  return false unless match_options.key?(:comments)
48
36
 
49
- # If key exists, check if it's :strict
50
37
  return match_options[:comments] == :strict
51
38
  elsif match_options.is_a?(ResolvedMatchOptions)
52
- behavior = behavior_for(dimension)
53
- return behavior == :strict
39
+ return behavior_for(dimension) == :strict
54
40
  end
55
- # Default: comments don't affect equivalence in HTML
56
41
  return false
57
42
  end
58
43
 
59
- # All other dimensions use base class behavior
44
+ super
45
+ end
46
+
47
+ # Override normative classification for HTML-specific comment handling.
48
+ #
49
+ # Delegates to the parent class for all dimensions, which in turn delegates
50
+ # to Dimension objects. For :comments, applies the same HTML-specific rule
51
+ # as affects_equivalence? — comments default to non-normative in HTML.
52
+ def normative_dimension?(dimension)
53
+ if dimension == :comments
54
+ return affects_equivalence?(:comments)
55
+ end
56
+
60
57
  super
61
58
  end
62
59
 
@@ -1,49 +1,30 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
- require_relative "match_options"
5
- require_relative "comparison_result"
6
4
 
7
5
  module Canon
8
6
  module Comparison
9
7
  # JSON comparison class
10
- # Handles comparison of JSON objects with various options
8
+ # Delegates to RubyObjectComparator for actual comparison logic
11
9
  class JsonComparator
12
- # Default comparison options for JSON
13
10
  DEFAULT_OPTS = {
14
- # Output options
15
11
  verbose: false,
16
-
17
- # Match system options
18
12
  match_profile: nil,
19
13
  match: nil,
20
14
  preprocessing: nil,
21
15
  global_profile: nil,
22
16
  global_options: nil,
23
-
24
- # Diff display options
25
17
  diff: nil,
26
18
  }.freeze
27
19
 
28
20
  class << self
29
- # Parse JSON from string or return as-is
30
- #
31
- # @param obj [String, Hash, Array] JSON string or parsed object
32
- # @return [Object] Parsed JSON object
33
21
  def parse(obj)
34
22
  parse_json(obj)
35
23
  end
36
24
 
37
- # Compare two JSON objects for equivalence
38
- #
39
- # @param json1 [String, Hash, Array] First JSON
40
- # @param json2 [String, Hash, Array] Second JSON
41
- # @param opts [Hash] Comparison options
42
- # @return [Boolean, ComparisonResult] true if equivalent, or ComparisonResult if verbose
43
25
  def equivalent?(json1, json2, opts = {})
44
26
  opts = DEFAULT_OPTS.merge(opts)
45
27
 
46
- # Resolve match options with format-specific defaults
47
28
  match_opts_hash = MatchOptions::Json.resolve(
48
29
  format: :json,
49
30
  match_profile: opts[:match_profile],
@@ -53,24 +34,16 @@ module Canon
53
34
  global_options: opts[:global_options],
54
35
  )
55
36
 
56
- # Wrap in ResolvedMatchOptions for consistency with XML/HTML
57
- Canon::Comparison::ResolvedMatchOptions.new(
58
- match_opts_hash,
59
- format: :json,
60
- )
61
-
62
- # Store resolved match options for use in comparison logic
63
37
  opts[:match_opts] = match_opts_hash
64
38
 
65
- # Parse JSON if strings
66
39
  obj1 = parse_json(json1)
67
40
  obj2 = parse_json(json2)
68
41
 
69
42
  differences = []
70
- result = compare_ruby_objects(obj1, obj2, opts, differences, "")
43
+ result = RubyObjectComparator.compare_objects(obj1, obj2, opts,
44
+ differences, "")
71
45
 
72
46
  if opts[:verbose]
73
- # Format JSON for display
74
47
  json_str1 = obj1.is_a?(String) ? obj1 : JSON.pretty_generate(obj1)
75
48
  json_str2 = obj2.is_a?(String) ? obj2 : JSON.pretty_generate(obj2)
76
49
 
@@ -87,148 +60,14 @@ module Canon
87
60
 
88
61
  private
89
62
 
90
- # Parse JSON from string or return as-is
91
63
  def parse_json(obj)
92
64
  return obj unless obj.is_a?(String)
93
65
 
94
66
  begin
95
67
  JSON.parse(obj)
96
68
  rescue JSON::ParserError
97
- obj # Return original string if parsing fails
98
- end
99
- end
100
-
101
- # Compare Ruby objects (Hash, Array, primitives) for JSON/YAML
102
- def compare_ruby_objects(obj1, obj2, opts, differences, path)
103
- # Check for type mismatch
104
- unless obj1.instance_of?(obj2.class)
105
- add_ruby_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
106
- opts, differences)
107
- return Comparison::UNEQUAL_TYPES
108
- end
109
-
110
- case obj1
111
- when Hash
112
- compare_hashes(obj1, obj2, opts, differences, path)
113
- when Array
114
- compare_arrays(obj1, obj2, opts, differences, path)
115
- when NilClass, TrueClass, FalseClass, Numeric, String, Symbol
116
- compare_primitives(obj1, obj2, opts, differences, path)
117
- else
118
- # Fallback to equality comparison
119
- if obj1 == obj2
120
- Comparison::EQUIVALENT
121
- else
122
- add_ruby_difference(path, obj1, obj2,
123
- Comparison::UNEQUAL_PRIMITIVES, opts,
124
- differences)
125
- Comparison::UNEQUAL_PRIMITIVES
126
- end
127
- end
128
- end
129
-
130
- # Compare two hashes
131
- def compare_hashes(hash1, hash2, opts, differences, path)
132
- keys1 = hash1.keys
133
- keys2 = hash2.keys
134
-
135
- # Sort keys if order should be ignored (based on match options)
136
- match_opts = opts[:match_opts]
137
- if match_opts[:key_order] != :strict
138
- keys1 = keys1.sort_by(&:to_s)
139
- keys2 = keys2.sort_by(&:to_s)
140
- elsif keys1 != keys2
141
- # Strict mode: key order matters
142
- # Check if keys are in same order
143
- # Keys are different or in different order
144
- # First check if it's just ordering (same keys, different order)
145
- if keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
146
- # Same keys, different order - this is a key_order difference
147
- key_path = path.empty? ? "(key order)" : "#{path}.(key order)"
148
- add_ruby_difference(key_path, keys1, keys2,
149
- Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
150
- return Comparison::UNEQUAL_HASH_KEY_ORDER
151
- end
152
- end
153
-
154
- # Check for missing keys
155
- missing_in_2 = keys1 - keys2
156
- missing_in_1 = keys2 - keys1
157
-
158
- missing_in_2.each do |key|
159
- key_path = path.empty? ? key.to_s : "#{path}.#{key}"
160
- add_ruby_difference(key_path, hash1[key], nil,
161
- Comparison::MISSING_HASH_KEY, opts, differences)
162
- end
163
-
164
- missing_in_1.each do |key|
165
- key_path = path.empty? ? key.to_s : "#{path}.#{key}"
166
- add_ruby_difference(key_path, nil, hash2[key],
167
- Comparison::MISSING_HASH_KEY, opts, differences)
168
- end
169
-
170
- has_missing_keys = !missing_in_1.empty? || !missing_in_2.empty?
171
-
172
- # Compare common keys
173
- common_keys = keys1 & keys2
174
- all_equivalent = true
175
- common_keys.each do |key|
176
- key_path = path.empty? ? key.to_s : "#{path}.#{key}"
177
- result = compare_ruby_objects(hash1[key], hash2[key], opts,
178
- differences, key_path)
179
- all_equivalent = false unless result == Comparison::EQUIVALENT
180
- end
181
-
182
- # Return appropriate status
183
- return Comparison::MISSING_HASH_KEY if has_missing_keys && all_equivalent
184
- return Comparison::UNEQUAL_HASH_VALUES unless all_equivalent
185
-
186
- has_missing_keys ? Comparison::MISSING_HASH_KEY : Comparison::EQUIVALENT
187
- end
188
-
189
- # Compare two arrays
190
- def compare_arrays(arr1, arr2, opts, differences, path)
191
- unless arr1.length == arr2.length
192
- add_ruby_difference(path, arr1, arr2,
193
- Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
194
- differences)
195
- return Comparison::UNEQUAL_ARRAY_LENGTHS
69
+ obj
196
70
  end
197
-
198
- all_equivalent = true
199
- arr1.each_with_index do |elem1, index|
200
- elem2 = arr2[index]
201
- elem_path = "#{path}[#{index}]"
202
- result = compare_ruby_objects(elem1, elem2, opts, differences,
203
- elem_path)
204
- all_equivalent = false unless result == Comparison::EQUIVALENT
205
- end
206
-
207
- all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ARRAY_ELEMENTS
208
- end
209
-
210
- # Compare primitive values
211
- def compare_primitives(val1, val2, opts, differences, path)
212
- if val1 == val2
213
- Comparison::EQUIVALENT
214
- else
215
- add_ruby_difference(path, val1, val2,
216
- Comparison::UNEQUAL_PRIMITIVES, opts,
217
- differences)
218
- Comparison::UNEQUAL_PRIMITIVES
219
- end
220
- end
221
-
222
- # Add a Ruby object difference
223
- def add_ruby_difference(path, obj1, obj2, diff_code, opts, differences)
224
- return unless opts[:verbose]
225
-
226
- differences << {
227
- path: path,
228
- value1: obj1,
229
- value2: obj2,
230
- diff_code: diff_code,
231
- }
232
71
  end
233
72
  end
234
73
  end
@@ -10,8 +10,6 @@ module Canon
10
10
  # @param obj [String, Hash, Array] Object to parse
11
11
  # @return [Hash, Array] Parsed Ruby object
12
12
  def self.parse_json(obj)
13
- # Delegate to JsonComparator's private method via public API
14
- require_relative "json_comparator"
15
13
  JsonComparator.parse_json(obj)
16
14
  end
17
15
  end