canon 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +21 -22
  3. data/Rakefile +25 -2
  4. data/lib/canon/cache.rb +18 -27
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +20 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/node_inspector.rb +13 -48
  28. data/lib/canon/comparison/pipeline.rb +269 -0
  29. data/lib/canon/comparison/profile_definition.rb +0 -2
  30. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  31. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  32. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  33. data/lib/canon/comparison/strategies.rb +16 -0
  34. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
  35. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  36. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  37. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  38. data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
  39. data/lib/canon/comparison/xml_comparator.rb +4 -492
  40. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  41. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  42. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  43. data/lib/canon/comparison.rb +144 -267
  44. data/lib/canon/config/config_dsl.rb +159 -0
  45. data/lib/canon/config/env_provider.rb +0 -3
  46. data/lib/canon/config/env_schema.rb +48 -58
  47. data/lib/canon/config/profile_loader.rb +0 -1
  48. data/lib/canon/config.rb +116 -468
  49. data/lib/canon/diff/diff_block_builder.rb +0 -2
  50. data/lib/canon/diff/diff_classifier.rb +0 -5
  51. data/lib/canon/diff/diff_context.rb +0 -2
  52. data/lib/canon/diff/diff_context_builder.rb +0 -2
  53. data/lib/canon/diff/diff_line_builder.rb +2 -3
  54. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  55. data/lib/canon/diff/diff_node_mapper.rb +10 -12
  56. data/lib/canon/diff/diff_report_builder.rb +0 -4
  57. data/lib/canon/diff/formatting_detector.rb +3 -3
  58. data/lib/canon/diff/node_serializer.rb +0 -7
  59. data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
  60. data/lib/canon/diff.rb +39 -0
  61. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  62. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  63. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  64. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  66. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  67. data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
  68. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
  70. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  71. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
  72. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
  74. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
  75. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  76. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  77. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  78. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  79. data/lib/canon/diff_formatter.rb +26 -20
  80. data/lib/canon/formatters/html4_formatter.rb +0 -2
  81. data/lib/canon/formatters/html5_formatter.rb +0 -2
  82. data/lib/canon/formatters/html_formatter.rb +0 -3
  83. data/lib/canon/formatters/json_formatter.rb +0 -1
  84. data/lib/canon/formatters/xml_formatter.rb +0 -4
  85. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  86. data/lib/canon/formatters.rb +16 -0
  87. data/lib/canon/html/data_model.rb +1 -11
  88. data/lib/canon/html.rb +4 -3
  89. data/lib/canon/options/cli_generator.rb +0 -2
  90. data/lib/canon/options/registry.rb +0 -2
  91. data/lib/canon/options.rb +9 -0
  92. data/lib/canon/pretty_printer/html.rb +0 -1
  93. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  94. data/lib/canon/pretty_printer.rb +12 -0
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  96. data/lib/canon/tree_diff/adapters.rb +14 -0
  97. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  98. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  99. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  100. data/lib/canon/tree_diff/core.rb +17 -0
  101. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  102. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  103. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  104. data/lib/canon/tree_diff/matchers.rb +15 -0
  105. data/lib/canon/tree_diff/operation_converter.rb +7 -15
  106. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  107. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  108. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  109. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  110. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  111. data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
  112. data/lib/canon/tree_diff/operations.rb +13 -0
  113. data/lib/canon/tree_diff.rb +26 -27
  114. data/lib/canon/validators/base_validator.rb +5 -10
  115. data/lib/canon/validators/html_validator.rb +2 -8
  116. data/lib/canon/validators/json_validator.rb +0 -1
  117. data/lib/canon/validators/xml_validator.rb +2 -8
  118. data/lib/canon/validators/yaml_validator.rb +0 -1
  119. data/lib/canon/validators.rb +12 -0
  120. data/lib/canon/version.rb +1 -1
  121. data/lib/canon/xml/c14n.rb +0 -4
  122. data/lib/canon/xml/data_model.rb +5 -15
  123. data/lib/canon/xml/line_range_mapper.rb +0 -2
  124. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  125. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  126. data/lib/canon/xml/nodes/element_node.rb +0 -2
  127. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  128. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  129. data/lib/canon/xml/nodes/root_node.rb +0 -2
  130. data/lib/canon/xml/nodes/text_node.rb +0 -2
  131. data/lib/canon/xml/nodes.rb +19 -0
  132. data/lib/canon/xml/processor.rb +0 -5
  133. data/lib/canon/xml/sax_builder.rb +1 -8
  134. data/lib/canon/xml/whitespace_normalizer.rb +2 -2
  135. data/lib/canon/xml.rb +33 -0
  136. data/lib/canon/xml_backend.rb +50 -14
  137. data/lib/canon/xml_parsing.rb +32 -18
  138. data/lib/canon.rb +25 -15
  139. data/lib/tasks/performance.rake +0 -58
  140. data/lib/tasks/performance_comparator.rb +132 -65
  141. data/lib/tasks/performance_helpers.rb +4 -249
  142. data/lib/tasks/performance_report.rb +309 -0
  143. metadata +28 -15
  144. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  145. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  146. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  147. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  148. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  149. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  150. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  151. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  152. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "html_formatter_base"
4
-
5
3
  module Canon
6
4
  module Formatters
7
5
  # HTML5 formatter using Nokogiri::HTML5 parser
@@ -1,9 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "html_formatter_base"
5
- require_relative "../pretty_printer/html"
6
- require_relative "../validators/html_validator"
7
4
 
8
5
  module Canon
9
6
  module Formatters
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
- require_relative "../validators/json_validator"
5
4
 
6
5
  module Canon
7
6
  module Formatters
@@ -1,10 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "../xml_backend"
5
- require_relative "../xml/c14n"
6
- require_relative "../pretty_printer/xml"
7
- require_relative "../validators/xml_validator"
8
4
 
9
5
  module Canon
10
6
  module Formatters
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "yaml"
4
- require_relative "../validators/yaml_validator"
5
4
 
6
5
  module Canon
7
6
  module Formatters
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Format-specific canonicalizers and parsers for the top-level
5
+ # `Canon.format` / `Canon.parse` API. Distinct from {DiffFormatter},
6
+ # which renders comparison output.
7
+ module Formatters
8
+ autoload :Html4Formatter, "canon/formatters/html4_formatter"
9
+ autoload :Html5Formatter, "canon/formatters/html5_formatter"
10
+ autoload :HtmlFormatter, "canon/formatters/html_formatter"
11
+ autoload :HtmlFormatterBase, "canon/formatters/html_formatter_base"
12
+ autoload :JsonFormatter, "canon/formatters/json_formatter"
13
+ autoload :XmlFormatter, "canon/formatters/xml_formatter"
14
+ autoload :YamlFormatter, "canon/formatters/yaml_formatter"
15
+ end
16
+ end
@@ -1,14 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "../data_model"
5
- require_relative "../xml/nodes/root_node"
6
- require_relative "../xml/nodes/element_node"
7
- require_relative "../xml/nodes/namespace_node"
8
- require_relative "../xml/nodes/attribute_node"
9
- require_relative "../xml/nodes/text_node"
10
- require_relative "../xml/nodes/comment_node"
11
- require_relative "../xml/nodes/processing_instruction_node"
12
4
 
13
5
  module Canon
14
6
  module Html
@@ -81,7 +73,6 @@ module Canon
81
73
  def self.serialize(node)
82
74
  # HTML nodes use the same serialization as XML
83
75
  # Delegate to XML serialization implementation
84
- require_relative "../xml/data_model"
85
76
  Canon::Xml::DataModel.serialize(node)
86
77
  end
87
78
 
@@ -89,7 +80,7 @@ module Canon
89
80
  def self.build_from_nokogiri(nokogiri_doc)
90
81
  root = Canon::Xml::Nodes::RootNode.new
91
82
 
92
- if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
83
+ if nokogiri_doc.is_a?(Nokogiri::XML::Document) && nokogiri_doc.root
93
84
  # For Documents (HTML4, HTML5): process the root element
94
85
  root.add_child(build_element_node(nokogiri_doc.root))
95
86
 
@@ -222,7 +213,6 @@ module Canon
222
213
  whitespace_sensitive_tags = %w[pre code textarea script style]
223
214
 
224
215
  # Check if whitespace is between inline siblings
225
- require_relative "../comparison/whitespace_sensitivity"
226
216
  unless whitespace_sensitive_tags.include?(parent_name) ||
227
217
  Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(nokogiri_text)
228
218
  return nil
data/lib/canon/html.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "html/data_model"
4
-
5
3
  module Canon
6
- # HTML-specific functionality for Canon
4
+ # HTML-specific functionality for Canon.
5
+ #
6
+ # Children are autoloaded — never `require_relative` them.
7
7
  module Html
8
+ autoload :DataModel, "canon/html/data_model"
8
9
  end
9
10
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "registry"
4
-
5
3
  module Canon
6
4
  module Options
7
5
  # Generates Thor CLI options from the Options Registry
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../comparison/match_options"
4
-
5
3
  module Canon
6
4
  module Options
7
5
  # Centralized registry for all Canon options
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # CLI option definitions and the option registry.
5
+ module Options
6
+ autoload :CliGenerator, "canon/options/cli_generator"
7
+ autoload :Registry, "canon/options/registry"
8
+ end
9
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require "stringio"
5
- require_relative "html_void_elements"
6
5
 
7
6
  module Canon
8
7
  module PrettyPrinter
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "html_void_elements"
5
4
 
6
5
  module Canon
7
6
  module PrettyPrinter
@@ -398,7 +397,6 @@ module Canon
398
397
 
399
398
  # Load the default visualization map from DiffFormatter constants.
400
399
  def default_vis_map
401
- require_relative "../diff_formatter"
402
400
  Canon::DiffFormatter::DEFAULT_VISUALIZATION_MAP
403
401
  rescue LoadError, NameError
404
402
  { " " => "░", "\t" => "⇥", "\n" => "↵", "\r" => "⏎", "\u00A0" => "␣" }
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Pretty-printers that emit format-aware, fixture-ready output.
5
+ module PrettyPrinter
6
+ autoload :Html, "canon/pretty_printer/html"
7
+ autoload :HtmlVoidElements, "canon/pretty_printer/html_void_elements"
8
+ autoload :Json, "canon/pretty_printer/json"
9
+ autoload :Xml, "canon/pretty_printer/xml"
10
+ autoload :XmlNormalized, "canon/pretty_printer/xml_normalized"
11
+ end
12
+ end
@@ -193,7 +193,7 @@ module Canon
193
193
  # @param element [Nokogiri::XML::Element] Element to check
194
194
  # @return [Boolean] True if element is whitespace-sensitive
195
195
  def whitespace_sensitive?(element)
196
- return false unless element.respond_to?(:name)
196
+ return false unless Canon::Comparison::NodeInspector.element_node?(element)
197
197
 
198
198
  # List of HTML elements where whitespace is semantically significant
199
199
  whitespace_sensitive_tags = %w[pre code textarea script style]
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Format-specific tree adapters that wrap parsed documents into
6
+ # Canon::TreeDiff::Core::TreeNode trees for the matcher.
7
+ module Adapters
8
+ autoload :HTMLAdapter, "canon/tree_diff/adapters/html_adapter"
9
+ autoload :JSONAdapter, "canon/tree_diff/adapters/json_adapter"
10
+ autoload :XMLAdapter, "canon/tree_diff/adapters/xml_adapter"
11
+ autoload :YAMLAdapter, "canon/tree_diff/adapters/yaml_adapter"
12
+ end
13
+ end
14
+ end
@@ -35,18 +35,12 @@ module Canon
35
35
  # @param attrs2 [Hash] Second attribute hash
36
36
  # @return [Boolean] True if attributes are considered equal
37
37
  def equal?(attrs1, attrs2)
38
- # Handle nil/empty cases
39
38
  return true if attrs1.nil? && attrs2.nil?
40
39
  return false if attrs1.nil? || attrs2.nil?
41
40
 
42
- attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
43
- attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
44
-
45
41
  if attribute_order == :strict
46
- # Strict mode: order matters
47
42
  attrs1 == attrs2
48
43
  else
49
- # Ignore/normalize mode: sort keys for comparison
50
44
  normalize_for_comparison(attrs1) == normalize_for_comparison(attrs2)
51
45
  end
52
46
  end
@@ -142,7 +142,7 @@ module Canon
142
142
  whitespace_sensitive_tags = %w[pre code textarea script style]
143
143
 
144
144
  # Check if this node is whitespace-sensitive
145
- if node.respond_to?(:label)
145
+ if node.is_a?(TreeNode)
146
146
  label = node.label.to_s.downcase
147
147
  return true if whitespace_sensitive_tags.include?(label)
148
148
  end
@@ -350,15 +350,22 @@ module Canon
350
350
  #
351
351
  # @return [String] XPath expression
352
352
  def xpath
353
- # If we have a source node that supports xpath, use it
354
- if @source_node.respond_to?(:path)
355
- return @source_node.path
356
- end
353
+ return @source_node.path if nokogiri_source?(@source_node)
357
354
 
358
- # Otherwise construct path from tree structure
359
355
  construct_path
360
356
  end
361
357
 
358
+ # True when the supplied source node is a Nokogiri node that
359
+ # exposes an XPath via +path+. Nokogiri is an optional
360
+ # backend so the constant is guarded.
361
+ def nokogiri_source?(node)
362
+ return false unless Canon::XmlBackend.nokogiri?
363
+
364
+ node.is_a?(Nokogiri::XML::Node)
365
+ end
366
+
367
+ private :nokogiri_source?
368
+
362
369
  # Construct path from tree structure
363
370
  #
364
371
  # @return [String] Path expression
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Core tree-diff primitives: TreeNode, matching, signatures, weights,
6
+ # attribute comparison, and XML entity decoding. Children are
7
+ # autoloaded — never `require_relative` them.
8
+ module Core
9
+ autoload :AttributeComparator, "canon/tree_diff/core/attribute_comparator"
10
+ autoload :Matching, "canon/tree_diff/core/matching"
11
+ autoload :NodeSignature, "canon/tree_diff/core/node_signature"
12
+ autoload :NodeWeight, "canon/tree_diff/core/node_weight"
13
+ autoload :TreeNode, "canon/tree_diff/core/tree_node"
14
+ autoload :XmlEntityDecoder, "canon/tree_diff/core/xml_entity_decoder"
15
+ end
16
+ end
17
+ end
@@ -1,12 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../core/tree_node"
4
- require_relative "../core/node_signature"
5
- require_relative "../core/node_weight"
6
- require_relative "../core/matching"
7
- require_relative "../core/attribute_comparator"
8
- require_relative "../core/xml_entity_decoder"
9
-
10
3
  module Canon
11
4
  module TreeDiff
12
5
  module Matchers
@@ -1,9 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../core/tree_node"
4
- require_relative "../core/node_signature"
5
- require_relative "../core/matching"
6
-
7
3
  module Canon
8
4
  module TreeDiff
9
5
  module Matchers
@@ -165,7 +161,7 @@ module Canon
165
161
  whitespace_sensitive_tags = %w[pre code textarea script style]
166
162
 
167
163
  # Check if this node is whitespace-sensitive
168
- if node.respond_to?(:label)
164
+ if node.is_a?(Core::TreeNode)
169
165
  label = node.label.to_s.downcase
170
166
  return true if whitespace_sensitive_tags.include?(label)
171
167
  end
@@ -1,9 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../core/tree_node"
4
- require_relative "../core/node_weight"
5
- require_relative "../core/matching"
6
-
7
3
  module Canon
8
4
  module TreeDiff
9
5
  module Matchers
@@ -218,7 +214,7 @@ module Canon
218
214
  whitespace_sensitive_tags = %w[pre code textarea script style]
219
215
 
220
216
  # Check if this node is whitespace-sensitive
221
- if node.respond_to?(:label)
217
+ if node.is_a?(Core::TreeNode)
222
218
  label = node.label.to_s.downcase
223
219
  return true if whitespace_sensitive_tags.include?(label)
224
220
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Tree-matching strategies — hash-based, similarity-based,
6
+ # structural propagation, and the universal fallback.
7
+ module Matchers
8
+ autoload :HashMatcher, "canon/tree_diff/matchers/hash_matcher"
9
+ autoload :SimilarityMatcher, "canon/tree_diff/matchers/similarity_matcher"
10
+ autoload :StructuralPropagator,
11
+ "canon/tree_diff/matchers/structural_propagator"
12
+ autoload :UniversalMatcher, "canon/tree_diff/matchers/universal_matcher"
13
+ end
14
+ end
15
+ end
@@ -1,13 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../diff/diff_node"
4
- require_relative "../comparison/match_options"
5
- # OperationConverter helper modules
6
- require_relative "operation_converter_helpers/metadata_enricher"
7
- require_relative "operation_converter_helpers/reason_builder"
8
- require_relative "operation_converter_helpers/post_processor"
9
- require_relative "operation_converter_helpers/update_change_handler"
10
-
11
3
  module Canon
12
4
  module TreeDiff
13
5
  # Converts TreeDiff Operations to DiffNodes for integration with Canon's
@@ -124,6 +116,8 @@ module Canon
124
116
  end
125
117
  end
126
118
 
119
+ public :convert_operation
120
+
127
121
  # Convert INSERT operation to DiffNode
128
122
  #
129
123
  # @param operation [Operation] Insert operation
@@ -153,7 +147,7 @@ module Canon
153
147
 
154
148
  # Determine dimension for INSERT/DELETE operations based on node type
155
149
  def dimension_for_insert_delete(tree_node)
156
- label = tree_node.respond_to?(:label) ? tree_node.label : nil
150
+ label = tree_node.is_a?(Canon::TreeDiff::Core::TreeNode) ? tree_node.label : nil
157
151
  return :comments if label == "comment"
158
152
 
159
153
  :element_structure
@@ -359,7 +353,7 @@ module Canon
359
353
  def extract_source_node(tree_node)
360
354
  return nil if tree_node.nil?
361
355
 
362
- tree_node.respond_to?(:source_node) ? tree_node.source_node : tree_node
356
+ tree_node.is_a?(Canon::TreeDiff::Core::TreeNode) ? tree_node.source_node : tree_node
363
357
  end
364
358
 
365
359
  # Determine if a diff is normative based on match options
@@ -383,12 +377,10 @@ module Canon
383
377
  return false if node.nil?
384
378
 
385
379
  # Get element name from node
386
- element_name = if node.respond_to?(:label)
387
- node.label # TreeNode
388
- elsif node.respond_to?(:name)
389
- node.name # Nokogiri node
380
+ element_name = if node.is_a?(Canon::TreeDiff::Core::TreeNode)
381
+ node.label
390
382
  else
391
- return false
383
+ Canon::Comparison::NodeInspector.name(node)
392
384
  end
393
385
 
394
386
  # Check if it's in our metadata elements list
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../diff/path_builder"
4
- require_relative "../../diff/node_serializer"
5
-
6
3
  module Canon
7
4
  module TreeDiff
8
5
  module OperationConverterHelpers
@@ -45,13 +42,7 @@ module Canon
45
42
  def self.serialize(tree_node)
46
43
  return nil if tree_node.nil?
47
44
 
48
- # Extract source node from TreeNode
49
- source = if tree_node.respond_to?(:source_node)
50
- tree_node.source_node
51
- else
52
- tree_node
53
- end
54
-
45
+ source = tree_node.is_a?(Core::TreeNode) ? tree_node.source_node : tree_node
55
46
  Canon::Diff::NodeSerializer.serialize(source)
56
47
  end
57
48
 
@@ -62,8 +53,7 @@ module Canon
62
53
  def self.extract_attributes(tree_node)
63
54
  return nil if tree_node.nil?
64
55
 
65
- # Use TreeNode's attributes directly (already normalized by adapter)
66
- tree_node.respond_to?(:attributes) ? (tree_node.attributes || {}) : {}
56
+ tree_node.is_a?(Core::TreeNode) ? (tree_node.attributes || {}) : {}
67
57
  end
68
58
  end
69
59
  end
@@ -20,7 +20,7 @@ module Canon
20
20
  # For each DELETE, try to find a matching INSERT
21
21
  deletes.each do |delete_node|
22
22
  node1 = delete_node.node1
23
- next unless node1.respond_to?(:name) && node1.respond_to?(:attributes)
23
+ next unless backend_element?(node1)
24
24
 
25
25
  # Skip if node has no attributes (can't be attribute order diff)
26
26
  next if node1.attributes.nil? || node1.attributes.empty?
@@ -28,7 +28,7 @@ module Canon
28
28
  # Find inserts with same element name at same position
29
29
  matching_insert = inserts.find do |insert_node|
30
30
  node2 = insert_node.node2
31
- next false unless node2.respond_to?(:name) && node2.respond_to?(:attributes)
31
+ next false unless backend_element?(node2)
32
32
  next false unless node1.name == node2.name
33
33
 
34
34
  # Must have attributes to differ in order
@@ -59,6 +59,17 @@ module Canon
59
59
  diff_nodes
60
60
  end
61
61
 
62
+ # True when +node+ is a backend element (Nokogiri or Moxml) that
63
+ # exposes its attributes via +attributes+. Canon-native
64
+ # +ElementNode+ uses +attribute_nodes+ and is therefore excluded.
65
+ def self.backend_element?(node)
66
+ return false unless node
67
+ return false unless Canon::Comparison::NodeInspector.element_node?(node)
68
+
69
+ Canon::XmlBackend.nokogiri? ? node.is_a?(Nokogiri::XML::Node) : false
70
+ end
71
+ private_class_method :backend_element?
72
+
62
73
  # Check if two attribute hashes are equal ignoring order
63
74
  #
64
75
  # @param attrs1 [Hash] First attribute hash
@@ -68,11 +79,6 @@ module Canon
68
79
  return true if attrs1.nil? && attrs2.nil?
69
80
  return false if attrs1.nil? || attrs2.nil?
70
81
 
71
- # Convert to hashes if needed
72
- attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
73
- attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
74
-
75
- # Compare as sets (order-independent)
76
82
  attrs1.sort.to_h == attrs2.sort.to_h
77
83
  end
78
84
 
@@ -16,7 +16,7 @@ module Canon
16
16
  node = operation[:node]
17
17
  content = operation[:content]
18
18
 
19
- if node.respond_to?(:label)
19
+ if node.is_a?(Core::TreeNode)
20
20
  # Include content preview for clarity
21
21
  "Element inserted: #{content || "<#{node.label}>"}"
22
22
  else
@@ -32,7 +32,7 @@ module Canon
32
32
  node = operation[:node]
33
33
  content = operation[:content]
34
34
 
35
- if node.respond_to?(:label)
35
+ if node.is_a?(Core::TreeNode)
36
36
  # Include content preview for clarity
37
37
  "Element deleted: #{content || "<#{node.label}>"}"
38
38
  else
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../operation_converter_helpers/reason_builder"
4
-
5
3
  module Canon
6
4
  module TreeDiff
7
5
  module OperationConverterHelpers
@@ -20,8 +18,8 @@ module Canon
20
18
  def self.convert(operation, metadata, is_metadata, normative_determiner)
21
19
  tree_node1 = operation[:node1] # TreeNode from adapter
22
20
  tree_node2 = operation[:node2] # TreeNode from adapter
23
- node1 = tree_node1.respond_to?(:source_node) ? tree_node1.source_node : tree_node1
24
- node2 = tree_node2.respond_to?(:source_node) ? tree_node2.source_node : tree_node2
21
+ node1 = tree_node1.is_a?(Core::TreeNode) ? tree_node1.source_node : tree_node1
22
+ node2 = tree_node2.is_a?(Core::TreeNode) ? tree_node2.source_node : tree_node2
25
23
  changes = operation[:changes]
26
24
 
27
25
  # Handle case where changes is a boolean or non-hash value
@@ -150,8 +148,8 @@ is_metadata, normative_determiner, tree_node1, tree_node2)
150
148
  # @param tree_node2 [Object] Second tree node
151
149
  # @return [Symbol] The dimension to use (:text_content or :comments)
152
150
  def self.dimension_for_value_change(tree_node1, tree_node2)
153
- label1 = tree_node1.respond_to?(:label) ? tree_node1.label : nil
154
- label2 = tree_node2.respond_to?(:label) ? tree_node2.label : nil
151
+ label1 = tree_node1.is_a?(Core::TreeNode) ? tree_node1.label : nil
152
+ label2 = tree_node2.is_a?(Core::TreeNode) ? tree_node2.label : nil
155
153
 
156
154
  # If either node is a comment, use :comments dimension
157
155
  return :comments if label1 == "comment" || label2 == "comment"
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Helper modules consumed by OperationConverter when converting
6
+ # tree operations into DiffNodes.
7
+ module OperationConverterHelpers
8
+ autoload :MetadataEnricher,
9
+ "canon/tree_diff/operation_converter_helpers/metadata_enricher"
10
+ autoload :PostProcessor,
11
+ "canon/tree_diff/operation_converter_helpers/post_processor"
12
+ autoload :ReasonBuilder,
13
+ "canon/tree_diff/operation_converter_helpers/reason_builder"
14
+ autoload :UpdateChangeHandler,
15
+ "canon/tree_diff/operation_converter_helpers/update_change_handler"
16
+ end
17
+ end
18
+ end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../core/xml_entity_decoder"
4
-
5
3
  module Canon
6
4
  module TreeDiff
7
5
  module Operations
@@ -600,6 +598,10 @@ module Canon
600
598
  depth
601
599
  end
602
600
 
601
+ public :normalize_text, :calculate_depth, :text_similarity,
602
+ :extract_text_content, :collect_all_nodes, :nodes_identical?,
603
+ :detect_changes
604
+
603
605
  # Check if a node is in a whitespace-sensitive context
604
606
  #
605
607
  # HTML elements where whitespace is significant: <pre>, <code>, <textarea>, <script>, <style>
@@ -615,13 +617,12 @@ module Canon
615
617
  # Check if this node or any ancestor is whitespace-sensitive
616
618
  current = node
617
619
  while current
618
- if current.respond_to?(:label)
620
+ if current.is_a?(Core::TreeNode)
619
621
  label = current.label.to_s.downcase
620
622
  return true if whitespace_sensitive_tags.include?(label)
621
623
  end
622
624
 
623
- # Check parent
624
- current = current.parent if current.respond_to?(:parent)
625
+ current = current.is_a?(Core::TreeNode) ? current.parent : nil
625
626
  break unless current
626
627
  end
627
628
 
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Tree-diff operations: Operation (INSERT/DELETE/UPDATE/MOVE) and
6
+ # the OperationDetector that emits them.
7
+ module Operations
8
+ autoload :Operation, "canon/tree_diff/operations/operation"
9
+ autoload :OperationDetector,
10
+ "canon/tree_diff/operations/operation_detector"
11
+ end
12
+ end
13
+ end