canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Namespace declaration comparison logic.
      #
      # Collects the xmlns / xmlns:* declarations of two nodes as
      # prefix => URI hashes (the default namespace uses the empty-string
      # prefix) and records a DiffNode when the two hashes differ.
      class NamespaceComparator
        # Compare namespace declarations between two nodes
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param opts [Hash] Comparison options (forwarded to DiffNodeBuilder)
        # @param differences [Array] Array to append differences to
        # @return [Object] Comparison::EQUIVALENT when both nodes declare the
        #   same prefixes with the same URIs, otherwise
        #   Comparison::UNEQUAL_ATTRIBUTES
        def self.compare(node1, node2, opts, differences)
          ns_decls1 = extract_declarations(node1)
          ns_decls2 = extract_declarations(node2)

          missing = ns_decls1.keys - ns_decls2.keys # In node1 but not node2
          extra = ns_decls2.keys - ns_decls1.keys   # In node2 but not node1
          # Use key? rather than the truthiness of the looked-up URI so that a
          # prefix bound to nil on one side is still reported as changed.
          changed = ns_decls1.select do |prefix, uri|
            ns_decls2.key?(prefix) && ns_decls2[prefix] != uri
          end.keys

          if missing.empty? && extra.empty? && changed.empty?
            return Comparison::EQUIVALENT
          end

          add_namespace_difference(node1, node2, missing, extra, changed,
                                   opts, differences)
          Comparison::UNEQUAL_ATTRIBUTES
        end

        # Extract namespace declarations from a node
        #
        # Nodes responding to +namespace_nodes+ are read from that collection.
        # Otherwise +attribute_nodes+ (or, failing that, +attributes+) is
        # scanned for xmlns / xmlns:* entries, handling both Array and
        # Hash-like attribute containers.
        #
        # @param node [Object] Node to extract namespace declarations from
        # @return [Hash] Hash of prefix => URI mappings
        def self.extract_declarations(node)
          declarations = {}

          if node.respond_to?(:namespace_nodes)
            return extract_from_namespace_nodes(node.namespace_nodes,
                                                declarations)
          end

          raw_attrs = node.respond_to?(:attribute_nodes) ? node.attribute_nodes : node.attributes

          if raw_attrs.is_a?(Array)
            extract_from_array_attributes(raw_attrs, declarations)
          else
            extract_from_hash_attributes(raw_attrs, declarations)
          end

          declarations
        end

        # Extract from a namespace_nodes collection
        #
        # @param namespace_nodes [Array] Objects responding to +prefix+ and +uri+
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_namespace_nodes(namespace_nodes, declarations)
          namespace_nodes.each do |ns|
            # Skip the implicit xml namespace binding
            next if ns.prefix == "xml" && ns.uri == "http://www.w3.org/XML/1998/namespace"

            # A nil prefix denotes the default namespace
            declarations[ns.prefix || ""] = ns.uri
          end

          declarations
        end

        # Extract from array-format attributes
        #
        # @param raw_attrs [Array] Objects responding to +name+ and +value+
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_array_attributes(raw_attrs, declarations)
          raw_attrs.each do |attr|
            name = attr.name
            next unless namespace_declaration?(name)

            declarations[declared_prefix(name)] = attr.value
          end

          declarations
        end

        # Extract from hash-format attributes
        #
        # Two layouts are supported:
        # * key is the attribute name (String) and val is the attribute
        #   object (or a plain value)
        # * key is the attribute object itself and val is nil
        #
        # @param raw_attrs [Hash] Hash-like attributes
        # @param declarations [Hash] Output hash to populate
        # @return [Hash] Declarations hash
        def self.extract_from_hash_attributes(raw_attrs, declarations)
          raw_attrs.each do |key, val|
            name = if key.is_a?(String)
                     key
                   else
                     key.respond_to?(:name) ? key.name : key.to_s
                   end
            next unless namespace_declaration?(name)

            declarations[declared_prefix(name)] = hash_attribute_value(key, val)
          end

          declarations
        end

        # Check if an attribute name is a namespace declaration
        #
        # @param attr_name [String] Attribute name
        # @return [Boolean] true if it's a namespace declaration
        def self.namespace_declaration?(attr_name)
          attr_name == "xmlns" || attr_name.start_with?("xmlns:")
        end

        # Add a namespace declaration difference
        #
        # Builds a DiffNode for the :namespace_declarations dimension and
        # appends it to +differences+ when the builder returns one.
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param missing [Array] Missing prefixes (not forwarded to the builder;
        #   kept so existing callers keep working)
        # @param extra [Array] Extra prefixes (not forwarded to the builder)
        # @param changed [Array] Changed prefixes (not forwarded to the builder)
        # @param opts [Hash] Options splatted into DiffNodeBuilder.build
        # @param differences [Array] Array to append difference to
        def self.add_namespace_difference(node1, node2, missing, extra, # rubocop:disable Lint/UnusedMethodArgument
                                          changed, opts, differences)
          # Loaded lazily to avoid a circular dependency at file load time
          require_relative "diff_node_builder"

          diff_node = DiffNodeBuilder.build(
            node1: node1,
            node2: node2,
            diff1: Comparison::UNEQUAL_ATTRIBUTES,
            diff2: Comparison::UNEQUAL_ATTRIBUTES,
            dimension: :namespace_declarations,
            **opts,
          )
          differences << diff_node if diff_node
        end

        # Map a declaration attribute name to its prefix:
        # "xmlns" -> "", "xmlns:xmi" -> "xmi"
        #
        # @param name [String] Attribute name that is a namespace declaration
        # @return [String] Declared prefix ("" for the default namespace)
        def self.declared_prefix(name)
          name == "xmlns" ? "" : name.split(":", 2)[1]
        end

        # Resolve the URI of a hash-format attribute entry
        #
        # @param key [Object] Hash key (attribute name or attribute object)
        # @param val [Object, nil] Hash value (attribute object, plain value or nil)
        # @return [String] The declared URI
        def self.hash_attribute_value(key, val)
          if val.respond_to?(:value)
            val.value
          elsif val.nil? && key.respond_to?(:value)
            # Layout where the key is the attribute object and val is nil:
            # the URI lives on the key, not on the (absent) value.
            key.value
          else
            val.to_s
          end
        end

        private_class_method :declared_prefix, :hash_attribute_value
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../xml/c14n"
4
+
5
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Turns the various inputs accepted by the XML comparator (XML strings,
      # foreign node objects, Canon::Xml::Node instances) into
      # Canon::Xml::Node trees, optionally preprocessing string input first.
      class NodeParser
        # Parse a node from string or return as-is
        #
        # * Canon::Xml::Node instances are returned untouched.
        # * Strings are preprocessed according to +preprocessing+ and parsed
        #   with Canon::Xml::DataModel.
        # * Anything else is serialized and re-parsed via convert_from_node.
        #
        # @param node [String, Object] Node to parse
        # @param preprocessing [Symbol] Preprocessing mode (:none, :normalize, :c14n, :format)
        # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
        # @return [Canon::Xml::Node] Parsed node
        def self.parse(node, preprocessing = :none, preserve_whitespace: false)
          case node
          when Canon::Xml::Node
            node
          when String
            prepared = apply_preprocessing(node, preprocessing)
            Canon::Xml::DataModel.from_xml(
              prepared,
              preserve_whitespace: preserve_whitespace,
            )
          else
            convert_from_node(node, preserve_whitespace: preserve_whitespace)
          end
        end

        # Apply preprocessing transformation to XML string
        #
        # @param xml_string [String] XML string to preprocess
        # @param preprocessing [Symbol] Preprocessing mode
        # @return [String] Preprocessed XML string (the input itself for
        #   :none or any unrecognized mode)
        def self.apply_preprocessing(xml_string, preprocessing)
          case preprocessing
          when :c14n
            Canon::Xml::C14n.canonicalize(xml_string, with_comments: false)
          when :format
            Canon.format(xml_string, :xml)
          when :normalize
            # Strip every line and drop the ones that end up empty
            stripped_lines = xml_string.each_line.map(&:strip)
            stripped_lines.reject(&:empty?).join("\n")
          else
            xml_string
          end
        end

        # Convert a foreign node object to Canon::Xml::Node
        #
        # The node is serialized with +to_xml+ when available, otherwise with
        # +to_s+, and the resulting string is parsed through DataModel.
        #
        # @param node [Object] Node object to convert
        # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
        # @return [Canon::Xml::Node] Converted node
        # @raise [Canon::Error] if the node offers neither to_xml nor to_s
        def self.convert_from_node(node, preserve_whitespace: false)
          serialized =
            if node.respond_to?(:to_xml) then node.to_xml
            elsif node.respond_to?(:to_s) then node.to_s
            else
              raise Canon::Error,
                    "Unable to convert node to string: #{node.class}"
            end

          Canon::Xml::DataModel.from_xml(
            serialized,
            preserve_whitespace: preserve_whitespace,
          )
        end
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  module Comparison
    module XmlComparatorHelpers
      # Node type comparison strategy for XML nodes
      #
      # Picks the comparator method to run for a pair of nodes based on the
      # type of the first node. Two detection schemes are supported:
      # nodes whose +node_type+ is a Symbol, and nodes exposing predicate
      # methods such as +element?+ / +text?+.
      module NodeTypeComparator
        # node_type Symbol => [comparator method, forward child arguments?]
        SYMBOLIC_HANDLERS = {
          root: [:compare_children, true],
          element: [:compare_element_nodes, true],
          text: [:compare_text_nodes, false],
          comment: [:compare_comment_nodes, false],
          cdata: [:compare_text_nodes, false],
          processing_instruction: [:compare_processing_instruction_nodes, false],
        }.freeze

        # [predicate, comparator method, forward child arguments?], probed in
        # this exact order.
        PREDICATE_HANDLERS = [
          [:element?, :compare_element_nodes, true],
          [:text?, :compare_text_nodes, false],
          [:comment?, :compare_comment_nodes, false],
          [:cdata?, :compare_text_nodes, false],
          [:processing_instruction?, :compare_processing_instruction_nodes, false],
        ].freeze

        private_constant :SYMBOLIC_HANDLERS, :PREDICATE_HANDLERS

        class << self
          # Compare two nodes by dispatching to appropriate comparison method
          #
          # Symbolic dispatch is used only when both nodes respond to
          # +node_type+ and both return a Symbol; every other combination
          # goes through predicate-method dispatch.
          #
          # @param node1 [Object] First node
          # @param node2 [Object] Second node
          # @param comparator [XmlComparator] The comparator instance for method delegation
          # @param opts [Hash] Comparison options
          # @param child_opts [Hash] Options for child comparison
          # @param diff_children [Boolean] Whether to diff children
          # @param differences [Array] Array to collect differences
          # @return [Object] Whatever the selected comparator method returns,
          #   or Comparison::EQUIVALENT when no handler applies
          def compare(node1, node2, comparator, opts, child_opts,
                      diff_children, differences)
            pair = [node1, node2]
            symbolic = pair.all? { |n| n.respond_to?(:node_type) } &&
              pair.all? { |n| n.node_type.is_a?(Symbol) }

            if symbolic
              compare_by_symbolic_type(node1, node2, comparator, opts,
                                       child_opts, diff_children, differences)
            else
              compare_by_predicate_methods(node1, node2, comparator, opts,
                                           child_opts, diff_children, differences)
            end
          end

          private

          # Compare nodes using the Symbol returned by node1.node_type
          def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
                                       diff_children, differences)
            handler, with_children = SYMBOLIC_HANDLERS[node1.node_type]
            return Comparison::EQUIVALENT unless handler

            invoke(comparator, handler, with_children, node1, node2, opts,
                   child_opts, diff_children, differences)
          end

          # Compare nodes using predicate methods on node1
          def compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
                                           diff_children, differences)
            matched = PREDICATE_HANDLERS.find do |predicate, _handler, _with_children|
              node1.respond_to?(predicate) && node1.public_send(predicate)
            end

            if matched
              _predicate, handler, with_children = matched
            elsif node1.respond_to?(:root)
              # Document node (legacy path)
              handler = :compare_document_nodes
              with_children = true
            else
              return Comparison::EQUIVALENT
            end

            invoke(comparator, handler, with_children, node1, node2, opts,
                   child_opts, diff_children, differences)
          end

          # Call +handler+ on the comparator (via send, so non-public
          # methods are reachable) with either the full argument list or
          # the short one without the child-related arguments.
          def invoke(comparator, handler, with_children, node1, node2, opts,
                     child_opts, diff_children, differences)
            if with_children
              comparator.send(handler, node1, node2, opts, child_opts,
                              diff_children, differences)
            else
              comparator.send(handler, node1, node2, opts, differences)
            end
          end
        end
      end
    end
  end
end