canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -5,6 +5,8 @@ require_relative "match_options"
|
|
|
5
5
|
require_relative "../diff/diff_node"
|
|
6
6
|
require_relative "../diff/diff_classifier"
|
|
7
7
|
require_relative "comparison_result"
|
|
8
|
+
require_relative "../tree_diff"
|
|
9
|
+
require_relative "strategies/match_strategy_factory"
|
|
8
10
|
|
|
9
11
|
module Canon
|
|
10
12
|
module Comparison
|
|
@@ -67,6 +69,11 @@ module Canon
|
|
|
67
69
|
# Store resolved match options hash for use in comparison logic
|
|
68
70
|
opts[:match_opts] = match_opts_hash
|
|
69
71
|
|
|
72
|
+
# Use tree diff if semantic_diff option is enabled
|
|
73
|
+
if match_opts.semantic_diff?
|
|
74
|
+
return perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
75
|
+
end
|
|
76
|
+
|
|
70
77
|
# Create child_opts with resolved options
|
|
71
78
|
child_opts = opts.merge(child_opts)
|
|
72
79
|
|
|
@@ -74,6 +81,18 @@ module Canon
|
|
|
74
81
|
node1 = parse_node(n1, match_opts_hash[:preprocessing])
|
|
75
82
|
node2 = parse_node(n2, match_opts_hash[:preprocessing])
|
|
76
83
|
|
|
84
|
+
# Store original strings for line diff display (before preprocessing)
|
|
85
|
+
original1 = if n1.is_a?(String)
|
|
86
|
+
n1
|
|
87
|
+
else
|
|
88
|
+
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
89
|
+
end
|
|
90
|
+
original2 = if n2.is_a?(String)
|
|
91
|
+
n2
|
|
92
|
+
else
|
|
93
|
+
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
94
|
+
end
|
|
95
|
+
|
|
77
96
|
differences = []
|
|
78
97
|
diff_children = opts[:diff_children] || false
|
|
79
98
|
|
|
@@ -89,33 +108,110 @@ module Canon
|
|
|
89
108
|
end
|
|
90
109
|
|
|
91
110
|
if opts[:verbose]
|
|
92
|
-
#
|
|
93
|
-
#
|
|
94
|
-
xml1 = node1.respond_to?(:to_xml) ? node1.to_xml : node1.to_s
|
|
95
|
-
xml2 = node2.respond_to?(:to_xml) ? node2.to_xml : node2.to_s
|
|
96
|
-
|
|
111
|
+
# Serialize parsed nodes for consistent formatting
|
|
112
|
+
# This ensures both sides are formatted identically, showing only real differences
|
|
97
113
|
preprocessed = [
|
|
98
|
-
|
|
99
|
-
|
|
114
|
+
serialize_node_to_xml(node1).gsub(/></, ">\n<"),
|
|
115
|
+
serialize_node_to_xml(node2).gsub(/></, ">\n<"),
|
|
100
116
|
]
|
|
101
117
|
|
|
102
118
|
ComparisonResult.new(
|
|
103
119
|
differences: differences,
|
|
104
120
|
preprocessed_strings: preprocessed,
|
|
121
|
+
original_strings: [original1, original2],
|
|
105
122
|
format: :xml,
|
|
106
123
|
match_options: match_opts_hash,
|
|
124
|
+
algorithm: :dom,
|
|
107
125
|
)
|
|
126
|
+
elsif result != Comparison::EQUIVALENT && !differences.empty?
|
|
127
|
+
# Non-verbose mode: check equivalence
|
|
128
|
+
# If comparison found differences, classify them to determine if normative
|
|
129
|
+
classifier = Canon::Diff::DiffClassifier.new(match_opts)
|
|
130
|
+
classifier.classify_all(differences.select do |d|
|
|
131
|
+
d.is_a?(Canon::Diff::DiffNode)
|
|
132
|
+
end)
|
|
133
|
+
# Equivalent if no normative differences (matches semantic algorithm)
|
|
134
|
+
differences.none?(&:normative?)
|
|
108
135
|
else
|
|
136
|
+
# Either equivalent or no differences tracked
|
|
109
137
|
result == Comparison::EQUIVALENT
|
|
110
138
|
end
|
|
111
139
|
end
|
|
112
140
|
|
|
113
141
|
private
|
|
114
142
|
|
|
143
|
+
# Perform semantic tree diff using SemanticTreeMatchStrategy
|
|
144
|
+
#
|
|
145
|
+
# @param n1 [String, Moxml::Node] First node
|
|
146
|
+
# @param n2 [String, Moxml::Node] Second node
|
|
147
|
+
# @param opts [Hash] Comparison options
|
|
148
|
+
# @param match_opts_hash [Hash] Resolved match options
|
|
149
|
+
# @return [Boolean, ComparisonResult] Result of tree diff comparison
|
|
150
|
+
def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
|
|
151
|
+
# Store original strings for line diff display (before preprocessing)
|
|
152
|
+
original1 = if n1.is_a?(String)
|
|
153
|
+
n1
|
|
154
|
+
else
|
|
155
|
+
(n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
|
|
156
|
+
end
|
|
157
|
+
original2 = if n2.is_a?(String)
|
|
158
|
+
n2
|
|
159
|
+
else
|
|
160
|
+
(n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Parse to Canon::Xml::Node (preserves preprocessing)
|
|
164
|
+
node1 = parse_node(n1, match_opts_hash[:preprocessing])
|
|
165
|
+
node2 = parse_node(n2, match_opts_hash[:preprocessing])
|
|
166
|
+
|
|
167
|
+
# Create strategy using factory
|
|
168
|
+
strategy = Strategies::MatchStrategyFactory.create(
|
|
169
|
+
format: :xml,
|
|
170
|
+
match_options: match_opts_hash,
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Pass Canon::Xml::Node directly - XML adapter now handles it
|
|
174
|
+
differences = strategy.match(node1, node2)
|
|
175
|
+
|
|
176
|
+
# Return based on verbose mode
|
|
177
|
+
if opts[:verbose]
|
|
178
|
+
# Get preprocessed strings for display
|
|
179
|
+
preprocessed = strategy.preprocess_for_display(node1, node2)
|
|
180
|
+
|
|
181
|
+
# Return ComparisonResult with strategy metadata
|
|
182
|
+
ComparisonResult.new(
|
|
183
|
+
differences: differences,
|
|
184
|
+
preprocessed_strings: preprocessed,
|
|
185
|
+
original_strings: [original1, original2],
|
|
186
|
+
format: :xml,
|
|
187
|
+
match_options: match_opts_hash.merge(strategy.metadata),
|
|
188
|
+
algorithm: :semantic,
|
|
189
|
+
)
|
|
190
|
+
else
|
|
191
|
+
# Simple boolean result - equivalent if no normative differences
|
|
192
|
+
differences.none?(&:normative?)
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
|
|
115
196
|
# Parse a node from string or return as-is
|
|
116
197
|
# Applies preprocessing transformation before parsing if specified
|
|
117
198
|
def parse_node(node, preprocessing = :none)
|
|
118
|
-
|
|
199
|
+
# If already a Canon::Xml::Node, return as-is
|
|
200
|
+
return node if node.is_a?(Canon::Xml::Node)
|
|
201
|
+
|
|
202
|
+
# If it's a Nokogiri or Moxml node, convert to DataModel
|
|
203
|
+
unless node.is_a?(String)
|
|
204
|
+
# Convert to XML string then parse through DataModel
|
|
205
|
+
xml_str = if node.respond_to?(:to_xml)
|
|
206
|
+
node.to_xml
|
|
207
|
+
elsif node.respond_to?(:to_s)
|
|
208
|
+
node.to_s
|
|
209
|
+
else
|
|
210
|
+
raise Canon::Error,
|
|
211
|
+
"Unable to convert node to string: #{node.class}"
|
|
212
|
+
end
|
|
213
|
+
return Canon::Xml::DataModel.from_xml(xml_str)
|
|
214
|
+
end
|
|
119
215
|
|
|
120
216
|
# Apply preprocessing to XML string before parsing
|
|
121
217
|
xml_string = case preprocessing
|
|
@@ -134,8 +230,8 @@ module Canon
|
|
|
134
230
|
node
|
|
135
231
|
end
|
|
136
232
|
|
|
137
|
-
# Use
|
|
138
|
-
|
|
233
|
+
# Use Canon::Xml::DataModel for parsing to get Canon::Xml::Node instances
|
|
234
|
+
Canon::Xml::DataModel.from_xml(xml_string)
|
|
139
235
|
end
|
|
140
236
|
|
|
141
237
|
# Main comparison dispatcher
|
|
@@ -159,10 +255,7 @@ module Canon
|
|
|
159
255
|
children1.zip(children2).each do |child1, child2|
|
|
160
256
|
child_result = compare_nodes(child1, child2, opts, child_opts,
|
|
161
257
|
diff_children, differences)
|
|
162
|
-
|
|
163
|
-
result = child_result
|
|
164
|
-
break
|
|
165
|
-
end
|
|
258
|
+
result = child_result unless child_result == Comparison::EQUIVALENT
|
|
166
259
|
end
|
|
167
260
|
return result
|
|
168
261
|
end
|
|
@@ -187,7 +280,30 @@ module Canon
|
|
|
187
280
|
end
|
|
188
281
|
|
|
189
282
|
# Dispatch based on node type
|
|
190
|
-
|
|
283
|
+
# Canon::Xml::Node types use .node_type method that returns symbols
|
|
284
|
+
# Nokogiri also has .node_type but returns integers, so check for Symbol
|
|
285
|
+
if n1.respond_to?(:node_type) && n2.respond_to?(:node_type) &&
|
|
286
|
+
n1.node_type.is_a?(Symbol) && n2.node_type.is_a?(Symbol)
|
|
287
|
+
case n1.node_type
|
|
288
|
+
when :root
|
|
289
|
+
compare_children(n1, n2, opts, child_opts, diff_children,
|
|
290
|
+
differences)
|
|
291
|
+
when :element
|
|
292
|
+
compare_element_nodes(n1, n2, opts, child_opts, diff_children,
|
|
293
|
+
differences)
|
|
294
|
+
when :text
|
|
295
|
+
compare_text_nodes(n1, n2, opts, differences)
|
|
296
|
+
when :comment
|
|
297
|
+
compare_comment_nodes(n1, n2, opts, differences)
|
|
298
|
+
when :cdata
|
|
299
|
+
compare_text_nodes(n1, n2, opts, differences)
|
|
300
|
+
when :processing_instruction
|
|
301
|
+
compare_processing_instruction_nodes(n1, n2, opts, differences)
|
|
302
|
+
else
|
|
303
|
+
Comparison::EQUIVALENT
|
|
304
|
+
end
|
|
305
|
+
# Moxml/Nokogiri types use .element?, .text?, etc. methods
|
|
306
|
+
elsif n1.respond_to?(:element?) && n1.element?
|
|
191
307
|
compare_element_nodes(n1, n2, opts, child_opts, diff_children,
|
|
192
308
|
differences)
|
|
193
309
|
elsif n1.respond_to?(:text?) && n1.text?
|
|
@@ -200,7 +316,7 @@ module Canon
|
|
|
200
316
|
n1.processing_instruction?
|
|
201
317
|
compare_processing_instruction_nodes(n1, n2, opts, differences)
|
|
202
318
|
elsif n1.respond_to?(:root)
|
|
203
|
-
# Document node
|
|
319
|
+
# Document node (Moxml/Nokogiri - legacy path)
|
|
204
320
|
compare_document_nodes(n1, n2, opts, child_opts, diff_children,
|
|
205
321
|
differences)
|
|
206
322
|
else
|
|
@@ -214,11 +330,34 @@ module Canon
|
|
|
214
330
|
# Compare element names
|
|
215
331
|
unless n1.name == n2.name
|
|
216
332
|
add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
|
|
217
|
-
Comparison::UNEQUAL_ELEMENTS, :
|
|
333
|
+
Comparison::UNEQUAL_ELEMENTS, :element_structure, opts,
|
|
218
334
|
differences)
|
|
219
335
|
return Comparison::UNEQUAL_ELEMENTS
|
|
220
336
|
end
|
|
221
337
|
|
|
338
|
+
# Compare namespace URIs - elements with different namespaces are different elements
|
|
339
|
+
ns1 = n1.respond_to?(:namespace_uri) ? n1.namespace_uri : nil
|
|
340
|
+
ns2 = n2.respond_to?(:namespace_uri) ? n2.namespace_uri : nil
|
|
341
|
+
|
|
342
|
+
unless ns1 == ns2
|
|
343
|
+
# Create descriptive reason showing the actual namespace URIs
|
|
344
|
+
ns1_display = ns1.nil? || ns1.empty? ? "(no namespace)" : ns1
|
|
345
|
+
ns2_display = ns2.nil? || ns2.empty? ? "(no namespace)" : ns2
|
|
346
|
+
|
|
347
|
+
diff_node = Canon::Diff::DiffNode.new(
|
|
348
|
+
node1: n1,
|
|
349
|
+
node2: n2,
|
|
350
|
+
dimension: :namespace_uri,
|
|
351
|
+
reason: "namespace '#{ns1_display}' vs '#{ns2_display}' on element '#{n1.name}'",
|
|
352
|
+
)
|
|
353
|
+
differences << diff_node if opts[:verbose]
|
|
354
|
+
return Comparison::UNEQUAL_ELEMENTS
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
# Compare namespace declarations (xmlns and xmlns:* attributes)
|
|
358
|
+
ns_result = compare_namespace_declarations(n1, n2, opts, differences)
|
|
359
|
+
return ns_result unless ns_result == Comparison::EQUIVALENT
|
|
360
|
+
|
|
222
361
|
# Compare attributes
|
|
223
362
|
attr_result = compare_attribute_sets(n1, n2, opts, differences)
|
|
224
363
|
return attr_result unless attr_result == Comparison::EQUIVALENT
|
|
@@ -231,20 +370,67 @@ module Canon
|
|
|
231
370
|
|
|
232
371
|
# Compare attribute sets
|
|
233
372
|
def compare_attribute_sets(n1, n2, opts, differences)
|
|
234
|
-
|
|
235
|
-
|
|
373
|
+
# Get attributes using the appropriate method for each node type
|
|
374
|
+
raw_attrs1 = n1.respond_to?(:attribute_nodes) ? n1.attribute_nodes : n1.attributes
|
|
375
|
+
raw_attrs2 = n2.respond_to?(:attribute_nodes) ? n2.attribute_nodes : n2.attributes
|
|
236
376
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
attrs2 = attrs2.sort_by { |k, _v| k.to_s }.to_h
|
|
377
|
+
attrs1 = filter_attributes(raw_attrs1, opts)
|
|
378
|
+
attrs2 = filter_attributes(raw_attrs2, opts)
|
|
240
379
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
380
|
+
match_opts = opts[:match_opts]
|
|
381
|
+
attribute_order_behavior = match_opts[:attribute_order] || :strict
|
|
382
|
+
|
|
383
|
+
# Check attribute order if not ignored
|
|
384
|
+
keys1 = attrs1.keys.map(&:to_s)
|
|
385
|
+
keys2 = attrs2.keys.map(&:to_s)
|
|
386
|
+
if attribute_order_behavior == :strict
|
|
387
|
+
# Strict mode: attribute order matters
|
|
388
|
+
# Check if keys are in same order
|
|
389
|
+
|
|
390
|
+
if keys1 != keys2
|
|
391
|
+
# Keys are different or in different order
|
|
392
|
+
# First check if it's just ordering (same keys, different order)
|
|
393
|
+
if keys1.sort == keys2.sort
|
|
394
|
+
# Same keys, different order - this is an attribute_order difference
|
|
395
|
+
add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
|
|
396
|
+
Comparison::UNEQUAL_ATTRIBUTES,
|
|
397
|
+
:attribute_order, opts, differences)
|
|
398
|
+
return Comparison::UNEQUAL_ATTRIBUTES
|
|
399
|
+
else
|
|
400
|
+
# Different keys - this is attribute_presence difference
|
|
401
|
+
add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
|
|
402
|
+
Comparison::MISSING_ATTRIBUTE,
|
|
403
|
+
:attribute_presence, opts, differences)
|
|
404
|
+
return Comparison::MISSING_ATTRIBUTE
|
|
405
|
+
end
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
# Order matches, now check values in order
|
|
409
|
+
else
|
|
410
|
+
# Ignore/normalize mode: attribute order doesn't affect equivalence
|
|
411
|
+
# But in verbose mode, we should still track order differences as informative
|
|
412
|
+
|
|
413
|
+
# Check if order differs (but keys are the same)
|
|
414
|
+
if keys1 != keys2 && keys1.sort == keys2.sort && opts[:verbose]
|
|
415
|
+
# Same keys, different order - create informative DiffNode
|
|
416
|
+
# This allows line diffs to be properly classified as informative
|
|
417
|
+
add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
|
|
418
|
+
Comparison::UNEQUAL_ATTRIBUTES,
|
|
419
|
+
:attribute_order, opts, differences)
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
# Sort attributes so order doesn't matter for comparison
|
|
423
|
+
attrs1 = attrs1.sort_by { |k, _v| k.to_s }.to_h
|
|
424
|
+
attrs2 = attrs2.sort_by { |k, _v| k.to_s }.to_h
|
|
247
425
|
|
|
426
|
+
unless attrs1.keys.map(&:to_s).sort == attrs2.keys.map(&:to_s).sort
|
|
427
|
+
add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
|
|
428
|
+
Comparison::MISSING_ATTRIBUTE,
|
|
429
|
+
:attribute_presence, opts, differences)
|
|
430
|
+
return Comparison::MISSING_ATTRIBUTE
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
end
|
|
248
434
|
attrs1.each do |name, value|
|
|
249
435
|
unless attrs2[name] == value
|
|
250
436
|
add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
|
|
@@ -262,32 +448,57 @@ module Canon
|
|
|
262
448
|
filtered = {}
|
|
263
449
|
match_opts = opts[:match_opts]
|
|
264
450
|
|
|
265
|
-
|
|
266
|
-
|
|
451
|
+
# Handle Canon::Xml::Node attribute format (array of AttributeNode)
|
|
452
|
+
if attributes.is_a?(Array)
|
|
453
|
+
attributes.each do |attr|
|
|
454
|
+
name = attr.name
|
|
455
|
+
value = attr.value
|
|
456
|
+
|
|
457
|
+
# Skip namespace declarations - they're handled separately
|
|
458
|
+
next if is_namespace_declaration?(name)
|
|
459
|
+
|
|
460
|
+
# Skip if attribute name should be ignored
|
|
461
|
+
next if should_ignore_attr_by_name?(name, opts)
|
|
462
|
+
|
|
463
|
+
# Skip if attribute content should be ignored
|
|
464
|
+
next if should_ignore_attr_content?(value, opts)
|
|
465
|
+
|
|
466
|
+
# Apply match options for attribute values
|
|
467
|
+
behavior = match_opts[:attribute_values] || :strict
|
|
468
|
+
value = MatchOptions.process_attribute_value(value, behavior)
|
|
469
|
+
|
|
470
|
+
filtered[name] = value
|
|
471
|
+
end
|
|
472
|
+
else
|
|
473
|
+
# Handle Nokogiri and Moxml attribute formats (Hash-like):
|
|
267
474
|
# - Nokogiri: key is String name, val is Nokogiri::XML::Attr object
|
|
268
475
|
# - Moxml: key is Moxml::Attribute object, val is nil
|
|
476
|
+
attributes.each do |key, val|
|
|
477
|
+
if key.is_a?(String)
|
|
478
|
+
# Nokogiri format: key=name (String), val=attr object
|
|
479
|
+
name = key
|
|
480
|
+
value = val.respond_to?(:value) ? val.value : val.to_s
|
|
481
|
+
else
|
|
482
|
+
# Moxml format: key=attr object, val=nil
|
|
483
|
+
name = key.respond_to?(:name) ? key.name : key.to_s
|
|
484
|
+
value = key.respond_to?(:value) ? key.value : key.to_s
|
|
485
|
+
end
|
|
269
486
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
name = key
|
|
273
|
-
value = val.respond_to?(:value) ? val.value : val.to_s
|
|
274
|
-
else
|
|
275
|
-
# Moxml format: key=attr object, val=nil
|
|
276
|
-
name = key.respond_to?(:name) ? key.name : key.to_s
|
|
277
|
-
value = key.respond_to?(:value) ? key.value : key.to_s
|
|
278
|
-
end
|
|
487
|
+
# Skip namespace declarations - they're handled separately
|
|
488
|
+
next if is_namespace_declaration?(name)
|
|
279
489
|
|
|
280
|
-
|
|
281
|
-
|
|
490
|
+
# Skip if attribute name should be ignored
|
|
491
|
+
next if should_ignore_attr_by_name?(name, opts)
|
|
282
492
|
|
|
283
|
-
|
|
284
|
-
|
|
493
|
+
# Skip if attribute content should be ignored
|
|
494
|
+
next if should_ignore_attr_content?(value, opts)
|
|
285
495
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
496
|
+
# Apply match options for attribute values
|
|
497
|
+
behavior = match_opts[:attribute_values] || :strict
|
|
498
|
+
value = MatchOptions.process_attribute_value(value, behavior)
|
|
289
499
|
|
|
290
|
-
|
|
500
|
+
filtered[name] = value
|
|
501
|
+
end
|
|
291
502
|
end
|
|
292
503
|
|
|
293
504
|
filtered
|
|
@@ -324,26 +535,35 @@ module Canon
|
|
|
324
535
|
behavior = :strict
|
|
325
536
|
end
|
|
326
537
|
|
|
327
|
-
if
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
# - Otherwise use :text_content
|
|
334
|
-
dimension = if behavior == :normalize && whitespace_only_difference?(
|
|
335
|
-
text1, text2
|
|
336
|
-
)
|
|
337
|
-
:structural_whitespace
|
|
338
|
-
else
|
|
339
|
-
:text_content
|
|
340
|
-
end
|
|
538
|
+
# Check if raw content differs
|
|
539
|
+
raw_differs = text1 != text2
|
|
540
|
+
|
|
541
|
+
# Check if matches according to behavior
|
|
542
|
+
matches_per_behavior = MatchOptions.match_text?(text1, text2,
|
|
543
|
+
behavior)
|
|
341
544
|
|
|
545
|
+
# Determine the correct dimension for this difference
|
|
546
|
+
# - If text_content is :strict, ALL differences use :text_content dimension
|
|
547
|
+
# - If text_content is :normalize, whitespace-only diffs use :structural_whitespace
|
|
548
|
+
# - Otherwise use :text_content
|
|
549
|
+
dimension = if behavior == :normalize && whitespace_only_difference?(
|
|
550
|
+
text1, text2
|
|
551
|
+
)
|
|
552
|
+
:structural_whitespace
|
|
553
|
+
else
|
|
554
|
+
:text_content
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
# Create DiffNode in verbose mode when raw content differs
|
|
558
|
+
# This ensures informative diffs are created even for :ignore/:normalize
|
|
559
|
+
if raw_differs && opts[:verbose]
|
|
342
560
|
add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
343
561
|
Comparison::UNEQUAL_TEXT_CONTENTS, dimension,
|
|
344
562
|
opts, differences)
|
|
345
|
-
Comparison::UNEQUAL_TEXT_CONTENTS
|
|
346
563
|
end
|
|
564
|
+
|
|
565
|
+
# Return based on whether behavior makes difference acceptable
|
|
566
|
+
matches_per_behavior ? Comparison::EQUIVALENT : Comparison::UNEQUAL_TEXT_CONTENTS
|
|
347
567
|
end
|
|
348
568
|
|
|
349
569
|
# Check if the difference between two texts is only whitespace-related
|
|
@@ -396,18 +616,25 @@ module Canon
|
|
|
396
616
|
match_opts = opts[:match_opts]
|
|
397
617
|
behavior = match_opts[:comments]
|
|
398
618
|
|
|
399
|
-
#
|
|
400
|
-
|
|
619
|
+
# Canon::Xml::Node CommentNode uses .value, Nokogiri uses .content
|
|
620
|
+
content1 = node_text(n1)
|
|
621
|
+
content2 = node_text(n2)
|
|
401
622
|
|
|
402
|
-
|
|
403
|
-
|
|
623
|
+
# Check if content differs
|
|
624
|
+
contents_differ = content1 != content2
|
|
404
625
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
626
|
+
# Create DiffNode in verbose mode when content differs
|
|
627
|
+
# This ensures informative diffs are created even for :ignore behavior
|
|
628
|
+
if contents_differ && opts[:verbose]
|
|
408
629
|
add_difference(n1, n2, Comparison::UNEQUAL_COMMENTS,
|
|
409
630
|
Comparison::UNEQUAL_COMMENTS, :comments, opts,
|
|
410
631
|
differences)
|
|
632
|
+
end
|
|
633
|
+
|
|
634
|
+
# Return based on behavior and whether content matches
|
|
635
|
+
if behavior == :ignore || !contents_differ
|
|
636
|
+
Comparison::EQUIVALENT
|
|
637
|
+
else
|
|
411
638
|
Comparison::UNEQUAL_COMMENTS
|
|
412
639
|
end
|
|
413
640
|
end
|
|
@@ -451,25 +678,155 @@ module Canon
|
|
|
451
678
|
differences)
|
|
452
679
|
end
|
|
453
680
|
|
|
454
|
-
# Compare children of two nodes
|
|
681
|
+
# Compare children of two nodes using semantic matching
|
|
682
|
+
#
|
|
683
|
+
# Uses ElementMatcher to pair children semantically (by identity attributes
|
|
684
|
+
# or position), then compares matched pairs and detects position changes.
|
|
455
685
|
def compare_children(n1, n2, opts, child_opts, diff_children,
|
|
456
686
|
differences)
|
|
457
687
|
children1 = filter_children(n1.children, opts)
|
|
458
688
|
children2 = filter_children(n2.children, opts)
|
|
459
689
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
690
|
+
# Quick check: if both have no children, they're equivalent
|
|
691
|
+
return Comparison::EQUIVALENT if children1.empty? && children2.empty?
|
|
692
|
+
|
|
693
|
+
# Check if we can use ElementMatcher (requires Canon::Xml::DataModel nodes)
|
|
694
|
+
# ElementMatcher expects nodes with .node_type method that returns symbols
|
|
695
|
+
# and only works with element nodes (filters out text, comment, etc.)
|
|
696
|
+
can_use_matcher = children1.all? do |c|
|
|
697
|
+
c.is_a?(Canon::Xml::Node) && c.node_type == :element
|
|
698
|
+
end &&
|
|
699
|
+
children2.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element }
|
|
700
|
+
|
|
701
|
+
if can_use_matcher && !children1.empty? && !children2.empty?
|
|
702
|
+
# Use ElementMatcher for semantic matching with position tracking
|
|
703
|
+
use_element_matcher_comparison(children1, children2, n1, opts,
|
|
704
|
+
child_opts, diff_children, differences)
|
|
705
|
+
else
|
|
706
|
+
# Fall back to simple positional comparison for Moxml/Nokogiri nodes
|
|
707
|
+
# Length check
|
|
708
|
+
unless children1.length == children2.length
|
|
709
|
+
# Determine dimension based on type of first differing child
|
|
710
|
+
# When lengths differ, find which child is missing/extra
|
|
711
|
+
dimension = :text_content # default
|
|
712
|
+
|
|
713
|
+
# Compare position by position to find first difference
|
|
714
|
+
max_len = [children1.length, children2.length].max
|
|
715
|
+
(0...max_len).each do |i|
|
|
716
|
+
if i >= children1.length
|
|
717
|
+
# Extra child in children2
|
|
718
|
+
dimension = determine_node_dimension(children2[i])
|
|
719
|
+
break
|
|
720
|
+
elsif i >= children2.length
|
|
721
|
+
# Extra child in children1
|
|
722
|
+
dimension = determine_node_dimension(children1[i])
|
|
723
|
+
break
|
|
724
|
+
elsif !same_node_type?(children1[i], children2[i])
|
|
725
|
+
# Different node types at same position
|
|
726
|
+
dimension = determine_node_dimension(children1[i])
|
|
727
|
+
break
|
|
728
|
+
end
|
|
729
|
+
end
|
|
730
|
+
|
|
731
|
+
add_difference(n1, n2, Comparison::MISSING_NODE,
|
|
732
|
+
Comparison::MISSING_NODE, dimension, opts,
|
|
733
|
+
differences)
|
|
734
|
+
return Comparison::MISSING_NODE
|
|
735
|
+
end
|
|
736
|
+
|
|
737
|
+
# Compare children pairwise by position
|
|
738
|
+
result = Comparison::EQUIVALENT
|
|
739
|
+
children1.zip(children2).each do |child1, child2|
|
|
740
|
+
child_result = compare_nodes(child1, child2, child_opts, child_opts,
|
|
741
|
+
diff_children, differences)
|
|
742
|
+
result = child_result unless child_result == Comparison::EQUIVALENT
|
|
743
|
+
end
|
|
744
|
+
|
|
745
|
+
result
|
|
464
746
|
end
|
|
747
|
+
end
|
|
748
|
+
|
|
749
|
+
# Compare two child lists semantically using Canon::Xml::ElementMatcher
# (the path taken for Canon::Xml::DataModel nodes).
#
# @param children1 [Array] Children of the first parent
# @param children2 [Array] Children of the second parent
# @param parent_node [Object] Node reported (on both sides) when the matcher
#   yields no direct-child matches although children exist
# @param opts [Hash] Options; opts[:match_opts][:element_position] is read
#   when a matched element changed position
# @param child_opts [Hash] Options passed (twice) to compare_nodes for each
#   matched pair
# @param diff_children [Object] Forwarded unchanged to compare_nodes
# @param differences [Array] Array to append differences to
# @return [Object] Comparison::EQUIVALENT or Comparison::UNEQUAL_ELEMENTS
def use_element_matcher_comparison(children1, children2, parent_node,
                                   opts, child_opts, diff_children,
                                   differences)
  # Both files are loaded at call time rather than at file load.
  require_relative "../xml/element_matcher"
  require_relative "../xml/nodes/root_node"

  # Wrap each child list in a throwaway RootNode so ElementMatcher can walk
  # it. @children is assigned directly (on a copy of the list) so the
  # children's parent pointers are not modified.
  root1, root2 = [children1, children2].map do |kids|
    Canon::Xml::Nodes::RootNode.new.tap do |root|
      root.instance_variable_set(:@children, kids.dup)
    end
  end

  # match_trees returns ALL descendants; keep only matches whose elements
  # are direct children of the lists being compared.
  all_matches = Canon::Xml::ElementMatcher.new.match_trees(root1, root2)
  direct = all_matches.select do |m|
    left_ok = m.elem1.nil? || children1.include?(m.elem1)
    left_ok && (m.elem2.nil? || children2.include?(m.elem2))
  end

  # Nothing matched although at least one side has children: report a
  # single difference on the parent and stop.
  if direct.empty? && !(children1.empty? && children2.empty?)
    add_difference(parent_node, parent_node, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_ELEMENTS
  end

  mismatch = false

  direct.each do |match|
    case match.status
    when :matched
      if match.position_changed?
        behavior = opts[:match_opts][:element_position] || :strict

        # A moved element is recorded unless positions are ignored; it only
        # makes the result unequal under :strict.
        unless behavior == :ignore
          add_difference(
            match.elem1,
            match.elem2,
            "position #{match.pos1}",
            "position #{match.pos2}",
            :element_position,
            opts,
            differences,
          )
          mismatch = true if behavior == :strict
        end
      end

      # Matched elements are still compared for content/attribute changes.
      outcome = compare_nodes(match.elem1, match.elem2, child_opts,
                              child_opts, diff_children, differences)
      mismatch = true unless outcome == Comparison::EQUIVALENT
    when :deleted
      # Present in the first list only.
      add_difference(match.elem1, nil, Comparison::MISSING_NODE,
                     Comparison::MISSING_NODE, :element_structure, opts,
                     differences)
      mismatch = true
    when :inserted
      # Present in the second list only.
      add_difference(nil, match.elem2, Comparison::MISSING_NODE,
                     Comparison::MISSING_NODE, :element_structure, opts,
                     differences)
      mismatch = true
    end
  end

  mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
end
|
|
474
831
|
|
|
475
832
|
# Filter children based on options
|
|
@@ -483,20 +840,28 @@ module Canon
|
|
|
483
840
|
def node_excluded?(node, opts)
  # Decide whether a child node should be dropped before comparison.
  #
  # Returns true for:
  # * any text node when opts[:ignore_text_nodes] is set;
  # * a text node whose normalized text is empty when
  #   opts[:match_opts][:structural_whitespace] is :ignore or :normalize.
  # Returns false for every other node.
  match_opts = opts[:match_opts]

  # Canon::Xml::Node#node_type returns a Symbol while Nokogiri's returns an
  # Integer, so the Symbol form is checked first and text? is the fallback.
  is_text = if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
              node.node_type == :text
            else
              node.respond_to?(:text?) && node.text?
            end

  # Ignore text nodes if specified
  return true if opts[:ignore_text_nodes] && is_text

  # Ignore whitespace-only text nodes based on structural_whitespace
  # Both :ignore and :normalize should filter out whitespace-only nodes
  if %i[ignore normalize].include?(match_opts[:structural_whitespace]) &&
      is_text
    text = node_text(node)
    return true if MatchOptions.normalize_text(text).empty?
  end

  false
end
|
|
506
871
|
|
|
872
|
+
# Map a node to the match dimension used when reporting it.
#
# Nodes whose node_type is a Symbol (Canon::Xml::Node) are classified by
# that symbol; all other nodes are classified through the comment?, text?,
# cdata? and processing_instruction? predicates, tried in that order.
# Anything unrecognised falls back to :text_content.
#
# @param node [Object] The node to check
# @return [Symbol] :comments, :processing_instructions or :text_content
def determine_node_dimension(node)
  # Canon::Xml::Node types
  if node.respond_to?(:node_type) && node.node_type.is_a?(Symbol)
    by_type = {
      comment: :comments,
      processing_instruction: :processing_instructions,
    }
    # :text, :cdata and every other symbol share the default.
    return by_type.fetch(node.node_type, :text_content)
  end

  # Moxml/Nokogiri types: first predicate the node answers truthfully wins.
  by_predicate = [
    %i[comment? comments],
    %i[text? text_content],
    %i[cdata? text_content],
    %i[processing_instruction? processing_instructions],
  ]
  hit = by_predicate.find do |predicate, _dimension|
    node.respond_to?(predicate) && node.public_send(predicate)
  end

  hit ? hit.last : :text_content
end
|
|
897
|
+
|
|
507
898
|
# Check if two nodes are the same type
|
|
508
899
|
def same_node_type?(n1, n2)
|
|
900
|
+
# Canon::Xml::Node types - check node_type method
|
|
901
|
+
if n1.respond_to?(:node_type) && n2.respond_to?(:node_type)
|
|
902
|
+
return n1.node_type == n2.node_type
|
|
903
|
+
end
|
|
904
|
+
|
|
905
|
+
# Moxml/Nokogiri types - check individual type methods
|
|
509
906
|
return true if n1.respond_to?(:element?) && n1.element? &&
|
|
510
907
|
n2.respond_to?(:element?) && n2.element?
|
|
511
908
|
return true if n1.respond_to?(:text?) && n1.text? &&
|
|
@@ -525,7 +922,10 @@ module Canon
|
|
|
525
922
|
|
|
526
923
|
# Get text content from a node
|
|
527
924
|
def node_text(node)
|
|
528
|
-
|
|
925
|
+
# Canon::Xml::Node TextNode uses .value
|
|
926
|
+
if node.respond_to?(:value)
|
|
927
|
+
node.value.to_s
|
|
928
|
+
elsif node.respond_to?(:content)
|
|
529
929
|
node.content.to_s
|
|
530
930
|
elsif node.respond_to?(:text)
|
|
531
931
|
node.text.to_s
|
|
@@ -534,6 +934,66 @@ module Canon
|
|
|
534
934
|
end
|
|
535
935
|
end
|
|
536
936
|
|
|
937
|
+
# Extract element path for context (best effort)
#
# Walks from +node+ up through its +parent+ chain, collecting each non-nil
# +name+, and returns the names ordered outermost first. The walk ends when
# there is no further parent, when the next ancestor responds to +root+
# (treated as the document; its name is not collected), or after
# +max_depth+ nodes have been visited.
#
# @param node [Object] Node to extract path from
# @param max_depth [Integer] Maximum number of nodes to visit on the way
#   up; deeper ancestors are silently omitted from the path
# @return [Array<String>] Path components
def extract_element_path(node, max_depth: 20)
  path = []
  current = node
  depth = 0

  while current && depth < max_depth
    path.unshift(current.name) if current.respond_to?(:name) && current.name

    break unless current.respond_to?(:parent)

    current = current.parent
    depth += 1

    # Stop at document root
    break if current.respond_to?(:root)
  end

  path
end
|
|
962
|
+
|
|
963
|
+
# Serialize a node to XML string
#
# Canon node types are rendered recursively: a root as the concatenation of
# its children, an element with its attributes (self-closing when it has no
# serialized children), text, comments and processing instructions. Any
# other object is rendered through +to_xml+ when available, else +to_s+.
#
# Text content and attribute values are XML-escaped so that characters such
# as "&", "<" and '"' cannot produce malformed markup.
# NOTE(review): assumes node values are stored unescaped - confirm against
# the data model builder.
#
# @param node [Canon::Xml::Node, Object] Node to serialize
# @return [String] XML string representation
def serialize_node_to_xml(node)
  case node
  when Canon::Xml::Nodes::RootNode
    # Serialize all children of root
    node.children.map { |child| serialize_node_to_xml(child) }.join
  when Canon::Xml::Nodes::ElementNode
    # encode(xml: :attr) escapes the value and adds the surrounding quotes.
    attrs = node.attribute_nodes.map do |a|
      " #{a.name}=#{a.value.to_s.encode(xml: :attr)}"
    end.join
    children_xml = node.children.map { |c| serialize_node_to_xml(c) }.join

    if children_xml.empty?
      "<#{node.name}#{attrs}/>"
    else
      "<#{node.name}#{attrs}>#{children_xml}</#{node.name}>"
    end
  when Canon::Xml::Nodes::TextNode
    node.value.to_s.encode(xml: :text)
  when Canon::Xml::Nodes::CommentNode
    "<!--#{node.value}-->"
  when Canon::Xml::Nodes::ProcessingInstructionNode
    "<?#{node.target} #{node.data}?>"
  else
    node.respond_to?(:to_xml) ? node.to_xml : node.to_s
  end
end
|
|
996
|
+
|
|
537
997
|
# Add a difference to the differences array
|
|
538
998
|
# @param node1 [Object] First node
|
|
539
999
|
# @param node2 [Object] Second node
|
|
@@ -542,24 +1002,168 @@ module Canon
|
|
|
542
1002
|
# @param dimension [Symbol] The match dimension causing this difference
|
|
543
1003
|
# @param opts [Hash] Options
|
|
544
1004
|
# @param differences [Array] Array to append difference to
|
|
545
|
-
def add_difference(node1, node2, diff1, diff2, dimension,
|
|
1005
|
+
def add_difference(node1, node2, diff1, diff2, dimension, _opts,
                   differences)
  # Every recorded difference is a DiffNode, and a DiffNode needs a
  # dimension, so a missing one is rejected up front.
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  # The reason text is derived from the nodes and both difference codes;
  # _opts is accepted but not consulted here.
  differences << Canon::Diff::DiffNode.new(
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: build_difference_reason(node1, node2, diff1, diff2, dimension),
  )
end
|
|
1025
|
+
|
|
1026
|
+
# Build a human-readable reason for a difference
#
# The generic form is "<diff1> vs <diff2>". When exactly one side is absent
# (a deleted or inserted node) and the surviving node exposes both +name+
# and +namespace_uri+, the reason is prefixed with the element name and,
# if non-empty, its namespace. This applies to the :text_content and
# :element_structure dimensions, the latter being the one used when
# deleted/inserted elements are reported.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param diff1 [String] Difference type for node1
# @param diff2 [String] Difference type for node2
# @param dimension [Symbol] The dimension of the difference
# @return [String] Human-readable reason
def build_difference_reason(node1, node2, diff1, diff2, dimension)
  generic = "#{diff1} vs #{diff2}"

  one_sided = node1.nil? || node2.nil?
  unless one_sided && %i[text_content element_structure].include?(dimension)
    return generic
  end

  # With both sides nil there is no node to describe; fall through.
  node = node1 || node2
  unless node.respond_to?(:name) && node.respond_to?(:namespace_uri)
    return generic
  end

  ns = node.namespace_uri
  ns_info = ns.to_s.empty? ? "" : " (namespace: #{ns})"
  "element '#{node.name}'#{ns_info}: #{generic}"
end
|
|
1050
|
+
|
|
1051
|
+
# Compare namespace declarations (xmlns and xmlns:* attributes)
#
# The declarations of both nodes are extracted as prefix => URI hashes.
# They count as different when a prefix exists on only one side, or when a
# prefix present on both sides maps to a different URI. In that case a
# single difference is recorded under the :namespace_declarations
# dimension; its reason text is produced by add_difference.
#
# @param n1 [Object] First node
# @param n2 [Object] Second node
# @param opts [Hash] Options (forwarded to add_difference)
# @param differences [Array] Array to append differences to
# @return [Symbol] Comparison result
def compare_namespace_declarations(n1, n2, opts, differences)
  ns_decls1 = extract_namespace_declarations(n1)
  ns_decls2 = extract_namespace_declarations(n2)

  prefixes1 = ns_decls1.keys
  prefixes2 = ns_decls2.keys

  removed = (prefixes1 - prefixes2).any? # In n1 but not n2
  added = (prefixes2 - prefixes1).any?   # In n2 but not n1
  changed = ns_decls1.any? do |prefix, uri|
    ns_decls2[prefix] && ns_decls2[prefix] != uri
  end

  return Comparison::EQUIVALENT unless removed || added || changed

  add_difference(
    n1,
    n2,
    Comparison::UNEQUAL_ATTRIBUTES,
    Comparison::UNEQUAL_ATTRIBUTES,
    :namespace_declarations,
    opts,
    differences,
  )
  Comparison::UNEQUAL_ATTRIBUTES
end
|
|
1102
|
+
|
|
1103
|
+
# Extract namespace declarations from a node
#
# Three node shapes are supported:
# * nodes responding to +namespace_nodes+ (Canon::Xml::Node): each entry's
#   prefix/uri is used, skipping the implicit "xml" namespace;
# * nodes whose attributes are an Array of objects with +name+/+value+;
# * nodes whose attributes are Hash-like, keyed either by attribute name
#   (String) or by the attribute object itself.
# For the attribute-based shapes only "xmlns" / "xmlns:*" attributes are
# kept. The default namespace is stored under the empty-string prefix.
# A node that exposes no attributes at all yields an empty hash.
#
# @param node [Object] Node to extract namespace declarations from
# @return [Hash] Hash of prefix => URI mappings
def extract_namespace_declarations(node)
  declarations = {}

  # Handle Canon::Xml::Node (uses namespace_nodes)
  if node.respond_to?(:namespace_nodes)
    node.namespace_nodes.each do |ns|
      # Skip the implicit xml namespace (always present)
      next if ns.prefix == "xml" && ns.uri == "http://www.w3.org/XML/1998/namespace"

      declarations[ns.prefix || ""] = ns.uri
    end
    return declarations
  end

  # Handle Nokogiri/Moxml nodes (use attributes)
  raw_attrs = if node.respond_to?(:attribute_nodes)
                node.attribute_nodes
              elsif node.respond_to?(:attributes)
                node.attributes
              end
  # Nodes without any attribute collection declare nothing.
  return declarations if raw_attrs.nil?

  # Shared by both attribute formats below.
  record = lambda do |name, value|
    if is_namespace_declaration?(name)
      # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
      prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
      declarations[prefix] = value
    end
  end

  if raw_attrs.is_a?(Array)
    # Array of attribute objects
    raw_attrs.each { |attr| record.call(attr.name, attr.value) }
  else
    # Handle Nokogiri and Moxml attribute formats (Hash-like)
    raw_attrs.each do |key, val|
      if key.is_a?(String)
        # Nokogiri format: key=name (String), val=attr object
        record.call(key, val.respond_to?(:value) ? val.value : val.to_s)
      else
        # Moxml format: key=attr object, val=nil
        record.call(
          key.respond_to?(:name) ? key.name : key.to_s,
          key.respond_to?(:value) ? key.value : key.to_s,
        )
      end
    end
  end

  declarations
end
|
|
1160
|
+
|
|
1161
|
+
# Tell whether an attribute name declares a namespace, i.e. is exactly
# "xmlns" (default namespace) or begins with "xmlns:" (prefixed namespace).
#
# @param attr_name [String] Attribute name
# @return [Boolean] true if it's a namespace declaration
def is_namespace_declaration?(attr_name)
  # Default-namespace declaration.
  return true if attr_name == "xmlns"

  # Prefixed declaration such as "xmlns:xsi".
  attr_name.start_with?("xmlns:")
end
|
|
563
1167
|
end
|
|
564
1168
|
end
|
|
565
1169
|
end
|