canon 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +25 -99
- data/README.adoc +220 -26
- data/docs/advanced/diff-classification.adoc +118 -26
- data/lib/canon/cli.rb +30 -0
- data/lib/canon/commands/diff_command.rb +3 -0
- data/lib/canon/comparison/markup_comparator.rb +109 -2
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
- data/lib/canon/comparison/xml_comparator.rb +192 -0
- data/lib/canon/config/env_schema.rb +5 -1
- data/lib/canon/config.rb +30 -0
- data/lib/canon/diff/diff_classifier.rb +48 -33
- data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
- data/lib/canon/diff_formatter.rb +102 -12
- data/lib/canon/version.rb +1 -1
- metadata +3 -2
|
@@ -568,9 +568,201 @@ differences)
|
|
|
568
568
|
end
|
|
569
569
|
end
|
|
570
570
|
|
|
571
|
+
# For attribute presence differences, show what attributes differ
|
|
572
|
+
if dimension == :attribute_presence
|
|
573
|
+
attrs1 = extract_attributes(node1)
|
|
574
|
+
attrs2 = extract_attributes(node2)
|
|
575
|
+
return build_attribute_diff_reason(attrs1, attrs2)
|
|
576
|
+
end
|
|
577
|
+
|
|
578
|
+
# For text content differences, show the actual text (truncated if needed)
|
|
579
|
+
if dimension == :text_content
|
|
580
|
+
text1 = extract_text_from_node(node1)
|
|
581
|
+
text2 = extract_text_from_node(node2)
|
|
582
|
+
return build_text_diff_reason(text1, text2)
|
|
583
|
+
end
|
|
584
|
+
|
|
571
585
|
"#{diff1} vs #{diff2}"
|
|
572
586
|
end
|
|
573
587
|
|
|
588
|
+
# Build a clear reason message for attribute presence differences
|
|
589
|
+
#
|
|
590
|
+
# @param attrs1 [Hash, nil] First node's attributes
|
|
591
|
+
# @param attrs2 [Hash, nil] Second node's attributes
|
|
592
|
+
# @return [String] Clear explanation of the attribute difference
|
|
593
|
+
def build_attribute_diff_reason(attrs1, attrs2)
  # Summarises how two attribute hashes differ: names present on only one
  # side, plus shared names whose values are not equal.

  # Without both hashes there is nothing to compare name-by-name, so fall
  # back to reporting the two attribute counts.
  unless attrs1 && attrs2
    count1 = attrs1 ? attrs1.keys.size : 0
    count2 = attrs2 ? attrs2.keys.size : 0
    return "#{count1} vs #{count2} attributes"
  end

  names1 = attrs1.keys
  names2 = attrs2.keys

  # Hash keys are already unique, so Array#- and Array#& give the same
  # answers as Set operations without loading "set" on every call.
  only_in_first = names1 - names2
  only_in_second = names2 - names1
  different_values = (names1 & names2).reject do |name|
    attrs1[name] == attrs2[name]
  end

  parts = []
  parts << "only in first: #{only_in_first.sort.join(', ')}" unless only_in_first.empty?
  parts << "only in second: #{only_in_second.sort.join(', ')}" unless only_in_second.empty?
  parts << "different values: #{different_values.sort.join(', ')}" unless different_values.empty?

  # Same names and same values: only the counts are left to report.
  return "#{names1.size} vs #{names2.size} attributes (same names)" if parts.empty?

  parts.join("; ")
end
|
|
618
|
+
|
|
619
|
+
# Extract text from a node for diff reason
|
|
620
|
+
#
|
|
621
|
+
# @param node [Object, nil] Node to extract text from
|
|
622
|
+
# @return [String, nil] Text content or nil
|
|
623
|
+
def extract_text_from_node(node)
  # Best-effort text extraction for diff reason messages; returns nil when
  # the node is nil or when any accessor raises.
  return nil if node.nil?

  # Canon::Xml::Nodes::TextNode keeps its text in #value.
  return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)

  # Probe accessors in the same order as
  # Canon::Diff::XmlSerializationFormatter.extract_text_content: #content is
  # tried before #text because Moxml::Text#text is reported to return "",
  # which would hide the real text of the node.
  %i[text_content content text value].each do |accessor|
    return node.public_send(accessor) if node.respond_to?(accessor)
  end

  # Strings and any other object fall back to their string form.
  node.to_s
rescue StandardError
  # Deliberate best-effort: a failing accessor must not break reason building.
  nil
end
|
|
649
|
+
|
|
650
|
+
# Build a clear reason message for text content differences
|
|
651
|
+
#
|
|
652
|
+
# @param text1 [String, nil] First text content
|
|
653
|
+
# @param text2 [String, nil] Second text content
|
|
654
|
+
# @return [String] Clear explanation of the text difference
|
|
655
|
+
def build_text_diff_reason(text1, text2)
  # Builds the human-readable reason for a text_content difference.

  # Missing-side cases. The "second missing" branch must quote text1: text2
  # is nil there, so quoting it always produced "'' vs missing".
  return "both missing" if text1.nil? && text2.nil?
  return "missing vs '#{truncate_text(text2)}'" if text1.nil?
  return "'#{truncate_text(text1)}' vs missing" if text2.nil?

  # Two whitespace-only texts are described by character counts, since
  # quoting them verbatim would be unreadable.
  if whitespace_only?(text1) && whitespace_only?(text2)
    return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
  end

  # Otherwise show both texts with whitespace made visible.
  vis1 = visualize_whitespace(text1)
  vis2 = visualize_whitespace(text2)

  "Text: \"#{vis1}\" vs \"#{vis2}\""
end
|
|
673
|
+
|
|
674
|
+
# Check if text is only whitespace
|
|
675
|
+
#
|
|
676
|
+
# @param text [String] Text to check
|
|
677
|
+
# @return [Boolean] true if whitespace-only
|
|
678
|
+
def whitespace_only?(text)
  # nil is never whitespace-only; anything else qualifies when nothing is
  # left after stripping its string form.
  !text.nil? && text.to_s.strip.empty?
end
|
|
683
|
+
|
|
684
|
+
# Make whitespace visible in text content
|
|
685
|
+
# Uses the existing character visualization map from DiffFormatter (single source of truth)
|
|
686
|
+
#
|
|
687
|
+
# @param text [String] Text to visualize
|
|
688
|
+
# @return [String] Text with visible whitespace markers
|
|
689
|
+
def visualize_whitespace(text)
  # Swaps every character that has an entry in the visualization map for
  # its marker and leaves all other characters as they are.
  return "" if text.nil?

  markers = character_visualization_map
  text.each_char.map { |ch| markers[ch] || ch }.join
end
|
|
698
|
+
|
|
699
|
+
# Get the character visualization map (lazy-loaded to avoid circular dependency)
|
|
700
|
+
#
|
|
701
|
+
# @return [Hash] Character to visualization symbol mapping
|
|
702
|
+
def character_visualization_map
  # Memoized: the YAML file is read on first use only.
  return @character_visualization_map if @character_visualization_map

  # The YAML is read directly (rather than going through DiffFormatter) to
  # avoid a circular dependency.
  require "yaml"
  map_file = File.join(File.expand_path("../..", __dir__),
                       "canon/diff_formatter/character_map.yml")
  entries = YAML.load_file(map_file)["characters"]

  @character_visualization_map = entries.each_with_object({}) do |entry, table|
    codepoint = entry["unicode"]
    # An entry names its character either as a hex code point ("unicode")
    # or literally ("character", which covers \n, \t, etc.).
    key = if codepoint
            [codepoint.to_i(16)].pack("U")
          else
            entry["character"]
          end
    table[key] = entry["visualization"]
  end
end
|
|
730
|
+
|
|
731
|
+
# Describe whitespace content in a readable way
|
|
732
|
+
#
|
|
733
|
+
# @param text [String] Whitespace text
|
|
734
|
+
# @return [String] Description like "4 chars (2 newlines, 2 spaces)"
|
|
735
|
+
def describe_whitespace(text)
  # Describes a whitespace string as a total length plus a per-kind
  # breakdown, e.g. "4 chars (2 newlines, 2 spaces)".
  return "0 chars" if text.nil? || text.empty?

  counts = {
    "newlines" => text.count("\n"),
    "spaces" => text.count(" "),
    "tabs" => text.count("\t"),
    # Everything that is not a newline, space or tab (e.g. "\r", "\f").
    # Without this bucket such input produced an empty "(...)" breakdown.
    "other" => text.count("^\n \t"),
  }

  description = counts.select { |_label, count| count.positive? }
    .map { |label, count| "#{count} #{label}" }
    .join(", ")

  "#{text.length} chars (#{description})"
end
|
|
751
|
+
|
|
752
|
+
# Truncate text for display in reason messages
|
|
753
|
+
#
|
|
754
|
+
# @param text [String] Text to truncate
|
|
755
|
+
# @param max_length [Integer] Maximum length
|
|
756
|
+
# @return [String] Truncated text
|
|
757
|
+
def truncate_text(text, max_length = 40)
  # nil becomes an empty string; longer input is cut to max_length
  # characters and marked with a trailing "...".
  return "" if text.nil?

  str = text.to_s
  str.length > max_length ? "#{str[0...max_length]}..." : str
end
|
|
765
|
+
|
|
574
766
|
# Compare namespace declarations (xmlns and xmlns:* attributes)
|
|
575
767
|
# Delegates to XmlComparatorHelpers::NamespaceComparator
|
|
576
768
|
def compare_namespace_declarations(n1, n2, opts, differences)
|
|
@@ -14,6 +14,9 @@ module Canon
|
|
|
14
14
|
show_diffs: :symbol,
|
|
15
15
|
verbose_diff: :boolean,
|
|
16
16
|
algorithm: :symbol,
|
|
17
|
+
show_raw_inputs: :boolean,
|
|
18
|
+
show_preprocessed_inputs: :boolean,
|
|
19
|
+
show_line_numbered_inputs: :boolean,
|
|
17
20
|
|
|
18
21
|
# MatchConfig attributes
|
|
19
22
|
profile: :symbol,
|
|
@@ -42,7 +45,8 @@ module Canon
|
|
|
42
45
|
|
|
43
46
|
def all_diff_attributes
|
|
44
47
|
%i[mode use_color context_lines grouping_lines show_diffs
|
|
45
|
-
verbose_diff algorithm
|
|
48
|
+
verbose_diff algorithm show_raw_inputs show_preprocessed_inputs
|
|
49
|
+
show_line_numbered_inputs max_file_size max_node_count max_diff_lines]
|
|
46
50
|
end
|
|
47
51
|
|
|
48
52
|
def all_match_attributes
|
data/lib/canon/config.rb
CHANGED
|
@@ -221,6 +221,30 @@ module Canon
|
|
|
221
221
|
@resolver.set_programmatic(:verbose_diff, value)
|
|
222
222
|
end
|
|
223
223
|
|
|
224
|
+
# Reads the :show_raw_inputs setting by delegating to @resolver.resolve.
def show_raw_inputs
|
|
225
|
+
@resolver.resolve(:show_raw_inputs)
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# Stores +value+ as the programmatic :show_raw_inputs setting on @resolver.
def show_raw_inputs=(value)
|
|
229
|
+
@resolver.set_programmatic(:show_raw_inputs, value)
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
# Reads the :show_preprocessed_inputs setting by delegating to @resolver.resolve.
def show_preprocessed_inputs
|
|
233
|
+
@resolver.resolve(:show_preprocessed_inputs)
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# Stores +value+ as the programmatic :show_preprocessed_inputs setting on @resolver.
def show_preprocessed_inputs=(value)
|
|
237
|
+
@resolver.set_programmatic(:show_preprocessed_inputs, value)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Reads the :show_line_numbered_inputs setting by delegating to @resolver.resolve.
def show_line_numbered_inputs
|
|
241
|
+
@resolver.resolve(:show_line_numbered_inputs)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Stores +value+ as the programmatic :show_line_numbered_inputs setting on @resolver.
def show_line_numbered_inputs=(value)
|
|
245
|
+
@resolver.set_programmatic(:show_line_numbered_inputs, value)
|
|
246
|
+
end
|
|
247
|
+
|
|
224
248
|
# Reads the :algorithm setting by delegating to @resolver.resolve.
def algorithm
|
|
225
249
|
@resolver.resolve(:algorithm)
|
|
226
250
|
end
|
|
@@ -266,6 +290,9 @@ module Canon
|
|
|
266
290
|
show_diffs: show_diffs,
|
|
267
291
|
verbose_diff: verbose_diff,
|
|
268
292
|
diff_algorithm: algorithm,
|
|
293
|
+
show_raw_inputs: show_raw_inputs,
|
|
294
|
+
show_preprocessed_inputs: show_preprocessed_inputs,
|
|
295
|
+
show_line_numbered_inputs: show_line_numbered_inputs,
|
|
269
296
|
max_file_size: max_file_size,
|
|
270
297
|
max_node_count: max_node_count,
|
|
271
298
|
max_diff_lines: max_diff_lines,
|
|
@@ -283,6 +310,9 @@ module Canon
|
|
|
283
310
|
show_diffs: :all,
|
|
284
311
|
verbose_diff: false,
|
|
285
312
|
algorithm: :dom,
|
|
313
|
+
show_raw_inputs: false,
|
|
314
|
+
show_preprocessed_inputs: false,
|
|
315
|
+
show_line_numbered_inputs: false,
|
|
286
316
|
max_file_size: 5_242_880, # 5MB in bytes
|
|
287
317
|
max_node_count: 10_000, # Maximum nodes in tree
|
|
288
318
|
max_diff_lines: 10_000, # Maximum diff output lines
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "formatting_detector"
|
|
4
|
+
require_relative "xml_serialization_formatter"
|
|
4
5
|
require_relative "../comparison/compare_profile"
|
|
5
6
|
require_relative "../comparison/whitespace_sensitivity"
|
|
6
7
|
|
|
@@ -8,6 +9,11 @@ module Canon
|
|
|
8
9
|
module Diff
|
|
9
10
|
# Classifies DiffNodes as normative (affects equivalence) or informative (doesn't affect equivalence)
|
|
10
11
|
# based on the match options in effect
|
|
12
|
+
#
|
|
13
|
+
# Classification hierarchy (three distinct kinds of differences):
|
|
14
|
+
# 1. Serialization formatting: XML syntax differences (always non-normative)
|
|
15
|
+
# 2. Content formatting: Whitespace differences in content (non-normative when normalized)
|
|
16
|
+
# 3. Normative: Semantic content differences (affect equivalence)
|
|
11
17
|
class DiffClassifier
|
|
12
18
|
attr_reader :match_options, :profile
|
|
13
19
|
|
|
@@ -25,11 +31,20 @@ module Canon
|
|
|
25
31
|
|
|
26
32
|
# Classify a single DiffNode as normative or informative
|
|
27
33
|
# Hierarchy: formatting-only < informative < normative
|
|
28
|
-
# CompareProfile determines base classification,
|
|
34
|
+
# CompareProfile determines base classification, XmlSerializationFormatter handles serialization formatting
|
|
29
35
|
# @param diff_node [DiffNode] The diff node to classify
|
|
30
36
|
# @return [DiffNode] The same diff node with normative/formatting attributes set
|
|
31
37
|
def classify(diff_node)
|
|
32
|
-
#
|
|
38
|
+
# FIRST: Check for XML serialization-level formatting differences
|
|
39
|
+
# These are ALWAYS non-normative (formatting-only) regardless of match options
|
|
40
|
+
# Examples: self-closing tags (<tag/>) vs explicit closing tags (<tag></tag>)
|
|
41
|
+
if XmlSerializationFormatter.serialization_formatting?(diff_node)
|
|
42
|
+
diff_node.formatting = true
|
|
43
|
+
diff_node.normative = false
|
|
44
|
+
return diff_node
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# SECOND: Handle content-level formatting for text_content with :normalize behavior
|
|
33
48
|
# When text_content is :normalize and the difference is formatting-only,
|
|
34
49
|
# it should be marked as non-normative (informative)
|
|
35
50
|
# This ensures that verbose and non-verbose modes give consistent results
|
|
@@ -38,7 +53,7 @@ module Canon
|
|
|
38
53
|
# (like <pre>, <code>, <textarea> in HTML), don't apply formatting detection
|
|
39
54
|
# because whitespace should be preserved in these elements
|
|
40
55
|
#
|
|
41
|
-
# This check must come
|
|
56
|
+
# This check must come BEFORE normative_dimension? is called,
|
|
42
57
|
# because normative_dimension? returns true for text_content: :normalize
|
|
43
58
|
# (since the dimension affects equivalence), which would prevent formatting
|
|
44
59
|
# detection from being applied.
|
|
@@ -51,11 +66,11 @@ module Canon
|
|
|
51
66
|
return diff_node
|
|
52
67
|
end
|
|
53
68
|
|
|
54
|
-
#
|
|
69
|
+
# THIRD: Determine if this dimension is normative based on CompareProfile
|
|
55
70
|
# This respects the policy settings (strict/normalize/ignore)
|
|
56
71
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
57
72
|
|
|
58
|
-
#
|
|
73
|
+
# FOURTH: Check if FormattingDetector should be consulted for non-normative dimensions
|
|
59
74
|
# Only check for formatting-only when dimension is NOT normative
|
|
60
75
|
# This ensures strict mode differences remain normative
|
|
61
76
|
should_check_formatting = !is_normative &&
|
|
@@ -68,7 +83,7 @@ module Canon
|
|
|
68
83
|
return diff_node
|
|
69
84
|
end
|
|
70
85
|
|
|
71
|
-
#
|
|
86
|
+
# FIFTH: Apply the normative determination from CompareProfile
|
|
72
87
|
diff_node.formatting = false
|
|
73
88
|
diff_node.normative = is_normative
|
|
74
89
|
|
|
@@ -127,33 +142,6 @@ module Canon
|
|
|
127
142
|
normalized1 == normalized2 && text1 != text2
|
|
128
143
|
end
|
|
129
144
|
|
|
130
|
-
# Check if a node is a text node
|
|
131
|
-
# @param node [Object] The node to check
|
|
132
|
-
# @return [Boolean] true if the node is a text node
|
|
133
|
-
def text_node?(node)
|
|
134
|
-
return false if node.nil?
|
|
135
|
-
|
|
136
|
-
# Canon::Xml::Nodes::TextNode
|
|
137
|
-
return true if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
138
|
-
|
|
139
|
-
# Nokogiri text nodes (node_type returns integer constant like 3)
|
|
140
|
-
return true if node.respond_to?(:node_type) &&
|
|
141
|
-
node.node_type.is_a?(Integer) &&
|
|
142
|
-
node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
|
143
|
-
|
|
144
|
-
# Moxml text nodes (node_type returns symbol)
|
|
145
|
-
return true if node.respond_to?(:node_type) && node.node_type == :text
|
|
146
|
-
|
|
147
|
-
# String
|
|
148
|
-
return true if node.is_a?(String)
|
|
149
|
-
|
|
150
|
-
# Test doubles or objects with text node-like interface
|
|
151
|
-
# Check if it has a value method (contains text content)
|
|
152
|
-
return true if node.respond_to?(:value)
|
|
153
|
-
|
|
154
|
-
false
|
|
155
|
-
end
|
|
156
|
-
|
|
157
145
|
# Check if the text node is inside a whitespace-sensitive element
|
|
158
146
|
# @param diff_node [DiffNode] The diff node to check
|
|
159
147
|
# @return [Boolean] true if inside a whitespace-sensitive element
|
|
@@ -200,6 +188,33 @@ module Canon
|
|
|
200
188
|
# If extraction fails, return nil (not formatting-only)
|
|
201
189
|
nil
|
|
202
190
|
end
|
|
191
|
+
|
|
192
|
+
# Check if a node is a text node
|
|
193
|
+
# @param node [Object] The node to check
|
|
194
|
+
# @return [Boolean] true if the node is a text node
|
|
195
|
+
def text_node?(node)
  # Decides whether +node+ should be treated as a text node across the
  # node representations this classifier may receive.
  return false if node.nil?

  # Canon's own text node class.
  return true if node.is_a?(Canon::Xml::Nodes::TextNode)

  if node.respond_to?(:node_type)
    kind = node.node_type
    # Nokogiri reports an Integer constant, Moxml reports a Symbol.
    return true if kind.is_a?(Integer) && kind == Nokogiri::XML::Node::TEXT_NODE
    return true if kind == :text
  end

  # Plain strings, and anything exposing #value (e.g. test doubles with a
  # text-node-like interface), also count as text.
  node.is_a?(String) || node.respond_to?(:value)
end
|
|
203
218
|
end
|
|
204
219
|
end
|
|
205
220
|
end
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
  module Diff
    # Detects and classifies XML serialization-level formatting differences.
    #
    # Serialization-level formatting differences are differences in XML syntax
    # that do not affect the semantic content of the document. They arise from
    # different valid ways to serialize the same semantic content.
    #
    # These differences are ALWAYS non-normative (formatting-only) regardless
    # of match options, because they are purely syntactic variations.
    #
    # Examples:
    # - Self-closing vs explicit closing tags: <tag/> vs <tag></tag>
    # - Attribute quote style: attr="value" vs attr='value' (parser-normalized)
    # - Whitespace within tags: <tag a="1" b="2"> vs <tag  a="1"   b="2"> (parser-normalized)
    #
    # Note: Some serialization differences are normalized away by XML parsers
    # (attribute quotes, tag spacing). This class focuses on differences that
    # survive parsing and comparison, such as self-closing vs explicit closing.
    class XmlSerializationFormatter
      # Detect if a diff node represents an XML serialization formatting difference.
      #
      # Only the :text_content dimension is handled; every other dimension
      # yields false.
      #
      # @param diff_node [DiffNode] The diff node to check
      # @return [Boolean] true if this is a serialization formatting difference
      def self.serialization_formatting?(diff_node)
        return false unless diff_node.dimension == :text_content

        empty_text_content_serialization_diff?(diff_node)
      end

      # Check if a text_content difference is from XML serialization format.
      #
      # Specifically detects self-closing tags (<tag/>) vs explicit closing tags
      # (<tag></tag>), which create different text node structures:
      # - Self-closing: no text node (nil)
      # - Explicit closing: empty or whitespace-only text node ("", " ", "\n", etc.)
      #
      # @param diff_node [DiffNode] The diff node to check
      # @return [Boolean] true if this is a serialization formatting difference
      def self.empty_text_content_serialization_diff?(diff_node)
        return false unless diff_node.dimension == :text_content

        node1 = diff_node.node1
        node2 = diff_node.node2

        # Both nodes are nil - no actual difference to classify.
        return false if node1.nil? && node2.nil?

        # Exactly one side is nil: formatting-only when the side that exists
        # is a blank text node.
        if node1.nil? || node2.nil?
          non_nil = node1 || node2
          return false unless text_node?(non_nil)

          return blank?(extract_text_content(non_nil))
        end

        # Both sides present: both must be text nodes with blank text.
        return false unless text_node?(node1) && text_node?(node2)

        blank?(extract_text_content(node1)) && blank?(extract_text_content(node2))
      end

      # Check if a value is blank (nil, empty, or whitespace-only)
      # @param value [String, nil] Value to check
      # @return [Boolean] true if blank
      def self.blank?(value)
        value.nil? ||
          (value.respond_to?(:empty?) && value.empty?) ||
          (value.respond_to?(:strip) && value.strip.empty?)
      end

      # Check if a node is a text node
      #
      # Moxml and Nokogiri constants are only referenced when they are
      # defined, so the check does not raise NameError when one of those
      # libraries has not been loaded.
      #
      # @param node [Object] The node to check
      # @return [Boolean] true if the node is a text node
      def self.text_node?(node)
        return false if node.nil?

        # Canon::Xml::Nodes::TextNode
        return true if node.is_a?(Canon::Xml::Nodes::TextNode)

        # Moxml::Text (checked before the generic node_type checks)
        return true if defined?(Moxml::Text) && node.is_a?(Moxml::Text)

        if node.respond_to?(:node_type)
          type = node.node_type

          # Nokogiri text nodes (node_type returns an Integer constant)
          return true if type.is_a?(Integer) &&
            defined?(Nokogiri::XML::Node::TEXT_NODE) &&
            type == Nokogiri::XML::Node::TEXT_NODE

          # Moxml text nodes (node_type returns a Symbol)
          return true if type == :text
        end

        # String
        return true if node.is_a?(String)

        # Test doubles or objects with a text-node-like interface
        return true if node.respond_to?(:value)

        false
      end

      # Extract text content from a node
      # @param node [Object] The node to extract text from
      # @return [String, nil] The text content, or nil when extraction fails
      def self.extract_text_content(node)
        return nil if node.nil?

        # For TextNode with value attribute (Canon::Xml::Nodes::TextNode)
        return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)

        # For XML/HTML nodes with text_content method
        return node.text_content if node.respond_to?(:text_content)

        # For nodes with content method (try before text, as Moxml::Text.text returns "")
        return node.content if node.respond_to?(:content)

        # For nodes with text method
        return node.text if node.respond_to?(:text)

        # For nodes with value method (other types)
        return node.value if node.respond_to?(:value)

        # Strings and any other node type fall back to to_s
        node.to_s
      rescue StandardError
        # If extraction fails, return nil
        nil
      end

      private_class_method :blank?, :text_node?, :extract_text_content,
                           :empty_text_content_serialization_diff?
    end
  end
end
|