canon 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +69 -92
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/features/match-options/index.adoc +239 -1
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +87 -0
- data/lib/canon/comparison/html_comparator.rb +70 -26
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
- data/lib/canon/comparison/xml_comparator.rb +97 -684
- data/lib/canon/comparison/xml_node_comparison.rb +319 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +38 -9
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +48 -2
data/lib/canon/diff/diff_node.rb
CHANGED
|
@@ -5,9 +5,19 @@ module Canon
|
|
|
5
5
|
# Represents a semantic difference between two nodes in a comparison tree
|
|
6
6
|
# This is created during the Comparison Layer and carries information about
|
|
7
7
|
# which dimension caused the difference and whether it's normative or informative
|
|
8
|
+
#
|
|
9
|
+
# DiffNode is library-agnostic - it works with data extracted from nodes,
|
|
10
|
+
# not the raw node references themselves. This allows Canon to work with
|
|
11
|
+
# any parsing library (Nokogiri, Moxml, etc.) without being tied to it.
|
|
8
12
|
class DiffNode
|
|
9
13
|
attr_reader :node1, :node2
|
|
10
|
-
attr_accessor :dimension, :reason, :normative, :formatting
|
|
14
|
+
attr_accessor :dimension, :reason, :normative, :formatting,
|
|
15
|
+
# Enriched metadata for Stage 4 rendering
|
|
16
|
+
:path, # Canonical path with ordinal indices
|
|
17
|
+
:serialized_before, # Serialized content for display (before)
|
|
18
|
+
:serialized_after, # Serialized content for display (after)
|
|
19
|
+
:attributes_before, # Normalized attributes hash (before)
|
|
20
|
+
:attributes_after # Normalized attributes hash (after)
|
|
11
21
|
|
|
12
22
|
# @param node1 [Object] The first node being compared
|
|
13
23
|
# @param node2 [Object] The second node being compared
|
|
@@ -15,13 +25,26 @@ module Canon
|
|
|
15
25
|
# (e.g., :text_content, :attribute_whitespace, :structural_whitespace,
|
|
16
26
|
# :comments, :key_order)
|
|
17
27
|
# @param reason [String] Human-readable explanation of the difference
|
|
18
|
-
|
|
28
|
+
# @param path [String, nil] Optional canonical path with ordinal indices
|
|
29
|
+
# @param serialized_before [String, nil] Optional serialized content for display
|
|
30
|
+
# @param serialized_after [String, nil] Optional serialized content for display
|
|
31
|
+
# @param attributes_before [Hash, nil] Optional normalized attributes hash
|
|
32
|
+
# @param attributes_after [Hash, nil] Optional normalized attributes hash
|
|
33
|
+
def initialize(node1:, node2:, dimension:, reason:,
               path: nil, serialized_before: nil, serialized_after: nil,
               attributes_before: nil, attributes_after: nil)
  # What was compared, which dimension differs, and the explanation.
  @node1, @node2 = node1, node2
  @dimension, @reason = dimension, reason

  # Classification starts out unset; it will be set by DiffClassifier.
  @normative, @formatting = nil, nil

  # Enriched metadata (optional, populated by PathBuilder and NodeSerializer).
  @path = path
  @serialized_before, @serialized_after = serialized_before, serialized_after
  @attributes_before, @attributes_after = attributes_before, attributes_after
end
|
|
26
49
|
|
|
27
50
|
# @return [Boolean] true if this diff is normative (affects equivalence)
|
|
@@ -54,6 +77,11 @@ module Canon
|
|
|
54
77
|
reason: reason,
|
|
55
78
|
normative: normative,
|
|
56
79
|
formatting: formatting,
|
|
80
|
+
path: path,
|
|
81
|
+
serialized_before: serialized_before,
|
|
82
|
+
serialized_after: serialized_after,
|
|
83
|
+
attributes_before: attributes_before,
|
|
84
|
+
attributes_after: attributes_after,
|
|
57
85
|
}
|
|
58
86
|
end
|
|
59
87
|
|
|
@@ -65,6 +93,8 @@ module Canon
|
|
|
65
93
|
reason == other.reason &&
|
|
66
94
|
normative == other.normative &&
|
|
67
95
|
formatting == other.formatting
|
|
96
|
+
# Note: path and serialized content are not part of equality
|
|
97
|
+
# since they're derived from nodes, not independent properties
|
|
68
98
|
end
|
|
69
99
|
end
|
|
70
100
|
end
|
|
@@ -11,7 +11,7 @@ module Canon
|
|
|
11
11
|
# @param line2 [String, nil] Second line to compare
|
|
12
12
|
# @return [Boolean] true if lines differ only in formatting
|
|
13
13
|
def self.formatting_only?(line1, line2)
|
|
14
|
-
# If both are nil or empty, not a formatting diff
|
|
14
|
+
# If both are nil or empty, not a formatting diff (no difference)
|
|
15
15
|
return false if blank?(line1) && blank?(line2)
|
|
16
16
|
|
|
17
17
|
# If only one is blank, it's not just formatting
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../xml/data_model"
|
|
4
|
+
require_relative "../xml/nodes/text_node"
|
|
5
|
+
require_relative "../xml/nodes/comment_node"
|
|
6
|
+
require_relative "../xml/nodes/element_node"
|
|
7
|
+
require_relative "../xml/nodes/processing_instruction_node"
|
|
8
|
+
require_relative "../xml/nodes/root_node"
|
|
9
|
+
|
|
10
|
+
module Canon
|
|
11
|
+
module Diff
|
|
12
|
+
# Serializes nodes from different parsing libraries into canonical strings
|
|
13
|
+
# This abstraction allows Canon to work with any parsing library
|
|
14
|
+
# (Nokogiri, Moxml, etc.) without being tied to a specific implementation.
|
|
15
|
+
#
|
|
16
|
+
# This is library-agnostic because it detects node type and uses
|
|
17
|
+
# the appropriate serialization method.
|
|
18
|
+
class NodeSerializer
  # Serialize a node to a string for display.
  #
  # Canon node classes are matched first; any other object is serialized
  # through +to_html+, then +to_xml+, then +to_s+, whichever it responds
  # to first.
  #
  # @param node [Object] Node to serialize (Nokogiri, Canon, or nil)
  # @return [String] Serialized string representation ("" for nil)
  def self.serialize(node)
    case node
    when nil
      ""
    when Canon::Xml::Nodes::TextNode
      node.value.to_s
    when Canon::Xml::Nodes::CommentNode
      "<!--#{node.value}-->"
    when Canon::Xml::Nodes::ElementNode
      serialize_element_node(node)
    when Canon::Xml::Nodes::ProcessingInstructionNode
      serialize_processing_instruction(node)
    when Canon::Xml::Nodes::RootNode
      # A root has no markup of its own; emit its children back to back.
      node.children.map { |child| serialize(child) }.join
    else
      serialize_foreign_node(node)
    end
  end

  # Serialize an ElementNode to an HTML/XML string.
  #
  # Attributes are emitted in the order given by +sorted_attribute_nodes+.
  # Names and values are interpolated verbatim (no escaping is applied).
  # An element without children is written as a self-closing tag.
  #
  # @param element [Canon::Xml::Nodes::ElementNode] Element to serialize
  # @return [String] Serialized element
  def self.serialize_element_node(element)
    attributes = element.sorted_attribute_nodes.map do |attr|
      " #{attr.name}=\"#{attr.value}\""
    end
    opening = "<#{element.name}#{attributes.join}"

    return "#{opening}/>" if element.children.empty?

    content = element.children.map { |child| serialize(child) }.join
    "#{opening}>#{content}</#{element.name}>"
  end

  # Extract attributes from a node as a normalized hash.
  #
  # @param node [Object] Canon element, an object whose +attributes+
  #   method returns a Hash (e.g. a Nokogiri element), or a Hash
  # @return [Hash] name => value; a Hash argument is returned as-is and
  #   anything unrecognised yields an empty Hash
  def self.extract_attributes(node)
    return {} if node.nil?

    if node.is_a?(Canon::Xml::Nodes::ElementNode)
      return node.attribute_nodes.each_with_object({}) do |attr, attrs|
        attrs[attr.name] = attr.value
      end
    end

    if node.respond_to?(:attributes)
      raw = node.attributes
      if raw.is_a?(Hash)
        # Values may be attribute objects (exposing #value) or plain
        # strings; normalise both to the bare value.
        return raw.each_with_object({}) do |(name, attr), attrs|
          attrs[name] = attr.respond_to?(:value) ? attr.value : attr.to_s
        end
      end
    end

    # Handle TreeNode attributes (already a hash)
    return node if node.is_a?(Hash)

    {}
  end

  # Get the element name from a node.
  #
  # @param node [Object] Node to get name from
  # @return [String] Element name, or "" when the node is nil or has no
  #   +name+ method
  def self.element_name(node)
    return "" if node.nil?
    return node.name if node.is_a?(Canon::Xml::Nodes::ElementNode)
    return node.name.to_s if node.respond_to?(:name)

    ""
  end

  # Get the text content from a node.
  #
  # @param node [Object] Node to get text from
  # @return [String] Value of a Canon text node, otherwise the result of
  #   +text+ or +content+ (first one the node responds to), or ""
  def self.text_content(node)
    return "" if node.nil?
    return node.value.to_s if node.is_a?(Canon::Xml::Nodes::TextNode)
    return node.text.to_s if node.respond_to?(:text)
    return node.content.to_s if node.respond_to?(:content)

    ""
  end

  # Serialize attributes to string format.
  # Returns attributes in " name=\"value\"" format, sorted by name.
  #
  # @param attributes [Hash, nil] Attributes hash
  # @return [String] Serialized attributes ("" for nil or empty input)
  def self.serialize_attributes(attributes)
    return "" if attributes.nil? || attributes.empty?

    attributes.sort.map { |name, value| " #{name}=\"#{value}\"" }.join
  end

  # Serialize a processing instruction. The separating space is written
  # only when there is data, so an instruction without data becomes
  # "<?target?>" rather than "<?target ?>".
  #
  # @param instruction [Canon::Xml::Nodes::ProcessingInstructionNode]
  # @return [String] Serialized processing instruction
  def self.serialize_processing_instruction(instruction)
    data = instruction.data.to_s
    return "<?#{instruction.target}?>" if data.empty?

    "<?#{instruction.target} #{data}?>"
  end
  private_class_method :serialize_processing_instruction

  # Serialize a node that is not one of Canon's own node classes
  # (e.g. a Nokogiri node), falling back to +to_s+.
  #
  # @param node [Object] Non-Canon node
  # @return [String] Serialized node
  def self.serialize_foreign_node(node)
    return node.to_html if node.respond_to?(:to_html)
    return node.to_xml if node.respond_to?(:to_xml)

    node.to_s
  end
  private_class_method :serialize_foreign_node
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Diff
|
|
5
|
+
# Builds canonical XPath-like paths from TreeNodes or raw nodes
|
|
6
|
+
# Generates paths with ordinal indices to uniquely identify nodes
|
|
7
|
+
# regardless of the parsing library used (Nokogiri, Moxml, Canon, etc.)
|
|
8
|
+
#
|
|
9
|
+
# This is library-agnostic because it operates on different node types:
|
|
10
|
+
# - TreeNodes (from semantic diff adapters) - uses `label` attribute
|
|
11
|
+
# - Canon::Xml::Node (from DOM diff) - uses `name` attribute
|
|
12
|
+
# - Nokogiri nodes (from HTML DOM diff) - uses `name` method
|
|
13
|
+
#
|
|
14
|
+
# @example Build path for a TreeNode
|
|
15
|
+
# path = PathBuilder.build(tree_node)
|
|
16
|
+
# # => "/#document-fragment/div[0]/p[1]/span[2]"
|
|
17
|
+
#
|
|
18
|
+
# @example Build path for a Canon::Xml::Node
|
|
19
|
+
# path = PathBuilder.build(canon_node)
|
|
20
|
+
# # => "/#document/root[0]/body[0]/p[1]"
|
|
21
|
+
#
|
|
22
|
+
# @example Build path for a Nokogiri node
|
|
23
|
+
# path = PathBuilder.build(nokogiri_node)
|
|
24
|
+
# # => "/#document/div[0]/p[1]/span[2]"
|
|
25
|
+
class PathBuilder
  # Upper bound on parent hops while walking towards the root; guards
  # against a cyclic parent chain looping forever.
  MAX_DEPTH = 1000

  # Build canonical path from a node (TreeNode, Canon::Xml::Node, or Nokogiri)
  #
  # @param node [Object] Node to build path for
  # @param format [Symbol] Format (:document or :fragment). Accepted for
  #   interface compatibility; the value does not affect the result.
  # @return [String] Canonical path with ordinal indices ("" for nil)
  def self.build(node, format: :fragment)
    return "" if node.nil?

    "/#{build_segments(node).join('/')}"
  end

  # Build path segments (node names with ordinal indices), ordered from
  # the root down to the given node.
  #
  # The walk follows +parent+ links and stops at the first nil parent,
  # at a node without a +parent+ method, or after MAX_DEPTH hops.
  #
  # @param tree_node [Object] Node to build segments for
  # @return [Array<String>] Path segments from root to node
  def self.build_segments(tree_node)
    segments = []
    current = tree_node
    depth = 0

    while current && depth < MAX_DEPTH
      segments.unshift(segment_for_node(current))
      break unless current.respond_to?(:parent)

      current = current.parent
      depth += 1
    end

    segments
  end

  # Build path segment for a single node.
  # Returns label with ordinal index: "div[0]", "span[1]", etc.
  # A node exposing neither +label+ nor +name+ is rendered as "unknown".
  #
  # @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
  # @return [String] Path segment with ordinal index
  def self.segment_for_node(tree_node)
    "#{node_label(tree_node, 'unknown')}[#{ordinal_index(tree_node)}]"
  end

  # Get ordinal index of node among its siblings with the same label.
  #
  # The node is located by object identity first, so node classes whose
  # +==+ compares by value do not collapse onto an earlier equal sibling.
  # Equality is used only as a fallback when no identical object is
  # found among the parent's children.
  #
  # @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
  # @return [Integer] Zero-based ordinal index (0 when it cannot be
  #   determined)
  def self.ordinal_index(tree_node)
    return 0 unless tree_node.respond_to?(:parent)

    parent = tree_node.parent
    return 0 unless parent
    return 0 unless parent.respond_to?(:children)

    siblings = parent.children
    return 0 unless siblings

    # Convert to array if it's a NodeSet (Nokogiri) or similar
    siblings = siblings.to_a unless siblings.is_a?(Array)

    my_label = node_label(tree_node)
    return 0 unless my_label

    same_label_siblings = siblings.select do |sibling|
      node_label(sibling) == my_label
    end

    same_label_siblings.index { |sibling| sibling.equal?(tree_node) } ||
      same_label_siblings.index(tree_node) ||
      0
  end

  # Build human-readable path description.
  # Same segments as {build}, joined with " → " instead of "/".
  #
  # @param tree_node [Object] Node (TreeNode, Canon::Xml::Node, or Nokogiri)
  # @return [String] Human-readable path
  def self.human_path(tree_node)
    build_segments(tree_node).join(" → ")
  end

  # Read a node's label: +label+ when available, otherwise +name+.
  #
  # @param node [Object] Node to inspect
  # @param fallback [Object] Value returned when the node has neither
  #   method
  # @return [Object] The label/name as returned by the node, or +fallback+
  def self.node_label(node, fallback = nil)
    if node.respond_to?(:label)
      node.label
    elsif node.respond_to?(:name)
      node.name
    else
      fallback
    end
  end
  private_class_method :node_label
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -456,6 +456,257 @@ module Canon
|
|
|
456
456
|
# Default to 10,000 if config not available
|
|
457
457
|
config&.xml&.diff&.max_diff_lines || 10_000
|
|
458
458
|
end
|
|
459
|
+
|
|
460
|
+
# Build set of children of matched parents
|
|
461
|
+
#
|
|
462
|
+
# @param matches [Array<Match>] Element matches
|
|
463
|
+
# @return [Set] Set of child elements
|
|
464
|
+
def build_children_set(matches)
  require "set"

  # Walk only the :matched pairs and gather every child (from either side)
  # that exposes a name.
  matches.each_with_object(Set.new) do |match, collected|
    next if match.status != :matched

    [match.elem1, match.elem2].each do |parent|
      next if parent.nil? || !parent.respond_to?(:children)

      parent.children.each do |candidate|
        collected << candidate if candidate.respond_to?(:name)
      end
    end
  end
end
|
|
483
|
+
|
|
484
|
+
# Build set of individual elements that have semantic diffs
|
|
485
|
+
#
|
|
486
|
+
# @return [Set] Set of elements with semantic diffs
|
|
487
|
+
def build_elements_with_semantic_diffs_set
  require "set"

  # Only DiffNode entries of @differences contribute; both of their nodes
  # are recorded when present.
  diff_nodes = if @differences.nil? || @differences.empty?
                 []
               else
                 @differences.select do |entry|
                   entry.is_a?(Canon::Diff::DiffNode)
                 end
               end

  diff_nodes.each_with_object(Set.new) do |diff, touched|
    [diff.node1, diff.node2].each do |node|
      touched << node if node
    end
  end
end
|
|
504
|
+
|
|
505
|
+
# Check if an element or its children have semantic diffs
|
|
506
|
+
#
|
|
507
|
+
# @param element [Object] Element to check
|
|
508
|
+
# @param elements_with_semantic_diffs [Set] Set of elements with diffs
|
|
509
|
+
# @return [Boolean] True if element or descendants have semantic diffs
|
|
510
|
+
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  # The element itself carries a diff.
  return true if elements_with_semantic_diffs.include?(element)

  # Leaves (anything without children) cannot contain further diffs.
  return false unless element.respond_to?(:children)

  # Otherwise search depth-first through the descendants.
  element.children.any? do |descendant|
    has_semantic_diff_in_subtree?(descendant, elements_with_semantic_diffs)
  end
end
|
|
523
|
+
|
|
524
|
+
# Check if diff display should be skipped
|
|
525
|
+
# Returns true when:
|
|
526
|
+
# 1. show_diffs is :normative AND there are no normative differences
|
|
527
|
+
# 2. show_diffs is :informative AND there are no informative differences
|
|
528
|
+
#
|
|
529
|
+
# @return [Boolean] True if diff display should be skipped
|
|
530
|
+
def should_skip_diff_display?
  return false if @differences.nil? || @differences.empty?

  # Pick the classification the current @show_diffs mode cares about.
  # Any other mode (:all, ...) never skips.
  relevant = case @show_diffs
             when :normative then ->(diff) { diff.normative? }
             when :informative then ->(diff) { diff.informative? }
             end
  return false if relevant.nil?

  # Skip only when not a single DiffNode falls into that classification.
  @differences.none? do |diff|
    diff.is_a?(Canon::Diff::DiffNode) && relevant.call(diff)
  end
end
|
|
549
|
+
|
|
550
|
+
# Group diff sections by proximity
|
|
551
|
+
#
|
|
552
|
+
# @param sections [Array<Hash>] Diff sections
|
|
553
|
+
# @param grouping_lines [Integer] Maximum gap to group
|
|
554
|
+
# @return [Array<Array>] Grouped sections
|
|
555
|
+
def group_diff_sections(sections, grouping_lines)
  return [] if sections.empty?

  # Number of lines between the end of one section and the start of the
  # next on one side; infinite when either boundary is missing, which
  # keeps such sections apart for any finite grouping_lines.
  distance = lambda do |ending, beginning|
    ending && beginning ? beginning - ending - 1 : Float::INFINITY
  end

  # Start a new group wherever the wider of the two per-document gaps
  # exceeds the grouping threshold.
  sections.slice_when do |previous, following|
    widest = [
      distance.call(previous[:end_line1], following[:start_line1]),
      distance.call(previous[:end_line2], following[:start_line2]),
    ].max

    widest > grouping_lines
  end.to_a
end
|
|
590
|
+
|
|
591
|
+
# Format groups of diffs
|
|
592
|
+
#
|
|
593
|
+
# @param groups [Array<Array>] Grouped diff sections
|
|
594
|
+
# @return [String] Formatted groups
|
|
595
|
+
def format_diff_groups(groups)
  rendered = groups.each_with_index.flat_map do |group, position|
    # Every group after the first is preceded by a blank separator line.
    chunk = position.zero? ? [] : [""]

    if group.length > 1
      # Multi-diff groups get a highlighted header, a blank line, then
      # each member that actually has formatted output.
      chunk << colorize("Context block has #{group.length} diffs",
                        :yellow, :bold)
      chunk << ""
      group.each do |section|
        text = section[:formatted]
        chunk << text if text
      end
    else
      only = group[0][:formatted]
      chunk << only if only
    end

    chunk
  end

  rendered.join("\n")
end
|
|
615
|
+
|
|
616
|
+
# Format matched element with metadata
|
|
617
|
+
# Subclasses may override to customize behavior
|
|
618
|
+
#
|
|
619
|
+
# @param match [Match] Element match
|
|
620
|
+
# @param map1 [Hash] Line range map for doc1
|
|
621
|
+
# @param map2 [Hash] Line range map for doc2
|
|
622
|
+
# @param lines1 [Array<String>] Lines from doc1
|
|
623
|
+
# @param lines2 [Array<String>] Lines from doc2
|
|
624
|
+
# @return [Hash, nil] Metadata hash or nil
|
|
625
|
+
def format_matched_element_with_metadata(match, map1, map2, lines1,
                                         lines2)
  # Both sides need a known line range, otherwise there is nothing to anchor.
  left_range = map1[match.elem1]
  right_range = map2[match.elem2]
  return nil if !left_range || !right_range

  # Rendering itself is delegated to format_matched_element.
  rendered = format_matched_element(match, map1, map2, lines1, lines2)
  return nil unless rendered

  metadata = { formatted: rendered }
  metadata[:start_line1] = left_range.start_line
  metadata[:end_line1] = left_range.end_line
  metadata[:start_line2] = right_range.start_line
  metadata[:end_line2] = right_range.end_line
  metadata[:path] = match.path.join("/")
  metadata
end
|
|
644
|
+
|
|
645
|
+
# Format deleted element with metadata
|
|
646
|
+
# Subclasses may override to customize behavior
|
|
647
|
+
#
|
|
648
|
+
# @param match [Match] Element match
|
|
649
|
+
# @param map1 [Hash] Line range map for doc1
|
|
650
|
+
# @param lines1 [Array<String>] Lines from doc1
|
|
651
|
+
# @return [Hash, nil] Metadata hash or nil
|
|
652
|
+
def format_deleted_element_with_metadata(match, map1, lines1)
  # A deletion only has a position in the first document.
  removed_range = map1[match.elem1]
  return nil if !removed_range

  # Rendering itself is delegated to format_deleted_element.
  rendered = format_deleted_element(match, map1, lines1)
  return nil unless rendered

  metadata = { formatted: rendered }
  metadata[:start_line1] = removed_range.start_line
  metadata[:end_line1] = removed_range.end_line
  # No counterpart exists on the second side.
  metadata[:start_line2] = nil
  metadata[:end_line2] = nil
  metadata[:path] = match.path.join("/")
  metadata
end
|
|
669
|
+
|
|
670
|
+
# Format inserted element with metadata
|
|
671
|
+
# Subclasses may override to customize behavior
|
|
672
|
+
#
|
|
673
|
+
# @param match [Match] Element match
|
|
674
|
+
# @param map2 [Hash] Line range map for doc2
|
|
675
|
+
# @param lines2 [Array<String>] Lines from doc2
|
|
676
|
+
# @return [Hash, nil] Metadata hash or nil
|
|
677
|
+
def format_inserted_element_with_metadata(match, map2, lines2)
  # An insertion only has a position in the second document.
  added_range = map2[match.elem2]
  return nil if !added_range

  # Rendering itself is delegated to format_inserted_element.
  rendered = format_inserted_element(match, map2, lines2)
  return nil unless rendered

  # No counterpart exists on the first side.
  metadata = { formatted: rendered, start_line1: nil, end_line1: nil }
  metadata[:start_line2] = added_range.start_line
  metadata[:end_line2] = added_range.end_line
  metadata[:path] = match.path.join("/")
  metadata
end
|
|
694
|
+
|
|
695
|
+
# Subclasses must implement these element formatting methods
|
|
696
|
+
def format_matched_element(_match, _map1, _map2, _lines1, _lines2)
  # Abstract hook: this base implementation always raises.
  message = "Subclasses must implement format_matched_element"
  raise NotImplementedError, message
end
|
|
700
|
+
|
|
701
|
+
def format_deleted_element(_match, _map1, _lines1)
  # Abstract hook: this base implementation always raises.
  message = "Subclasses must implement format_deleted_element"
  raise NotImplementedError, message
end
|
|
705
|
+
|
|
706
|
+
def format_inserted_element(_match, _map2, _lines2)
  # Abstract hook: this base implementation always raises.
  message = "Subclasses must implement format_inserted_element"
  raise NotImplementedError, message
end
|
|
459
710
|
end
|
|
460
711
|
end
|
|
461
712
|
end
|