canon 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +69 -92
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/features/match-options/index.adoc +239 -1
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +87 -0
- data/lib/canon/comparison/html_comparator.rb +70 -26
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
- data/lib/canon/comparison/xml_comparator.rb +97 -684
- data/lib/canon/comparison/xml_node_comparison.rb +319 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +38 -9
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +48 -2
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../diff/path_builder"
|
|
4
|
+
require_relative "../../diff/node_serializer"
|
|
5
|
+
|
|
6
|
+
module Canon
  module TreeDiff
    module OperationConverterHelpers
      # Derives presentation metadata for DiffNodes from TreeNodes:
      # a canonical path, serialized content and attribute hashes.
      #
      # All helpers accept nil and answer nil for it, so callers can pass
      # whichever side of an operation happens to be missing.
      module MetadataEnricher
        # Collect the metadata hash for a pair of tree nodes.
        #
        # The path is computed from the first node, falling back to the
        # second node when the first is absent.
        #
        # @param tree_node1 [Canon::TreeDiff::Core::TreeNode, nil] First tree node
        # @param tree_node2 [Canon::TreeDiff::Core::TreeNode, nil] Second tree node
        # @param format [Symbol] Document format
        # @return [Hash] Hash with :path, :serialized_before, :serialized_after,
        #   :attributes_before and :attributes_after keys
        def self.enrich(tree_node1, tree_node2, format)
          path = build_path(tree_node1 || tree_node2, format)
          serialized_before = serialize(tree_node1)
          serialized_after = serialize(tree_node2)
          attributes_before = extract_attributes(tree_node1)
          attributes_after = extract_attributes(tree_node2)

          {
            path: path,
            serialized_before: serialized_before,
            serialized_after: serialized_after,
            attributes_before: attributes_before,
            attributes_after: attributes_after,
          }
        end

        # Ask Canon::Diff::PathBuilder for the canonical path of a node.
        #
        # The :xml format is passed on as :document; every other format
        # is passed on as :fragment.
        #
        # @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
        # @param format [Symbol] Document format
        # @return [String, nil] Path from PathBuilder, or nil for a nil node
        def self.build_path(tree_node, format)
          return if tree_node.nil?

          path_format = if format == :xml
                          :document
                        else
                          :fragment
                        end

          Canon::Diff::PathBuilder.build(tree_node, format: path_format)
        end

        # Serialize the node behind a TreeNode via Canon::Diff::NodeSerializer.
        #
        # When the object exposes #source_node, that source node is what gets
        # serialized; otherwise the object itself is handed to the serializer.
        #
        # @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
        # @return [String, nil] Serialized content, or nil for a nil node
        def self.serialize(tree_node)
          return if tree_node.nil?

          target = tree_node
          target = tree_node.source_node if tree_node.respond_to?(:source_node)

          Canon::Diff::NodeSerializer.serialize(target)
        end

        # Read the attributes exposed by a TreeNode.
        #
        # @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
        # @return [Hash, nil] The node's attributes; an empty hash when the
        #   node has no #attributes method or returns nil from it; nil when
        #   the node itself is nil
        def self.extract_attributes(tree_node)
          return if tree_node.nil?
          return {} unless tree_node.respond_to?(:attributes)

          tree_node.attributes || {}
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
  module TreeDiff
    module OperationConverterHelpers
      # Post-processing of DiffNodes
      # Detects DELETE/INSERT pairs that differ only in attribute order and
      # reclassifies them under the :attribute_order dimension.
      module PostProcessor
        # Detect INSERT/DELETE pairs that differ only in attribute order
        # and reclassify them to use the attribute_order dimension.
        #
        # A DELETE is a diff node with node1 but no node2; an INSERT has
        # node2 but no node1. Each INSERT is paired with at most one DELETE,
        # so a single inserted element cannot "explain away" several deleted
        # ones. Matching is by element name, attribute set and shallow
        # content; position in the tree is not compared.
        #
        # The diff nodes are mutated in place (dimension, reason, normative).
        #
        # @param diff_nodes [Array<DiffNode>] Diff nodes to process
        # @param normative_determiner [#call] Called with :attribute_order to
        #   obtain the normative flag for reclassified nodes
        # @return [Array<DiffNode>] The same array that was passed in
        def self.detect_attribute_order_diffs(diff_nodes, normative_determiner)
          deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
          # Inserts that have not been claimed by a delete yet
          unpaired_inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }

          deletes.each do |delete_node|
            break if unpaired_inserts.empty?

            node1 = delete_node.node1
            next unless element_with_attributes?(node1)

            match_index = unpaired_inserts.index do |insert_node|
              attribute_order_counterpart?(node1, insert_node.node2)
            end
            next unless match_index

            # Remove by index (not by ==) so an equal-looking diff node is
            # never dropped by mistake, and so this insert is not reused.
            matching_insert = unpaired_inserts.delete_at(match_index)

            [delete_node, matching_insert].each do |diff_node|
              diff_node.dimension = :attribute_order
              diff_node.reason = "attribute order changed"
              diff_node.normative = normative_determiner.call(:attribute_order)
            end
          end

          diff_nodes
        end

        # Check if two attribute collections are equal ignoring order.
        #
        # Relies on Hash#==, which does not take insertion order into
        # account, so keys never need to be sortable (mixed Symbol/String
        # keys are fine).
        #
        # @param attrs1 [Hash, #to_h, nil] First attribute collection
        # @param attrs2 [Hash, #to_h, nil] Second attribute collection
        # @return [Boolean] True if both are nil or hold the same key/value
        #   pairs; false if exactly one is nil or the pairs differ
        def self.attributes_equal_ignoring_order?(attrs1, attrs2)
          return true if attrs1.nil? && attrs2.nil?
          return false if attrs1.nil? || attrs2.nil?

          # Convert to hashes if needed
          attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
          attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)

          attrs1 == attrs2
        end

        # Check if two nodes are the same except for attribute order.
        #
        # This is a shallow check: text must match, the child counts must
        # match, and children are compared pairwise by name only.
        #
        # @param node1 [#text, #children] First node (documented upstream as
        #   Nokogiri::XML::Node)
        # @param node2 [#text, #children] Second node
        # @return [Boolean] True if nodes are same except attribute order
        def self.nodes_same_except_attr_order?(node1, node2)
          return false if node1.text != node2.text
          return false if node1.children.length != node2.children.length

          # With equal lengths every zip pair is complete; an empty child
          # list trivially satisfies all?.
          node1.children.zip(node2.children).all? do |child1, child2|
            child1.name == child2.name
          end
        end

        # True when the node looks like an element (has #name and
        # #attributes) and carries at least one attribute. Elements without
        # attributes cannot differ in attribute order.
        def self.element_with_attributes?(node)
          return false unless node.respond_to?(:name) && node.respond_to?(:attributes)

          attributes = node.attributes
          !(attributes.nil? || attributes.empty?)
        end

        # True when +candidate+ is the attribute-reordered twin of +node1+:
        # same name, same attribute pairs, same shallow content.
        def self.attribute_order_counterpart?(node1, candidate)
          return false unless element_with_attributes?(candidate)
          return false unless node1.name == candidate.name
          return false unless attributes_equal_ignoring_order?(
            node1.attributes, candidate.attributes
          )

          nodes_same_except_attr_order?(node1, candidate)
        end

        private_class_method :element_with_attributes?,
                             :attribute_order_counterpart?
      end
    end
  end
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Canon
  module TreeDiff
    module OperationConverterHelpers
      # Reason string builders for operations
      # Handles creation of human-readable reason messages for DiffNodes
      #
      # Operations are read with #[] (e.g. operation[:node]). "Changes"
      # arguments are either a plain flag or a Hash with :old/:new entries;
      # only the Hash form yields a detailed message.
      module ReasonBuilder
        # Build reason string for INSERT operation
        #
        # @param operation [#[]] Operation exposing :node and :content
        # @return [String] "Element inserted", followed by the content (or
        #   "<label>" when there is no content) if the node has a #label
        def self.build_insert_reason(operation)
          element_reason("Element inserted", operation)
        end

        # Build reason string for DELETE operation
        #
        # @param operation [#[]] Operation exposing :node and :content
        # @return [String] "Element deleted", followed by the content (or
        #   "<label>" when there is no content) if the node has a #label
        def self.build_delete_reason(operation)
          element_reason("Element deleted", operation)
        end

        # Build reason string for UPDATE operation
        #
        # @param operation [#[]] Operation exposing :change_type
        # @return [String] "updated <change_type>", defaulting to "content"
        def self.build_update_reason(operation)
          "updated #{operation[:change_type] || 'content'}"
        end

        # Build reason string for MOVE operation
        #
        # @param operation [#[]] Operation exposing :from_position and
        #   :to_position
        # @return [String] Reason with both positions when both are known,
        #   otherwise a generic message
        def self.build_move_reason(operation)
          from_pos = operation[:from_position]
          to_pos = operation[:to_position]
          return "moved to different position" unless from_pos && to_pos

          "moved from position #{from_pos} to #{to_pos}"
        end

        # Build detailed reason for attribute differences
        #
        # Lists attribute names only present in the old hash ("Missing"),
        # only present in the new hash ("Extra"), and names present in both
        # with different values ("Changed", values truncated to 20 chars).
        # A nil hash is treated as having no attributes.
        #
        # @param old_attrs [Hash, nil] Old attributes
        # @param new_attrs [Hash, nil] New attributes
        # @return [String] Detailed reason
        def self.build_attribute_diff_details(old_attrs, new_attrs)
          # A change record may carry nil for a side without attributes
          old_attrs ||= {}
          new_attrs ||= {}

          old_keys = Set.new(old_attrs.keys)
          new_keys = Set.new(new_attrs.keys)

          missing = old_keys - new_keys
          extra = new_keys - old_keys
          changed = (old_keys & new_keys).reject do |key|
            old_attrs[key] == new_attrs[key]
          end

          parts = []
          parts << "Missing: #{missing.to_a.join(', ')}" if missing.any?
          parts << "Extra: #{extra.to_a.join(', ')}" if extra.any?
          if changed.any?
            details = changed.map do |key|
              before = truncate(old_attrs[key], 20)
              after = truncate(new_attrs[key], 20)
              "#{key}=\"#{before}\" → \"#{after}\""
            end
            parts << "Changed: #{details.join(', ')}"
          end

          parts.any? ? "Attributes differ (#{parts.join('; ')})" : "Attribute values differ"
        end

        # Build reason for attribute value changes
        #
        # @param changes [Hash, Object] Either a flag or { old: ..., new: ... }
        # @return [String] Reason description
        def self.build_attribute_value_reason(changes)
          return "attribute values differ" unless detailed_changes?(changes)

          build_attribute_diff_details(changes[:old], changes[:new])
        end

        # Build reason for attribute order changes
        #
        # @param changes [Hash, Object] Either a flag or { old: ..., new: ... }
        #   holding the attribute names in old and new order
        # @return [String] Reason description
        def self.build_attribute_order_reason(changes)
          return "attribute order differs" unless detailed_changes?(changes)

          # Array() turns a nil side into an empty list instead of raising
          old_order = Array(changes[:old])
          new_order = Array(changes[:new])
          "Attribute order changed: [#{old_order.join(', ')}] → [#{new_order.join(', ')}]"
        end

        # Build reason for text content changes
        #
        # @param changes [Hash, Object] Either a flag or { old: ..., new: ... }
        # @return [String] Reason description; values are previewed with at
        #   most 40 characters each
        def self.build_text_content_reason(changes)
          return "text content differs" unless detailed_changes?(changes)

          preview_old = truncate((changes[:old] || "").to_s, 40)
          preview_new = truncate((changes[:new] || "").to_s, 40)
          "Text content changed: \"#{preview_old}\" → \"#{preview_new}\""
        end

        # Build reason for element name changes
        #
        # @param changes [Hash, Object] Either a flag or { old: ..., new: ... }
        # @return [String] Reason description
        def self.build_element_name_reason(changes)
          return "element name differs" unless detailed_changes?(changes)

          "Element name changed: <#{changes[:old]}> → <#{changes[:new]}>"
        end

        # Truncate text for reason messages
        #
        # Text longer than max_length is cut and suffixed with "..." so the
        # result is exactly max_length characters. When max_length is too
        # small to hold the ellipsis (< 3) the text is hard-cut instead, so
        # the result never exceeds max_length.
        #
        # @param text [#to_s, nil] Text to truncate
        # @param max_length [Integer] Maximum length
        # @return [String] Truncated text ("" for nil)
        def self.truncate(text, max_length)
          return "" if text.nil?

          text = text.to_s
          return text if text.length <= max_length
          return text[0, [max_length, 0].max] if max_length < 3

          "#{text[0...max_length - 3]}..."
        end

        # Shared body of the insert/delete reasons: the bare prefix when the
        # node has no #label, otherwise the prefix plus content or "<label>".
        def self.element_reason(prefix, operation)
          node = operation[:node]
          return prefix unless node.respond_to?(:label)

          # Include content preview for clarity
          "#{prefix}: #{operation[:content] || "<#{node.label}>"}"
        end

        # True when +changes+ is the detailed Hash form carrying an :old key.
        def self.detailed_changes?(changes)
          changes.is_a?(Hash) && changes.key?(:old)
        end

        private_class_method :element_reason, :detailed_changes?
      end
    end
  end
end
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../operation_converter_helpers/reason_builder"
|
|
4
|
+
|
|
5
|
+
module Canon
  module TreeDiff
    module OperationConverterHelpers
      # Turns an UPDATE operation into DiffNodes, one per changed dimension
      # (attributes, attribute_order, value, label).
      module UpdateChangeHandler
        # Maps a key of the operation's :changes hash to the factory method
        # that builds its DiffNode. Insertion order is the output order.
        CHANGE_BUILDERS = {
          attributes: :create_attribute_value_diff,
          attribute_order: :create_attribute_order_diff,
          value: :create_text_content_diff,
          label: :create_element_name_diff,
        }.freeze
        private_constant :CHANGE_BUILDERS

        # Convert UPDATE operation to DiffNode(s)
        #
        # Emits one DiffNode per recognised key in operation[:changes] so
        # each dimension can be classified on its own. When :changes is not
        # a Hash, or holds none of the recognised keys, a single generic
        # update node is returned instead.
        #
        # @param operation [#[]] Update operation exposing :node1, :node2
        #   and :changes
        # @param metadata [Hash] Enriched metadata from MetadataEnricher,
        #   forwarded as keyword arguments to DiffNode.new
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc/object to determine normative status
        # @return [Array<DiffNode>] Diff nodes representing updates
        def self.convert(operation, metadata, is_metadata, normative_determiner)
          # DiffNodes carry the adapter's source node when the TreeNode has one
          node1 = unwrap_source(operation[:node1])
          node2 = unwrap_source(operation[:node2])

          raw_changes = operation[:changes]
          # A boolean or other non-hash value means "changed, no details"
          changes = raw_changes.is_a?(Hash) ? raw_changes : {}

          diff_nodes = CHANGE_BUILDERS.each_with_object([]) do |(key, builder), collected|
            next unless changes.key?(key)

            collected << public_send(builder, node1, node2, changes[key],
                                     metadata, is_metadata, normative_determiner)
          end
          return diff_nodes unless diff_nodes.empty?

          [create_generic_update_diff(node1, node2, metadata, is_metadata,
                                      normative_determiner)]
        end

        # Create DiffNode for attribute value differences
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param changes [Object] Attribute changes
        # @param metadata [Hash] Enriched metadata
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc to determine normative status
        # @return [DiffNode] Node in the :attribute_values dimension
        def self.create_attribute_value_diff(node1, node2, changes, metadata,
                                             is_metadata, normative_determiner)
          build_diff_node(node1, node2, :attribute_values,
                          ReasonBuilder.build_attribute_value_reason(changes),
                          metadata, is_metadata, normative_determiner)
        end

        # Create DiffNode for attribute order differences
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param changes [Object] Attribute order changes
        # @param metadata [Hash] Enriched metadata
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc to determine normative status
        # @return [DiffNode] Node in the :attribute_order dimension
        def self.create_attribute_order_diff(node1, node2, changes, metadata,
                                             is_metadata, normative_determiner)
          build_diff_node(node1, node2, :attribute_order,
                          ReasonBuilder.build_attribute_order_reason(changes),
                          metadata, is_metadata, normative_determiner)
        end

        # Create DiffNode for text content differences
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param changes [Object] Value changes
        # @param metadata [Hash] Enriched metadata
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc to determine normative status
        # @return [DiffNode] Node in the :text_content dimension
        def self.create_text_content_diff(node1, node2, changes, metadata,
                                          is_metadata, normative_determiner)
          build_diff_node(node1, node2, :text_content,
                          ReasonBuilder.build_text_content_reason(changes),
                          metadata, is_metadata, normative_determiner)
        end

        # Create DiffNode for element name differences
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param changes [Object] Label changes
        # @param metadata [Hash] Enriched metadata
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc to determine normative status
        # @return [DiffNode] Node in the :element_structure dimension
        def self.create_element_name_diff(node1, node2, changes, metadata,
                                          is_metadata, normative_determiner)
          build_diff_node(node1, node2, :element_structure,
                          ReasonBuilder.build_element_name_reason(changes),
                          metadata, is_metadata, normative_determiner)
        end

        # Create generic update DiffNode, used when no specific change
        # dimension was reported. It is filed under :text_content with the
        # fixed reason "content differs".
        #
        # @param node1 [Object] First node
        # @param node2 [Object] Second node
        # @param metadata [Hash] Enriched metadata
        # @param is_metadata [Boolean] Whether nodes are metadata elements
        # @param normative_determiner [#call] Proc to determine normative status
        # @return [DiffNode] Generic update diff node
        def self.create_generic_update_diff(node1, node2, metadata,
                                            is_metadata, normative_determiner)
          build_diff_node(node1, node2, :text_content, "content differs",
                          metadata, is_metadata, normative_determiner)
        end

        # Return the object's #source_node when it has one, else the object.
        def self.unwrap_source(tree_node)
          return tree_node unless tree_node.respond_to?(:source_node)

          tree_node.source_node
        end

        # Construct a DiffNode and set its normative flag: always false for
        # metadata elements, otherwise whatever the determiner answers for
        # the dimension. Metadata keys are splatted last, as in each factory.
        def self.build_diff_node(node1, node2, dimension, reason, metadata,
                                 is_metadata, normative_determiner)
          diff_node = Canon::Diff::DiffNode.new(
            node1: node1,
            node2: node2,
            dimension: dimension,
            reason: reason,
            **metadata,
          )

          if is_metadata
            diff_node.normative = false
          else
            diff_node.normative = normative_determiner.call(dimension)
          end

          diff_node
        end

        private_class_method :unwrap_source, :build_diff_node
      end
    end
  end
end
|
data/lib/canon/version.rb
CHANGED
data/lib/canon/xml/data_model.rb
CHANGED
|
@@ -18,8 +18,9 @@ module Canon
|
|
|
18
18
|
# Build XPath data model from XML string
|
|
19
19
|
#
|
|
20
20
|
# @param xml_string [String] XML content to parse
|
|
21
|
+
# @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
|
|
21
22
|
# @return [Nodes::RootNode] Root of the data model tree
|
|
22
|
-
def self.from_xml(xml_string)
|
|
23
|
+
def self.from_xml(xml_string, preserve_whitespace: false)
|
|
23
24
|
# Parse with Nokogiri
|
|
24
25
|
doc = Nokogiri::XML(xml_string) do |config|
|
|
25
26
|
config.nonet # Disable network access
|
|
@@ -30,7 +31,7 @@ module Canon
|
|
|
30
31
|
check_for_relative_namespace_uris(doc)
|
|
31
32
|
|
|
32
33
|
# Convert to XPath data model
|
|
33
|
-
build_from_nokogiri(doc)
|
|
34
|
+
build_from_nokogiri(doc, preserve_whitespace: preserve_whitespace)
|
|
34
35
|
end
|
|
35
36
|
|
|
36
37
|
# Alias for compatibility with base class interface
|
|
@@ -74,19 +75,21 @@ module Canon
|
|
|
74
75
|
|
|
75
76
|
# Build XPath data model from Nokogiri document or fragment
|
|
76
77
|
# rubocop:disable Metrics/MethodLength
|
|
77
|
-
def self.build_from_nokogiri(nokogiri_doc)
|
|
78
|
+
def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
|
|
78
79
|
root = Nodes::RootNode.new
|
|
79
80
|
|
|
80
81
|
if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
|
|
81
82
|
# For Documents (XML, HTML4, HTML5, Moxml): process the root element
|
|
82
|
-
root.add_child(build_element_node(nokogiri_doc.root
|
|
83
|
+
root.add_child(build_element_node(nokogiri_doc.root,
|
|
84
|
+
preserve_whitespace: preserve_whitespace))
|
|
83
85
|
|
|
84
86
|
# Process PIs and comments outside doc element
|
|
85
87
|
nokogiri_doc.children.each do |child|
|
|
86
88
|
next if child == nokogiri_doc.root
|
|
87
89
|
next if child.is_a?(Nokogiri::XML::DTD)
|
|
88
90
|
|
|
89
|
-
node = build_node_from_nokogiri(child
|
|
91
|
+
node = build_node_from_nokogiri(child,
|
|
92
|
+
preserve_whitespace: preserve_whitespace)
|
|
90
93
|
root.add_child(node) if node
|
|
91
94
|
end
|
|
92
95
|
else
|
|
@@ -95,7 +98,8 @@ module Canon
|
|
|
95
98
|
nokogiri_doc.children.each do |child|
|
|
96
99
|
next if child.is_a?(Nokogiri::XML::DTD)
|
|
97
100
|
|
|
98
|
-
node = build_node_from_nokogiri(child
|
|
101
|
+
node = build_node_from_nokogiri(child,
|
|
102
|
+
preserve_whitespace: preserve_whitespace)
|
|
99
103
|
root.add_child(node) if node
|
|
100
104
|
end
|
|
101
105
|
end
|
|
@@ -104,12 +108,15 @@ module Canon
|
|
|
104
108
|
end
|
|
105
109
|
|
|
106
110
|
# Build node from Nokogiri node
|
|
107
|
-
def self.build_node_from_nokogiri(nokogiri_node
|
|
111
|
+
def self.build_node_from_nokogiri(nokogiri_node,
|
|
112
|
+
preserve_whitespace: false)
|
|
108
113
|
case nokogiri_node
|
|
109
114
|
when Nokogiri::XML::Element
|
|
110
|
-
build_element_node(nokogiri_node
|
|
115
|
+
build_element_node(nokogiri_node,
|
|
116
|
+
preserve_whitespace: preserve_whitespace)
|
|
111
117
|
when Nokogiri::XML::Text
|
|
112
|
-
build_text_node(nokogiri_node
|
|
118
|
+
build_text_node(nokogiri_node,
|
|
119
|
+
preserve_whitespace: preserve_whitespace)
|
|
113
120
|
when Nokogiri::XML::Comment
|
|
114
121
|
build_comment_node(nokogiri_node)
|
|
115
122
|
when Nokogiri::XML::ProcessingInstruction
|
|
@@ -119,7 +126,7 @@ module Canon
|
|
|
119
126
|
|
|
120
127
|
# Build element node from Nokogiri element
|
|
121
128
|
# rubocop:disable Metrics/MethodLength
|
|
122
|
-
def self.build_element_node(nokogiri_element)
|
|
129
|
+
def self.build_element_node(nokogiri_element, preserve_whitespace: false)
|
|
123
130
|
element = Nodes::ElementNode.new(
|
|
124
131
|
name: nokogiri_element.name,
|
|
125
132
|
namespace_uri: nokogiri_element.namespace&.href,
|
|
@@ -134,7 +141,8 @@ module Canon
|
|
|
134
141
|
|
|
135
142
|
# Build child nodes
|
|
136
143
|
nokogiri_element.children.each do |child|
|
|
137
|
-
node = build_node_from_nokogiri(child
|
|
144
|
+
node = build_node_from_nokogiri(child,
|
|
145
|
+
preserve_whitespace: preserve_whitespace)
|
|
138
146
|
element.add_child(node) if node
|
|
139
147
|
end
|
|
140
148
|
|
|
@@ -195,13 +203,16 @@ module Canon
|
|
|
195
203
|
end
|
|
196
204
|
|
|
197
205
|
# Build text node from Nokogiri text node
|
|
198
|
-
def self.build_text_node(nokogiri_text)
|
|
206
|
+
def self.build_text_node(nokogiri_text, preserve_whitespace: false)
|
|
199
207
|
# XML text nodes: preserve all content including whitespace
|
|
200
208
|
# Unlike HTML, XML treats all whitespace as significant
|
|
201
209
|
content = nokogiri_text.content
|
|
202
210
|
|
|
203
211
|
# Skip empty text nodes between elements (common formatting whitespace)
|
|
204
|
-
|
|
212
|
+
# UNLESS preserve_whitespace is true (for structural_whitespace: :strict)
|
|
213
|
+
if !preserve_whitespace && content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
|
|
214
|
+
return nil
|
|
215
|
+
end
|
|
205
216
|
|
|
206
217
|
# Nokogiri already handles CDATA conversion and entity resolution
|
|
207
218
|
Nodes::TextNode.new(value: content)
|