canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
# Single factory for DiffNode creation in the DOM comparison path.
|
|
8
|
+
#
|
|
9
|
+
# Centralises reason building, metadata enrichment (path, serialization,
|
|
10
|
+
# attributes), and whitespace visualization — previously duplicated
|
|
11
|
+
# across MarkupComparator and XmlComparator.
|
|
12
|
+
class DiffNodeBuilder
|
|
13
|
+
# Create a DiffNode carrying a human-readable reason plus path,
# serialization and attribute metadata for both sides.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @param diff1 [Object] comparison code for the first side
# @param diff2 [Object] comparison code for the second side
# @param dimension [Symbol] dimension the difference belongs to
# @return [Canon::Diff::DiffNode]
# @raise [ArgumentError] when dimension is nil
def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
  if dimension.nil?
    raise ArgumentError, "dimension required for DiffNode"
  end

  # The reason is computed before the metadata, as in the call order
  # callers have always observed.
  core = {
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: build_reason(node1, node2, diff1, diff2, dimension),
  }

  Canon::Diff::DiffNode.new(**core, **enrich_metadata(node1, node2))
end
|
|
28
|
+
|
|
29
|
+
# --- Reason building ---------------------------------------------------
|
|
30
|
+
|
|
31
|
+
# Produce the reason string for a difference, dispatching on dimension.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @param diff1 [Object] comparison code for the first side
# @param diff2 [Object] comparison code for the second side
# @param dimension [Symbol] dimension the difference belongs to
# @return [String]
def self.build_reason(node1, node2, diff1, diff2, dimension)
  # A text difference where one side is absent is described in terms of
  # the surviving element, with its namespace when it has one.
  if dimension == :text_content && (node1.nil? || node2.nil?)
    survivor = node1 || node2
    if survivor.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(survivor)
      uri = Canon::XmlParsing.namespace_uri(survivor)
      suffix = uri.nil? || uri.empty? ? "" : " (namespace: #{uri})"
      codes = Canon::Comparison.code_pair_label(diff1, diff2)
      return "element '#{survivor.name}'#{suffix}: #{codes}"
    end
  end

  if dimension == :attribute_presence
    build_attribute_difference_reason(
      extract_attributes(node1), extract_attributes(node2)
    )
  elsif dimension == :attribute_values
    build_attribute_values_reason(node1, node2)
  elsif dimension == :text_content
    build_text_difference_reason(
      extract_text_content(node1), extract_text_content(node2)
    )
  elsif dimension == :attribute_order
    build_attribute_order_reason(node1, node2)
  elsif dimension == :comments
    # Neither side being a comment yields nil; use the generic reason then.
    comment_reason = build_comment_difference_reason(node1, node2)
    comment_reason || fallback_reason(diff1, diff2, dimension, node1, node2)
  elsif dimension == :whitespace_adjacency
    build_whitespace_adjacency_reason(node1, node2)
  else
    fallback_reason(diff1, diff2, dimension, node1, node2)
  end
end
|
|
66
|
+
|
|
67
|
+
# --- Metadata enrichment -----------------------------------------------
|
|
68
|
+
|
|
69
|
+
# Collect the keyword metadata attached to every DiffNode.
#
# The path is built from node1, or from node2 when node1 is nil.
#
# @param node1 [Object, nil]
# @param node2 [Object, nil]
# @return [Hash] :path, :serialized_before, :serialized_after,
#   :attributes_before, :attributes_after
def self.enrich_metadata(node1, node2)
  anchor = node1 || node2
  location = Canon::Diff::PathBuilder.build(anchor, format: :document)
  before_text = serialize(node1)
  after_text = serialize(node2)
  before_attrs = extract_attributes(node1)
  after_attrs = extract_attributes(node2)

  {
    path: location,
    serialized_before: before_text,
    serialized_after: after_text,
    attributes_before: before_attrs,
    attributes_after: after_attrs,
  }
end
|
|
79
|
+
|
|
80
|
+
# --- Node queries (delegate to NodeSerializer) -------------------------
|
|
81
|
+
|
|
82
|
+
# Serialize a node through Canon::Diff::NodeSerializer.
#
# @param node [Object, nil]
# @return [Object, nil] nil when node is nil, otherwise the serializer's result
def self.serialize(node)
  node.nil? ? nil : Canon::Diff::NodeSerializer.serialize(node)
end
|
|
87
|
+
|
|
88
|
+
# Extract a node's attributes through Canon::Diff::NodeSerializer.
#
# @param node [Object, nil]
# @return [Object, nil] nil when node is nil, otherwise the serializer's result
def self.extract_attributes(node)
  node.nil? ? nil : Canon::Diff::NodeSerializer.extract_attributes(node)
end
|
|
93
|
+
|
|
94
|
+
# --- Attribute reason builders -----------------------------------------
|
|
95
|
+
|
|
96
|
+
# Describe which attribute names differ between two attribute hashes.
#
# @param attrs1 [Hash, nil] attributes of the first node
# @param attrs2 [Hash, nil] attributes of the second node
# @return [String] a count summary when either side is missing or nothing
#   differs, otherwise the "only in ..." / "different values" parts joined
#   with "; "
def self.build_attribute_difference_reason(attrs1, attrs2)
  if attrs1.nil? || !attrs1 || attrs2.nil? || !attrs2
    left_count = attrs1&.keys&.size || 0
    right_count = attrs2&.keys&.size || 0
    return "#{left_count} vs #{right_count} attributes"
  end

  left_names = attrs1.keys
  right_names = attrs2.keys

  removed = (left_names - right_names).sort
  added = (right_names - left_names).sort
  changed = (left_names & right_names).select do |name|
    attrs1[name] != attrs2[name]
  end.sort

  fragments = []
  fragments << "only in first: #{removed.join(', ')}" unless removed.empty?
  fragments << "only in second: #{added.join(', ')}" unless added.empty?
  fragments << "different values: #{changed.join(', ')}" unless changed.empty?

  if fragments.empty?
    "#{left_names.size} vs #{right_names.size} attributes (same names)"
  else
    fragments.join("; ")
  end
end
|
|
115
|
+
|
|
116
|
+
# Describe attribute value changes between two nodes.
#
# Values are looked up by the string form of each attribute name.
#
# @param node1 [Object, nil]
# @param node2 [Object, nil]
# @return [String] "attributes differ" when no individual change can be
#   listed, otherwise the per-attribute changes
def self.build_attribute_values_reason(node1, node2)
  before = extract_attributes(node1) || {}
  after = extract_attributes(node2) || {}

  details = (before.keys | after.keys).sort.each_with_object([]) do |name, acc|
    old_value = before[name.to_s]
    new_value = after[name.to_s]
    next if old_value == new_value

    acc << "Changed: #{name}=\"#{old_value}\" → \"#{new_value}\""
  end

  return "attributes differ" if details.empty?

  "Attributes differ (#{details.join('; ')})"
end
|
|
130
|
+
|
|
131
|
+
# Describe an attribute reordering by listing both name sequences.
#
# @param node1 [Object, nil]
# @param node2 [Object, nil]
# @return [String]
def self.build_attribute_order_reason(node1, node2)
  before_names, after_names = [node1, node2].map do |node|
    names = extract_attributes(node)&.keys
    names || []
  end

  "Attribute order changed: [#{before_names.join(', ')}] → [#{after_names.join(', ')}]"
end
|
|
136
|
+
|
|
137
|
+
# --- Text content extraction -------------------------------------------
|
|
138
|
+
|
|
139
|
+
# Read the text of a node, whichever node model it comes from.
#
# Any StandardError raised while reading is swallowed and reported as nil.
#
# @param node [Object, nil]
# @return [String, nil]
def self.extract_text_content(node)
  return nil if node.nil?

  if node.is_a?(Canon::Xml::Nodes::TextNode)
    node.value
  elsif node.is_a?(Canon::Xml::Node)
    node.text_content
  elsif Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
    node.content.to_s
  elsif Canon::XmlParsing.xml_node?(node)
    Canon::XmlParsing.text_content(node)
  else
    node.to_s
  end
rescue StandardError
  nil
end
|
|
159
|
+
|
|
160
|
+
# --- Text diff reason --------------------------------------------------
|
|
161
|
+
|
|
162
|
+
# Describe a text difference between two strings.
#
# @param text1 [String, nil] text from the first document
# @param text2 [String, nil] text from the second document
# @return [String] a "missing" form when a side is nil, a whitespace
#   breakdown when both sides are blank, otherwise both texts with
#   whitespace made visible
def self.build_text_difference_reason(text1, text2)
  if text1.nil?
    return "both missing" if text2.nil?
    return "missing vs '#{truncate(text2)}'" if text2
  elsif text2.nil?
    return "'#{truncate(text1)}' vs missing" if text1
  end

  both_blank = whitespace_only?(text1) && whitespace_only?(text2)
  unless both_blank
    return "Text: \"#{visualize_whitespace(text1)}\" vs \"#{visualize_whitespace(text2)}\""
  end

  "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
end
|
|
173
|
+
|
|
174
|
+
# --- Comment reason ----------------------------------------------------
|
|
175
|
+
|
|
176
|
+
# Describe a comment difference.
#
# @param node1 [Object, nil] node from the first (EXPECTED) document
# @param node2 [Object, nil] node from the second (ACTUAL) document
# @return [String, nil] nil when neither node is a comment
def self.build_comment_difference_reason(node1, node2)
  expected_is_comment = node1 && NodeInspector.comment_node?(node1)
  actual_is_comment = node2 && NodeInspector.comment_node?(node2)

  return nil unless expected_is_comment || actual_is_comment

  # Past the guard at least one side is a comment, so a falsy flag on one
  # side means the comment exists only on the other.
  unless actual_is_comment
    return "Comment present on EXPECTED only: <!--#{truncate(comment_text(node1))}-->"
  end

  unless expected_is_comment
    return "Comment present on ACTUAL only: <!--#{truncate(comment_text(node2))}-->"
  end

  expected_text = truncate(comment_text(node1))
  actual_text = truncate(comment_text(node2))
  "Comment text differs: <!--#{expected_text}--> vs <!--#{actual_text}-->"
end
|
|
192
|
+
|
|
193
|
+
# Text of a comment node as reported by NodeInspector, coerced to a String.
#
# @param node [Object]
# @return [String]
def self.comment_text(node)
  raw = NodeInspector.text_content(node)
  raw.to_s
end
|
|
196
|
+
|
|
197
|
+
# --- Whitespace adjacency reason (#137) --------------------------------
|
|
198
|
+
|
|
199
|
+
# Describe a whitespace-adjacency difference: a whitespace-only text node
# on exactly one side. Falls back to the plain text reason when neither or
# both sides are whitespace-only.
#
# @param node1 [Object, nil] node from the first (EXPECTED) document
# @param node2 [Object, nil] node from the second (ACTUAL) document
# @return [String]
def self.build_whitespace_adjacency_reason(node1, node2)
  expected_text = extract_text_content(node1)
  actual_text = extract_text_content(node2)

  expected_blank = NodeInspector.whitespace_only_text?(node1)
  actual_blank = NodeInspector.whitespace_only_text?(node2)

  if expected_blank && !actual_blank
    build_adjacency_side(expected_text, actual_text, node1, "EXPECTED", "ACTUAL")
  elsif actual_blank && !expected_blank
    build_adjacency_side(actual_text, expected_text, node2, "ACTUAL", "EXPECTED")
  else
    build_text_difference_reason(expected_text, actual_text)
  end
end
|
|
218
|
+
|
|
219
|
+
# --- Whitespace visualization ------------------------------------------
|
|
220
|
+
|
|
221
|
+
# Replace every character that has an entry in the character
# visualization map with its visible stand-in.
#
# @param text [String, nil]
# @return [String] "" when text is nil
def self.visualize_whitespace(text)
  return "" if text.nil?

  substitutions = character_visualization_map
  rendered = text.each_char.map do |ch|
    substitutions[ch] || ch
  end
  rendered.join
end
|
|
227
|
+
|
|
228
|
+
# Summarise a whitespace string as a character count plus a breakdown of
# newlines, spaces and tabs.
#
# Only those three kinds are itemised. When the text holds none of them
# (for example only carriage returns, which String#strip also treats as
# whitespace) the breakdown is omitted instead of rendering an empty "()".
#
# @param text [String, nil]
# @return [String] e.g. "3 chars (1 newlines, 2 spaces)", "2 chars",
#   or "0 chars" for nil / empty input
def self.describe_whitespace(text)
  return "0 chars" if text.nil? || text.empty?

  # Insertion order fixes the order of the breakdown parts.
  itemised = { "\n" => "newlines", " " => "spaces", "\t" => "tabs" }
  parts = itemised.each_with_object([]) do |(char, label), acc|
    occurrences = text.count(char)
    acc << "#{occurrences} #{label}" if occurrences.positive?
  end

  summary = "#{text.length} chars"
  parts.empty? ? summary : "#{summary} (#{parts.join(', ')})"
end
|
|
239
|
+
|
|
240
|
+
# Whether text is non-nil and blank once stripped.
#
# @param text [Object, nil] converted with #to_s before stripping
# @return [Boolean] false for nil
def self.whitespace_only?(text)
  text.nil? ? false : text.to_s.strip.empty?
end
|
|
245
|
+
|
|
246
|
+
# Shorten text to at most max_length characters, appending "..." when
# anything was cut off.
#
# @param text [Object, nil] converted with #to_s
# @param max_length [Integer] number of characters kept
# @return [String] "" when text is nil
def self.truncate(text, max_length = 40)
  return "" if text.nil?

  string = text.to_s
  string.length > max_length ? "#{string[0...max_length]}..." : string
end
|
|
254
|
+
|
|
255
|
+
# --- Private helpers ---------------------------------------------------
|
|
256
|
+
|
|
257
|
+
# Generic reason used when no dimension-specific builder applies.
#
# @param diff1 [Object] comparison code for the first side
# @param diff2 [Object] comparison code for the second side
# @param dimension [Symbol]
# @param node1 [Object, nil]
# @param node2 [Object, nil]
# @return [String]
def self.fallback_reason(diff1, diff2, dimension, node1, node2)
  if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
    return "element structure mismatch (children differ)"
  end

  # Two XML nodes flagged as unequal elements and carrying different
  # names get a name-specific message.
  renamed = dimension == :element_structure &&
    diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
    diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
    (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
    (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
    node1.name && node2.name && node1.name != node2.name

  return "different element name (<#{node1.name}> vs <#{node2.name}>)" if renamed

  Canon::Comparison.code_pair_label(diff1, diff2)
end
|
|
272
|
+
private_class_method :fallback_reason
|
|
273
|
+
|
|
274
|
+
# Word a whitespace-adjacency reason for the side holding the whitespace.
#
# @param ws_text [String, nil] text of the whitespace-only node
# @param content_text [String, nil] text of the node on the other side
# @param ws_node [Object] the whitespace-only node
# @param present_side [String] label of the side that has the whitespace
# @param absent_side [String] label of the side that lacks it
# @return [String]
def self.build_adjacency_side(ws_text, content_text, ws_node,
                              present_side, absent_side)
  rendered_ws = visualize_whitespace(ws_text)
  partner_blank = content_text.nil? || content_text.strip.empty?

  if partner_blank
    # No meaningful partner text: locate the whitespace by its parent.
    container = whitespace_adjacency_parent_label(ws_node)
    return "Whitespace inside #{container}: " \
           "present on #{present_side} (\"#{rendered_ws}\"), absent on #{absent_side}"
  end

  placement = whitespace_partner_direction(ws_node)
  rendered_partner = visualize_whitespace(truncate(content_text))
  "Whitespace #{placement} \"#{rendered_partner}\": " \
    "present on #{present_side} (\"#{rendered_ws}\"), absent on #{absent_side}"
end
|
|
290
|
+
private_class_method :build_adjacency_side
|
|
291
|
+
|
|
292
|
+
# Label for the parent of a whitespace node, e.g. "<p>".
#
# @param ws_node [Object]
# @return [String] "(unknown parent)" when there is no parent or the
#   parent has no usable name
def self.whitespace_adjacency_parent_label(ws_node)
  container = NodeInspector.parent(ws_node)
  return "(unknown parent)" unless container

  tag = container.name
  return "(unknown parent)" if !tag || tag.empty?

  "<#{tag}>"
end
|
|
299
|
+
private_class_method :whitespace_adjacency_parent_label
|
|
300
|
+
|
|
301
|
+
# Where the whitespace node sits relative to its non-whitespace siblings.
#
# @param ws_node [Object]
# @return [String] "before" when such a sibling follows the node, "after"
#   when one only precedes it, "adjacent to" otherwise (including when the
#   parent or the node's position cannot be determined)
def self.whitespace_partner_direction(ws_node)
  container = NodeInspector.parent(ws_node)
  return "adjacent to" unless container

  peers = container.children
  position = peers.index(ws_node)
  return "adjacent to" unless position

  return "before" if non_ws_sibling_exists?(peers, position, 1)
  return "after" if non_ws_sibling_exists?(peers, position, -1)

  "adjacent to"
end
|
|
315
|
+
private_class_method :whitespace_partner_direction
|
|
316
|
+
|
|
317
|
+
# Whether a sibling that is not a whitespace-only text node exists when
# walking from idx in the given direction.
#
# @param siblings [#[], #length] ordered sibling nodes
# @param idx [Integer] index of the starting node (not itself inspected)
# @param direction [Integer] step, +1 to look forward or -1 to look back
# @return [Boolean]
def self.non_ws_sibling_exists?(siblings, idx, direction)
  i = idx + direction
  while i >= 0 && i < siblings.length
    sibling = siblings[i]
    # Coerce with #to_s, as comment_text does for the same inspector call,
    # so a text node reporting nil content counts as blank instead of
    # raising NoMethodError on nil.strip.
    blank_text = NodeInspector.text_node?(sibling) &&
      NodeInspector.text_content(sibling).to_s.strip.empty?
    return true unless blank_text

    i += direction
  end
  false
end
|
|
329
|
+
private_class_method :non_ws_sibling_exists?
|
|
330
|
+
|
|
331
|
+
# Character => visualization lookup, read once from the YAML character
# map that ships under canon/diff_formatter and memoized afterwards.
#
# Each entry is keyed by the code point named in its "unicode" field (hex)
# when present, otherwise by its literal "character" field.
#
# @return [Hash{String => Object}]
def self.character_visualization_map
  return @character_visualization_map if @character_visualization_map

  # yaml is required here so it is only loaded when a map is needed.
  require "yaml"
  lib_root = File.expand_path("../..", __dir__)
  yaml_path = File.join(lib_root, "canon/diff_formatter/character_map.yml")
  entries = YAML.load_file(yaml_path)["characters"]

  lookup = {}
  entries.each do |entry|
    hex = entry["unicode"]
    key = hex ? [hex.to_i(16)].pack("U") : entry["character"]
    lookup[key] = entry["visualization"]
  end

  @character_visualization_map = lookup
end
|
|
350
|
+
private_class_method :character_visualization_map
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module Dimensions
|
|
6
|
+
# Immutable value object representing a single comparison dimension.
|
|
7
|
+
#
|
|
8
|
+
# A dimension is an aspect of a document that can be compared with
|
|
9
|
+
# different behaviors (e.g., :strict, :normalize, :ignore). Each
|
|
10
|
+
# dimension knows its own classification rules — whether a difference
|
|
11
|
+
# is normative (affects equivalence) for a given behavior, and whether
|
|
12
|
+
# formatting detection should apply.
|
|
13
|
+
class Dimension
  attr_reader :name, :valid_behaviors

  # @param name [Symbol] Dimension identifier (e.g., :text_content)
  # @param valid_behaviors [Array<Symbol>] Allowed behaviors; a frozen copy
  #   is stored, so the caller's array is neither frozen nor aliased
  # @param normative_rule [Symbol] :behavior_not_ignore or :strict_only
  # @param formatting_detection [Boolean] Whether FormattingDetector applies
  def initialize(name:, valid_behaviors:, normative_rule: :behavior_not_ignore,
                 formatting_detection: false)
    @name = name
    # Freezing the argument itself would freeze an object owned by the
    # caller, and later caller-side mutation would leak into this value
    # object; copy first.
    @valid_behaviors = valid_behaviors.dup.freeze
    @normative_rule = normative_rule
    @formatting_detection = formatting_detection
    freeze
  end

  # Whether a difference in this dimension with the given behavior is
  # normative (affects equivalence).
  #
  # Under :strict_only only :strict is normative; under any other rule
  # every behavior except :ignore is.
  #
  # @param behavior [Symbol]
  # @return [Boolean]
  def normative?(behavior)
    case @normative_rule
    when :strict_only then behavior == :strict
    else behavior != :ignore
    end
  end

  # Whether the given behavior is valid for this dimension.
  #
  # @param behavior [Symbol]
  # @return [Boolean]
  def valid_behavior?(behavior)
    @valid_behaviors.include?(behavior)
  end

  # Whether formatting detection should apply to differences in this
  # dimension.
  #
  # @return [Boolean] the formatting_detection value given at construction
  def supports_formatting_detection?
    @formatting_detection
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Comparison
|
|
5
|
+
module Dimensions
|
|
6
|
+
# Immutable collection of dimensions for a specific format.
|
|
7
|
+
#
|
|
8
|
+
# Each format (XML, JSON, YAML) has its own DimensionSet listing the
|
|
9
|
+
# comparison aspects relevant to that format. Provides lookup by name,
|
|
10
|
+
# enumeration, and existence checks.
|
|
11
|
+
class DimensionSet
  attr_reader :format

  # Index the given dimensions by name and freeze the whole set.
  #
  # @param format [Symbol] Format identifier (e.g., :xml, :json, :yaml)
  # @param dimensions [Array<Dimension>] Dimensions for this format
  def initialize(format, dimensions)
    @format = format
    by_name = {}
    dimensions.each { |dimension| by_name[dimension.name] = dimension }
    @dimensions = by_name.freeze
    freeze
  end

  # Fetch the dimension registered under +name+.
  #
  # @param name [Symbol]
  # @return [Dimension, nil] nil when the set has no such dimension
  def [](name)
    @dimensions[name]
  end

  # Names of every dimension in the set, in the order they were given.
  #
  # @return [Array<Symbol>]
  def names
    @dimensions.keys
  end

  # Tell whether a dimension called +name+ belongs to the set.
  #
  # @param name [Symbol]
  # @return [Boolean]
  def dimension?(name)
    @dimensions.key?(name)
  end
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -1,75 +1,116 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
require_relative "text_content_dimension"
|
|
5
|
-
require_relative "comments_dimension"
|
|
6
|
-
require_relative "attribute_values_dimension"
|
|
7
|
-
require_relative "attribute_presence_dimension"
|
|
8
|
-
require_relative "attribute_order_dimension"
|
|
9
|
-
require_relative "element_position_dimension"
|
|
10
|
-
require_relative "structural_whitespace_dimension"
|
|
11
|
-
|
|
12
3
|
module Canon
|
|
13
4
|
module Comparison
|
|
14
5
|
module Dimensions
|
|
15
|
-
#
|
|
6
|
+
# Pre-built dimension sets with format lookup.
|
|
16
7
|
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
8
|
+
# XML/HTML share 7 dimensions. JSON has 3. YAML has 4.
|
|
9
|
+
# Format aliases (html, html4, html5) resolve to the XML set.
|
|
19
10
|
module Registry
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
11
|
+
SETS = {
|
|
12
|
+
xml: DimensionSet.new(:xml, [
|
|
13
|
+
Dimension.new(
|
|
14
|
+
name: :text_content,
|
|
15
|
+
valid_behaviors: %i[strict normalize
|
|
16
|
+
ignore],
|
|
17
|
+
formatting_detection: true,
|
|
18
|
+
),
|
|
19
|
+
Dimension.new(
|
|
20
|
+
name: :structural_whitespace,
|
|
21
|
+
valid_behaviors: %i[strict normalize
|
|
22
|
+
ignore],
|
|
23
|
+
normative_rule: :strict_only,
|
|
24
|
+
formatting_detection: true,
|
|
25
|
+
),
|
|
26
|
+
Dimension.new(
|
|
27
|
+
name: :attribute_presence,
|
|
28
|
+
valid_behaviors: %i[strict ignore],
|
|
29
|
+
),
|
|
30
|
+
Dimension.new(
|
|
31
|
+
name: :attribute_order,
|
|
32
|
+
valid_behaviors: %i[strict ignore],
|
|
33
|
+
),
|
|
34
|
+
Dimension.new(
|
|
35
|
+
name: :attribute_values,
|
|
36
|
+
valid_behaviors: %i[strict strip compact
|
|
37
|
+
normalize ignore],
|
|
38
|
+
),
|
|
39
|
+
Dimension.new(
|
|
40
|
+
name: :element_position,
|
|
41
|
+
valid_behaviors: %i[strict ignore],
|
|
42
|
+
),
|
|
43
|
+
Dimension.new(
|
|
44
|
+
name: :comments,
|
|
45
|
+
valid_behaviors: %i[strict ignore],
|
|
46
|
+
),
|
|
47
|
+
]),
|
|
38
48
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
49
|
+
json: DimensionSet.new(:json, [
|
|
50
|
+
Dimension.new(
|
|
51
|
+
name: :text_content,
|
|
52
|
+
valid_behaviors: %i[strict normalize
|
|
53
|
+
ignore],
|
|
54
|
+
),
|
|
55
|
+
Dimension.new(
|
|
56
|
+
name: :structural_whitespace,
|
|
57
|
+
valid_behaviors: %i[strict normalize
|
|
58
|
+
ignore],
|
|
59
|
+
normative_rule: :strict_only,
|
|
60
|
+
),
|
|
61
|
+
Dimension.new(
|
|
62
|
+
name: :key_order,
|
|
63
|
+
valid_behaviors: %i[strict ignore],
|
|
64
|
+
),
|
|
65
|
+
]),
|
|
44
66
|
|
|
45
|
-
|
|
46
|
-
|
|
67
|
+
yaml: DimensionSet.new(:yaml, [
|
|
68
|
+
Dimension.new(
|
|
69
|
+
name: :text_content,
|
|
70
|
+
valid_behaviors: %i[strict normalize
|
|
71
|
+
ignore],
|
|
72
|
+
),
|
|
73
|
+
Dimension.new(
|
|
74
|
+
name: :structural_whitespace,
|
|
75
|
+
valid_behaviors: %i[strict normalize
|
|
76
|
+
ignore],
|
|
77
|
+
normative_rule: :strict_only,
|
|
78
|
+
),
|
|
79
|
+
Dimension.new(
|
|
80
|
+
name: :key_order,
|
|
81
|
+
valid_behaviors: %i[strict ignore],
|
|
82
|
+
),
|
|
83
|
+
Dimension.new(
|
|
84
|
+
name: :comments,
|
|
85
|
+
valid_behaviors: %i[strict ignore],
|
|
86
|
+
),
|
|
87
|
+
]),
|
|
88
|
+
}.freeze
|
|
47
89
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
end
|
|
90
|
+
FORMAT_ALIASES = {
|
|
91
|
+
html: :xml,
|
|
92
|
+
html4: :xml,
|
|
93
|
+
html5: :xml,
|
|
94
|
+
}.freeze
|
|
54
95
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
96
|
+
class << self
|
|
97
|
+
# Look up the DimensionSet for a format.
|
|
98
|
+
# Format aliases (html, html4, html5) resolve to the :xml set.
|
|
99
|
+
# Unknown formats fall back to :xml.
|
|
100
|
+
#
|
|
101
|
+
# @param format [Symbol]
|
|
102
|
+
# @return [DimensionSet]
|
|
103
|
+
def for(format)
  # Resolve an alias first, then fall back to the :xml set when the
  # resulting key has no set of its own.
  resolved = FORMAT_ALIASES[format] || format
  known = SETS[resolved]
  return known if known

  SETS[:xml]
end
|
|
62
107
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# @return [Boolean] true if nodes match for this dimension
|
|
70
|
-
def self.compare(dimension_name, node1, node2, behavior) # rubocop:disable Naming/PredicateMethod
|
|
71
|
-
dimension = get(dimension_name)
|
|
72
|
-
dimension.equivalent?(node1, node2, behavior)
|
|
108
|
+
# All format names with explicit sets (excluding aliases).
|
|
109
|
+
#
|
|
110
|
+
# @return [Array<Symbol>]
|
|
111
|
+
def format_names
  # Only the keys of SETS are listed; FORMAT_ALIASES is not consulted.
  SETS.each_key.to_a
end
|
|
73
114
|
end
|
|
74
115
|
end
|
|
75
116
|
end
|