canon 0.2.9 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +21 -22
- data/Rakefile +25 -2
- data/lib/canon/cache.rb +18 -27
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +20 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/node_inspector.rb +13 -48
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +144 -267
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +2 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +10 -12
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +3 -3
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +26 -20
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +1 -11
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +7 -15
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +5 -10
- data/lib/canon/validators/html_validator.rb +2 -8
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +2 -8
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +5 -15
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +1 -8
- data/lib/canon/xml/whitespace_normalizer.rb +2 -2
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +32 -18
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +28 -15
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "match_options/base_resolver"
|
|
4
|
-
require_relative "match_options/xml_resolver"
|
|
5
|
-
require_relative "match_options/json_resolver"
|
|
6
|
-
require_relative "match_options/yaml_resolver"
|
|
7
|
-
|
|
8
3
|
module Canon
|
|
9
4
|
module Comparison
|
|
10
5
|
# Matching Options for Canon Comparison
|
|
@@ -41,12 +36,6 @@ module Canon
|
|
|
41
36
|
@options[:preprocessing]
|
|
42
37
|
end
|
|
43
38
|
|
|
44
|
-
# Check if semantic diff is enabled
|
|
45
|
-
# @return [Boolean] true if semantic diff is enabled
|
|
46
|
-
def semantic_diff?
|
|
47
|
-
@options[:semantic_diff] == true
|
|
48
|
-
end
|
|
49
|
-
|
|
50
39
|
def to_h
|
|
51
40
|
@options.dup
|
|
52
41
|
end
|
|
@@ -54,6 +43,11 @@ module Canon
|
|
|
54
43
|
|
|
55
44
|
# Module containing match option utilities and format-specific modules
|
|
56
45
|
module MatchOptions
|
|
46
|
+
autoload :BaseResolver, "canon/comparison/match_options/base_resolver"
|
|
47
|
+
autoload :JsonResolver, "canon/comparison/match_options/json_resolver"
|
|
48
|
+
autoload :XmlResolver, "canon/comparison/match_options/xml_resolver"
|
|
49
|
+
autoload :YamlResolver, "canon/comparison/match_options/yaml_resolver"
|
|
50
|
+
|
|
57
51
|
# Preprocessing options - what to do before comparison
|
|
58
52
|
PREPROCESSING_OPTIONS = %i[none c14n normalize format rendered].freeze
|
|
59
53
|
|
|
@@ -90,30 +84,15 @@ module Canon
|
|
|
90
84
|
|
|
91
85
|
# Normalize text by collapsing whitespace and trimming
|
|
92
86
|
# Mimics HTML whitespace collapsing
|
|
93
|
-
#
|
|
94
|
-
# Handles both ASCII and Unicode whitespace characters including:
|
|
95
|
-
# - Regular space (U+0020)
|
|
96
|
-
# - Non-breaking space (U+00A0)
|
|
97
|
-
# - Other Unicode whitespace per \p{Space}
|
|
98
|
-
#
|
|
99
|
-
# @param text [String] Text to normalize
|
|
100
|
-
# @return [String] Normalized text
|
|
101
87
|
def normalize_text(text)
|
|
102
88
|
return "" if text.nil?
|
|
103
89
|
|
|
104
90
|
text.to_s
|
|
105
|
-
.gsub(/[\p{Space}
|
|
91
|
+
.gsub(/[\p{Space} ]+/, " ") # Collapse all whitespace to single space
|
|
106
92
|
.strip # Remove leading/trailing whitespace
|
|
107
93
|
end
|
|
108
94
|
|
|
109
95
|
# Normalize text preserving Unicode whitespace type distinctions.
|
|
110
|
-
#
|
|
111
|
-
# Only ASCII whitespace (space, tab, newline, etc.) is collapsed.
|
|
112
|
-
# Unicode whitespace (NBSP, ideographic space, etc.) is preserved,
|
|
113
|
-
# so different whitespace types remain distinguishable.
|
|
114
|
-
#
|
|
115
|
-
# @param text [String] Text to normalize
|
|
116
|
-
# @return [String] Normalized text with preserved whitespace types
|
|
117
96
|
def normalize_text_preserving_type(text)
|
|
118
97
|
return "" if text.nil?
|
|
119
98
|
|
|
@@ -123,10 +102,6 @@ module Canon
|
|
|
123
102
|
end
|
|
124
103
|
|
|
125
104
|
# Process attribute value according to match behavior
|
|
126
|
-
#
|
|
127
|
-
# @param value [String] Attribute value to process
|
|
128
|
-
# @param behavior [Symbol] Match behavior (:strict, :strip, :compact, :normalize, :ignore)
|
|
129
|
-
# @return [String] Processed value
|
|
130
105
|
def process_attribute_value(value, behavior)
|
|
131
106
|
case behavior
|
|
132
107
|
when :strict
|
|
@@ -134,7 +109,7 @@ module Canon
|
|
|
134
109
|
when :strip
|
|
135
110
|
value.to_s.strip
|
|
136
111
|
when :compact
|
|
137
|
-
value.to_s.gsub(/[\p{Space}
|
|
112
|
+
value.to_s.gsub(/[\p{Space} ]+/, " ")
|
|
138
113
|
when :normalize
|
|
139
114
|
normalize_text(value)
|
|
140
115
|
when :ignore
|
|
@@ -147,16 +122,8 @@ module Canon
|
|
|
147
122
|
|
|
148
123
|
# XML/HTML-specific matching options
|
|
149
124
|
module Xml
|
|
150
|
-
#
|
|
151
|
-
MATCH_DIMENSIONS =
|
|
152
|
-
text_content
|
|
153
|
-
structural_whitespace
|
|
154
|
-
attribute_presence
|
|
155
|
-
attribute_order
|
|
156
|
-
attribute_values
|
|
157
|
-
element_position
|
|
158
|
-
comments
|
|
159
|
-
].freeze
|
|
125
|
+
# Single source of truth: derived from the DimensionSet in Registry.
|
|
126
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:xml).names.freeze
|
|
160
127
|
|
|
161
128
|
# Expose FORMAT_DEFAULTS from XmlResolver (for backward compatibility)
|
|
162
129
|
FORMAT_DEFAULTS = MatchOptions::XmlResolver.const_get(:FORMAT_DEFAULTS)
|
|
@@ -165,27 +132,18 @@ module Canon
|
|
|
165
132
|
MATCH_PROFILES = MatchOptions::XmlResolver.const_get(:MATCH_PROFILES)
|
|
166
133
|
|
|
167
134
|
class << self
|
|
168
|
-
# Delegate to XmlResolver
|
|
169
135
|
def resolve(**kwargs)
|
|
170
136
|
MatchOptions::XmlResolver.resolve(**kwargs)
|
|
171
137
|
end
|
|
172
138
|
|
|
173
|
-
# Delegate to XmlResolver
|
|
174
139
|
def get_profile_options(profile)
|
|
175
140
|
MatchOptions::XmlResolver.get_profile_options(profile)
|
|
176
141
|
end
|
|
177
142
|
|
|
178
|
-
# Get valid match dimensions for XML/HTML
|
|
179
|
-
#
|
|
180
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
181
143
|
def match_dimensions
|
|
182
144
|
MatchOptions::XmlResolver.match_dimensions
|
|
183
145
|
end
|
|
184
146
|
|
|
185
|
-
# Get format-specific default options
|
|
186
|
-
#
|
|
187
|
-
# @param format [Symbol] Format type
|
|
188
|
-
# @return [Hash] Default options for the format
|
|
189
147
|
def format_defaults(format)
|
|
190
148
|
MatchOptions::XmlResolver.format_defaults(format)
|
|
191
149
|
end
|
|
@@ -194,41 +152,25 @@ module Canon
|
|
|
194
152
|
|
|
195
153
|
# JSON-specific matching options
|
|
196
154
|
module Json
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
text_content
|
|
200
|
-
structural_whitespace
|
|
201
|
-
key_order
|
|
202
|
-
].freeze
|
|
203
|
-
|
|
204
|
-
# Expose FORMAT_DEFAULTS from JsonResolver (for backward compatibility)
|
|
155
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:json).names.freeze
|
|
156
|
+
|
|
205
157
|
FORMAT_DEFAULTS = MatchOptions::JsonResolver.const_get(:FORMAT_DEFAULTS)
|
|
206
158
|
|
|
207
|
-
# Expose MATCH_PROFILES from JsonResolver (for backward compatibility)
|
|
208
159
|
MATCH_PROFILES = MatchOptions::JsonResolver.const_get(:MATCH_PROFILES)
|
|
209
160
|
|
|
210
161
|
class << self
|
|
211
|
-
# Delegate to JsonResolver
|
|
212
162
|
def resolve(**kwargs)
|
|
213
163
|
MatchOptions::JsonResolver.resolve(**kwargs)
|
|
214
164
|
end
|
|
215
165
|
|
|
216
|
-
# Delegate to JsonResolver
|
|
217
166
|
def get_profile_options(profile)
|
|
218
167
|
MatchOptions::JsonResolver.get_profile_options(profile)
|
|
219
168
|
end
|
|
220
169
|
|
|
221
|
-
# Get valid match dimensions for JSON
|
|
222
|
-
#
|
|
223
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
224
170
|
def match_dimensions
|
|
225
171
|
MatchOptions::JsonResolver.match_dimensions
|
|
226
172
|
end
|
|
227
173
|
|
|
228
|
-
# Get format-specific default options
|
|
229
|
-
#
|
|
230
|
-
# @param format [Symbol] Format type
|
|
231
|
-
# @return [Hash] Default options for the format
|
|
232
174
|
def format_defaults(format)
|
|
233
175
|
MatchOptions::JsonResolver.format_defaults(format)
|
|
234
176
|
end
|
|
@@ -237,42 +179,25 @@ module Canon
|
|
|
237
179
|
|
|
238
180
|
# YAML-specific matching options
|
|
239
181
|
module Yaml
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
text_content
|
|
243
|
-
structural_whitespace
|
|
244
|
-
key_order
|
|
245
|
-
comments
|
|
246
|
-
].freeze
|
|
247
|
-
|
|
248
|
-
# Expose FORMAT_DEFAULTS from YamlResolver (for backward compatibility)
|
|
182
|
+
MATCH_DIMENSIONS = Dimensions::Registry.for(:yaml).names.freeze
|
|
183
|
+
|
|
249
184
|
FORMAT_DEFAULTS = MatchOptions::YamlResolver.const_get(:FORMAT_DEFAULTS)
|
|
250
185
|
|
|
251
|
-
# Expose MATCH_PROFILES from YamlResolver (for backward compatibility)
|
|
252
186
|
MATCH_PROFILES = MatchOptions::YamlResolver.const_get(:MATCH_PROFILES)
|
|
253
187
|
|
|
254
188
|
class << self
|
|
255
|
-
# Delegate to YamlResolver
|
|
256
189
|
def resolve(**kwargs)
|
|
257
190
|
MatchOptions::YamlResolver.resolve(**kwargs)
|
|
258
191
|
end
|
|
259
192
|
|
|
260
|
-
# Delegate to YamlResolver
|
|
261
193
|
def get_profile_options(profile)
|
|
262
194
|
MatchOptions::YamlResolver.get_profile_options(profile)
|
|
263
195
|
end
|
|
264
196
|
|
|
265
|
-
# Get valid match dimensions for YAML
|
|
266
|
-
#
|
|
267
|
-
# @return [Array<Symbol>] Valid dimensions
|
|
268
197
|
def match_dimensions
|
|
269
198
|
MatchOptions::YamlResolver.match_dimensions
|
|
270
199
|
end
|
|
271
200
|
|
|
272
|
-
# Get format-specific default options
|
|
273
|
-
#
|
|
274
|
-
# @param format [Symbol] Format type
|
|
275
|
-
# @return [Hash] Default options for the format
|
|
276
201
|
def format_defaults(format)
|
|
277
202
|
MatchOptions::YamlResolver.format_defaults(format)
|
|
278
203
|
end
|
|
@@ -10,37 +10,25 @@ module Canon
|
|
|
10
10
|
# * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
|
|
11
11
|
# * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
|
|
12
12
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
# Every node query in the codebase should go through this module.
|
|
18
|
-
# Do not create private dispatch methods in consumers.
|
|
13
|
+
# Architecture: NodeInspector handles Canon-native types (Canon::Xml::Node,
|
|
14
|
+
# TreeNode) directly, then delegates ALL backend-specific queries to
|
|
15
|
+
# XmlParsing. No Moxml/Nokogiri constants are referenced here — that
|
|
16
|
+
# knowledge lives exclusively in XmlParsing.
|
|
19
17
|
module NodeInspector
|
|
20
|
-
NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
|
|
21
|
-
|
|
22
18
|
# --- Type predicates ---
|
|
23
19
|
|
|
24
20
|
def self.text_node?(node)
|
|
25
21
|
return false unless node
|
|
26
22
|
return node.node_type == :text if node.is_a?(Canon::Xml::Node)
|
|
27
23
|
|
|
28
|
-
|
|
29
|
-
node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
|
|
30
|
-
else
|
|
31
|
-
node.is_a?(Moxml::Text)
|
|
32
|
-
end
|
|
24
|
+
XmlParsing.text_node?(node)
|
|
33
25
|
end
|
|
34
26
|
|
|
35
27
|
def self.element_node?(node)
|
|
36
28
|
return false unless node
|
|
37
29
|
return node.node_type == :element if node.is_a?(Canon::Xml::Node)
|
|
38
30
|
|
|
39
|
-
|
|
40
|
-
node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
|
|
41
|
-
else
|
|
42
|
-
node.is_a?(Moxml::Element)
|
|
43
|
-
end
|
|
31
|
+
XmlParsing.element?(node)
|
|
44
32
|
end
|
|
45
33
|
|
|
46
34
|
def self.comment_node?(node)
|
|
@@ -57,7 +45,7 @@ module Canon
|
|
|
57
45
|
end
|
|
58
46
|
false
|
|
59
47
|
else
|
|
60
|
-
|
|
48
|
+
XmlParsing.comment?(node)
|
|
61
49
|
end
|
|
62
50
|
end
|
|
63
51
|
|
|
@@ -100,7 +88,6 @@ module Canon
|
|
|
100
88
|
|
|
101
89
|
# --- Node queries ---
|
|
102
90
|
|
|
103
|
-
# Unified node name extraction across all node types.
|
|
104
91
|
def self.name(node)
|
|
105
92
|
return nil unless node
|
|
106
93
|
return node.name if node.is_a?(Canon::Xml::Node)
|
|
@@ -109,7 +96,6 @@ module Canon
|
|
|
109
96
|
XmlParsing.name(node)
|
|
110
97
|
end
|
|
111
98
|
|
|
112
|
-
# Unified parent access across all node types.
|
|
113
99
|
def self.parent(node)
|
|
114
100
|
return nil unless node
|
|
115
101
|
return node.parent if node.is_a?(Canon::Xml::Node)
|
|
@@ -118,7 +104,6 @@ module Canon
|
|
|
118
104
|
XmlParsing.parent(node)
|
|
119
105
|
end
|
|
120
106
|
|
|
121
|
-
# Unified children access across all node types.
|
|
122
107
|
def self.children(node)
|
|
123
108
|
return [] unless node
|
|
124
109
|
return node.children if node.is_a?(Canon::Xml::Node)
|
|
@@ -127,34 +112,21 @@ module Canon
|
|
|
127
112
|
XmlParsing.children(node)
|
|
128
113
|
end
|
|
129
114
|
|
|
130
|
-
# Extract the text content of +node+ as a String.
|
|
131
115
|
def self.text_content(node)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
node.text_content.to_s
|
|
137
|
-
when Moxml::Text
|
|
138
|
-
node.content.to_s
|
|
139
|
-
else
|
|
140
|
-
XmlParsing.text_content(node).to_s
|
|
141
|
-
end
|
|
116
|
+
return node.value.to_s if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
117
|
+
return node.text_content.to_s if node.is_a?(Canon::Xml::Node)
|
|
118
|
+
|
|
119
|
+
XmlParsing.text_content(node).to_s
|
|
142
120
|
end
|
|
143
121
|
|
|
144
|
-
# Unified node type that always returns a symbol.
|
|
145
|
-
# Returns nil for unrecognised nodes.
|
|
146
122
|
def self.node_type(node)
|
|
147
123
|
return nil unless node
|
|
148
124
|
return node.node_type if node.is_a?(Canon::Xml::Node)
|
|
125
|
+
return node.type&.to_sym if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
149
126
|
|
|
150
|
-
|
|
151
|
-
node.type&.to_sym
|
|
152
|
-
else
|
|
153
|
-
XmlParsing.node_type(node)
|
|
154
|
-
end
|
|
127
|
+
XmlParsing.node_type(node)
|
|
155
128
|
end
|
|
156
129
|
|
|
157
|
-
# Unified attribute value access.
|
|
158
130
|
def self.attribute_value(node, attr_name)
|
|
159
131
|
return nil unless node
|
|
160
132
|
|
|
@@ -168,7 +140,6 @@ module Canon
|
|
|
168
140
|
end
|
|
169
141
|
end
|
|
170
142
|
|
|
171
|
-
# Unified namespace URI access.
|
|
172
143
|
def self.namespace_uri(node)
|
|
173
144
|
return nil unless node
|
|
174
145
|
|
|
@@ -179,7 +150,6 @@ module Canon
|
|
|
179
150
|
end
|
|
180
151
|
end
|
|
181
152
|
|
|
182
|
-
# Extract parse-time errors carried on a node or its owning document.
|
|
183
153
|
def self.parse_errors(node)
|
|
184
154
|
return [] if node.nil?
|
|
185
155
|
return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
|
|
@@ -194,11 +164,6 @@ module Canon
|
|
|
194
164
|
[]
|
|
195
165
|
end
|
|
196
166
|
end
|
|
197
|
-
|
|
198
|
-
# Deprecated: use NodeInspector.parent instead.
|
|
199
|
-
def self.parent_of(node)
|
|
200
|
-
parent(node)
|
|
201
|
-
end
|
|
202
167
|
end
|
|
203
168
|
end
|
|
204
169
|
end
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
  module Comparison
    # Shared comparison pipeline helpers used by both algorithms.
    #
    # Both `dom_diff` and `semantic_diff` need to:
    #   - detect document format from inputs (with optional hint)
    #   - validate that the two formats are comparable
    #   - merge global config-sourced profile / options into the opts hash
    #   - capture original-string snapshots before parsing mutates inputs
    #   - parse both inputs through the format-specific comparator
    #
    # These steps are pure pipeline mechanics — they have nothing to do with
    # the comparison algorithm itself. Keeping them here ensures the two
    # algorithm entrypoints cannot drift out of sync (see lutaml/canon
    # "Two Comparison Algorithms — Distinct by Design" in CLAUDE.md —
    # the algorithm cores stay separate; only shared infrastructure is
    # consolidated).
    module Pipeline
      # Formats whose Canon::Config exposes a match profile / options.
      CONFIG_BACKED_FORMATS = %i[xml html json yaml string].freeze

      # Cross-format compatibility groups. DOM comparison accepts these
      # pairings because both sides parse to the same Ruby structure.
      # Semantic comparison does not — it requires exact format match.
      COMPATIBLE_FORMAT_GROUPS = [
        %i[json ruby_object].freeze,
        %i[yaml ruby_object].freeze,
      ].freeze

      class << self
        # Detect formats for both inputs, honouring an explicit hint.
        #
        # When a hint is given it is used for both sides and no detection
        # runs; otherwise each input is passed to FormatDetector.detect.
        #
        # @param obj1 [Object] First input
        # @param obj2 [Object] Second input
        # @param format_hint [Symbol, nil] Explicit format override
        # @return [Array<Symbol, Symbol>] Detected or hinted formats
        def detect_formats(obj1, obj2, format_hint)
          return [format_hint, format_hint] if format_hint

          [FormatDetector.detect(obj1), FormatDetector.detect(obj2)]
        end

        # True when the two formats can be compared by the DOM algorithm.
        #
        # DOM allows `ruby_object` to be compared against `json` or `yaml`
        # because both sides parse to the same Ruby structure. Semantic
        # comparison does not allow this — it requires exact format match.
        #
        # @param format1 [Symbol]
        # @param format2 [Symbol]
        # @param strict [Boolean] When true, require exact match (semantic)
        # @return [Boolean]
        def formats_compatible?(format1, format2, strict: false)
          return true if format1 == format2
          return false if strict

          COMPATIBLE_FORMAT_GROUPS.any? do |group|
            group.include?(format1) && group.include?(format2)
          end
        end

        # Raise a helpful error if formats are incompatible.
        #
        # @param format1 [Symbol]
        # @param format2 [Symbol]
        # @param strict [Boolean] Passed to {formats_compatible?}
        # @raise [Canon::CompareFormatMismatchError]
        # @return [void]
        def validate_compatible!(format1, format2, strict: false)
          return if formats_compatible?(format1, format2, strict: strict)

          raise Canon::CompareFormatMismatchError.new(format1, format2)
        end

        # Merge global config-sourced profile and options into `opts`.
        #
        # Reads `Canon::Config.instance.<format>.match` for a global
        # `profile` and `profile_options`, and merges them into a copy of
        # the supplied opts hash. Caller-supplied values always win:
        # a non-nil caller `:global_profile` is kept, and config-derived
        # `profile_options` extend rather than replace caller-supplied
        # `global_options`.
        #
        # Returns the original opts hash unchanged when the format is not
        # config-backed (e.g. `:ruby_object`).
        #
        # @param format [Symbol]
        # @param opts [Hash] Caller opts (will not be mutated)
        # @return [Hash] New opts hash with config globals merged in
        def resolve_config(format, opts)
          return opts unless CONFIG_BACKED_FORMATS.include?(format)

          match_config = Canon::Config.instance.public_send(format).match
          profile = match_config.profile
          profile_opts = match_config.profile_options

          resolved = opts.dup
          resolved[:global_profile] = profile if resolved[:global_profile].nil? && profile

          if profile_opts.any?
            resolved[:global_options] = merge_profile_options(
              resolved[:global_options], profile_opts
            )
          end

          resolved
        end

        # Capture pre-parse string snapshots for diff display.
        #
        # Parsing (especially HTML) can mutate inputs, so originals must
        # be captured before any parsing happens. Strings pass through
        # unchanged; parsed nodes are serialized (see
        # {extract_original_string}).
        #
        # @param obj1 [Object]
        # @param obj2 [Object]
        # @return [Array<String, String>] Captured original strings
        def capture_originals(obj1, obj2)
          [extract_original_string(obj1), extract_original_string(obj2)]
        end

        # Parse both inputs through the format-specific comparator.
        #
        # Delegates to `XmlComparator`, `HtmlComparator`, `JsonComparator`,
        # or `YamlComparator` based on format. String inputs are parsed
        # through `Cache`, keyed on document, format and preprocessing;
        # non-String inputs are returned as given. Unknown formats return
        # both inputs untouched.
        #
        # @param obj1 [Object]
        # @param obj2 [Object]
        # @param format [Symbol]
        # @param match_opts_hash [Hash] Resolved match options
        # @return [Array<Object, Object>] Parsed documents
        def parse_pair(obj1, obj2, format, match_opts_hash)
          preprocessing = match_opts_hash[:preprocessing] || :none
          parser, cache_preprocessing = parser_for(format, preprocessing)
          return [obj1, obj2] unless parser

          # obj1 is always parsed before obj2, as in the unrolled form.
          [obj1, obj2].map do |doc|
            parse_with_cache(doc, format, cache_preprocessing, &parser)
          end
        end

        # Pre-parse HTML strings through `HtmlParser.parse(_, :html5)`.
        #
        # The DOM comparator needs HTML4 and HTML5 inputs to share HTML's
        # whitespace-sensitivity semantics, which means routing both
        # through Nokogiri::HTML5.fragment up front (issue #118).
        # The semantic comparator does not need this — it uses Canon's
        # own HTML data model downstream — so this helper is opt-in.
        #
        # Returns the inputs unchanged if they are not strings.
        #
        # @param obj1 [Object]
        # @param obj2 [Object]
        # @return [Array<Object, Object>] Potentially pre-parsed HTML inputs
        def preparse_html_pair(obj1, obj2)
          [
            html_string?(obj1) ? HtmlParser.parse(obj1, :html5) : obj1,
            html_string?(obj2) ? HtmlParser.parse(obj2, :html5) : obj2,
          ]
        end

        # True when the input is a String.
        #
        # No content sniffing happens here: the caller has already decided
        # the comparison is an HTML one, so any String counts as HTML source.
        #
        # @param obj [Object]
        # @return [Boolean]
        def html_string?(obj)
          obj.is_a?(String)
        end

        private

        # Merge caller-supplied global_options with config profile_opts.
        #
        # Caller values win on key conflict; profile_opts fill in gaps.
        # `MatchConfig#profile_options` already returns a fresh hash
        # (via `Hash#except`), so we can return it directly without dup.
        #
        # @param existing [Hash, nil] Caller-supplied options
        # @param profile_opts [Hash] Config-sourced options
        # @return [Hash] Merged hash
        def merge_profile_options(existing, profile_opts)
          return profile_opts if existing.nil?

          profile_opts.merge(existing)
        end

        # Select the parse callable and cache preprocessing key for a format.
        #
        # XML and HTML parsing depends on the preprocessing mode, so it is
        # part of their cache key. JSON and YAML parsers take no
        # preprocessing argument and are always cached under `:none`.
        #
        # @param format [Symbol] Document format
        # @param preprocessing [Symbol] Resolved preprocessing option
        # @return [Array(Proc, Symbol), nil] Parser and cache preprocessing,
        #   or nil when the format has no parser
        def parser_for(format, preprocessing)
          case format
          when :xml
            [->(doc) { XmlComparator.parse(doc, preprocessing) }, preprocessing]
          when :html, :html4, :html5
            [->(doc) { HtmlComparator.parse(doc, preprocessing) }, preprocessing]
          when :json
            [->(doc) { JsonComparator.parse(doc) }, :none]
          when :yaml
            [->(doc) { YamlComparator.parse(doc) }, :none]
          end
        end

        # Parse a single document with cache lookup.
        #
        # Non-String documents are treated as already parsed and returned
        # without touching the cache.
        #
        # @param doc [Object] Document (string or already-parsed)
        # @param format [Symbol] Document format
        # @param preprocessing [Symbol] Preprocessing option
        # @yield [doc] Block to parse the document if not cached
        # @return [Object] Parsed document
        def parse_with_cache(doc, format, preprocessing)
          return doc unless doc.is_a?(String)

          Cache.fetch(:document_parse,
                      Cache.key_for_document(doc, format, preprocessing)) do # rubocop:disable Lint/UselessDefaultValueArgument
            yield doc
          end
        end

        # Extract a string snapshot from various input types.
        #
        # Strings pass through; Nokogiri documents and fragments use
        # to_html; Canon nodes and other backend XML nodes go through
        # Canon::XmlParsing.serialize; everything else falls back to to_s.
        #
        # @param obj [Object]
        # @return [String] String snapshot
        def extract_original_string(obj)
          return obj if obj.is_a?(String)
          return obj.to_html if nokogiri_document?(obj)

          if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
            Canon::XmlParsing.serialize(obj)
          else
            obj.to_s
          end
        end

        # True when +obj+ is a Nokogiri document or document fragment.
        #
        # Guarded with `defined?` so that a non-String input does not raise
        # NameError when Nokogiri has not been loaded; an object cannot be
        # a Nokogiri document in that case anyway.
        #
        # @param obj [Object]
        # @return [Boolean]
        def nokogiri_document?(obj)
          return false unless defined?(Nokogiri::XML::Document)

          [
            Nokogiri::XML::Document, Nokogiri::HTML::Document,
            Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
          ].any? { |klass| obj.is_a?(klass) }
        end
      end
    end
  end
end
|