canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -1,54 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# Comparison dimensions
|
|
4
|
-
#
|
|
5
|
-
# Provides dimension classes for comparing specific aspects of documents.
|
|
6
|
-
# Each dimension knows how to extract and compare data according to different behaviors.
|
|
7
|
-
#
|
|
8
|
-
# == Architecture
|
|
9
|
-
#
|
|
10
|
-
# Dimensions represent "WHAT to compare" - specific aspects of a document that can be compared:
|
|
11
|
-
# - Text content
|
|
12
|
-
# - Comments
|
|
13
|
-
# - Attribute values
|
|
14
|
-
# - Attribute presence
|
|
15
|
-
# - Attribute order
|
|
16
|
-
# - Element position
|
|
17
|
-
# - Structural whitespace
|
|
18
|
-
#
|
|
19
|
-
# == Behaviors
|
|
20
|
-
#
|
|
21
|
-
# Each dimension supports comparison behaviors:
|
|
22
|
-
# - :strict - Exact comparison
|
|
23
|
-
# - :normalize - Normalized comparison (e.g., collapse whitespace)
|
|
24
|
-
# - :ignore - Skip comparison
|
|
25
|
-
#
|
|
26
|
-
# == Usage
|
|
27
|
-
#
|
|
28
|
-
# # Get a dimension instance
|
|
29
|
-
# dimension = Canon::Comparison::Dimensions::Registry.get(:text_content)
|
|
30
|
-
#
|
|
31
|
-
# # Compare two nodes
|
|
32
|
-
# dimension.equivalent?(node1, node2, :normalize)
|
|
33
|
-
#
|
|
34
|
-
# # Or use the registry directly
|
|
35
|
-
# Canon::Comparison::Dimensions::Registry.compare(:text_content, node1, node2, :normalize)
|
|
36
|
-
|
|
37
|
-
require_relative "dimensions/base_dimension"
|
|
38
|
-
require_relative "dimensions/registry"
|
|
39
|
-
require_relative "dimensions/text_content_dimension"
|
|
40
|
-
require_relative "dimensions/comments_dimension"
|
|
41
|
-
require_relative "dimensions/attribute_values_dimension"
|
|
42
|
-
require_relative "dimensions/attribute_presence_dimension"
|
|
43
|
-
require_relative "dimensions/attribute_order_dimension"
|
|
44
|
-
require_relative "dimensions/element_position_dimension"
|
|
45
|
-
require_relative "dimensions/structural_whitespace_dimension"
|
|
46
|
-
|
|
47
3
|
module Canon
|
|
48
4
|
module Comparison
|
|
5
|
+
# Dimension value objects for comparison aspects.
|
|
6
|
+
#
|
|
7
|
+
# Each format (XML/HTML, JSON, YAML) has a distinct set of dimensions —
|
|
8
|
+
# specific aspects of a document that can be compared with different
|
|
9
|
+
# behaviors (:strict, :normalize, :ignore).
|
|
10
|
+
#
|
|
11
|
+
# A Dimension knows its metadata (name, valid behaviors, normative
|
|
12
|
+
# classification rule). Comparison logic stays in the comparators where
|
|
13
|
+
# it has full node context.
|
|
14
|
+
#
|
|
15
|
+
# DimensionSet groups dimensions per format. Registry provides pre-built
|
|
16
|
+
# sets with format lookup (html/html4/html5 all resolve to the XML set).
|
|
49
17
|
module Dimensions
|
|
50
|
-
|
|
51
|
-
|
|
18
|
+
autoload :Dimension, "canon/comparison/dimensions/dimension"
|
|
19
|
+
autoload :DimensionSet, "canon/comparison/dimensions/dimension_set"
|
|
20
|
+
autoload :Registry, "canon/comparison/dimensions/registry"
|
|
52
21
|
end
|
|
53
22
|
end
|
|
54
23
|
end
|
|
@@ -1,21 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "../comparison" # Load base module with constants first
|
|
5
|
-
require_relative "markup_comparator"
|
|
6
|
-
require_relative "xml_comparator"
|
|
7
|
-
require_relative "match_options"
|
|
8
|
-
require_relative "comparison_result"
|
|
9
|
-
require_relative "compare_profile"
|
|
10
|
-
require_relative "html_compare_profile"
|
|
11
|
-
require_relative "../diff/diff_node"
|
|
12
|
-
require_relative "../diff/diff_classifier"
|
|
13
|
-
require_relative "strategies/match_strategy_factory"
|
|
14
|
-
require_relative "../html/data_model"
|
|
15
|
-
require_relative "xml_node_comparison"
|
|
16
|
-
require_relative "xml_comparator/diff_node_builder"
|
|
17
|
-
# Whitespace sensitivity module (single source of truth for sensitive elements)
|
|
18
|
-
require_relative "whitespace_sensitivity"
|
|
19
4
|
|
|
20
5
|
module Canon
|
|
21
6
|
module Comparison
|
|
@@ -106,12 +91,6 @@ module Canon
|
|
|
106
91
|
# Store resolved match options hash for use in comparison logic
|
|
107
92
|
opts[:match_opts] = match_opts_hash
|
|
108
93
|
|
|
109
|
-
# Use tree diff if semantic_diff option is enabled
|
|
110
|
-
if match_opts.semantic_diff?
|
|
111
|
-
return perform_semantic_tree_diff(html1, html2, opts,
|
|
112
|
-
match_opts_hash)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
94
|
# Create child_opts with resolved options
|
|
116
95
|
child_opts = opts.merge(child_opts)
|
|
117
96
|
|
|
@@ -188,9 +167,7 @@ module Canon
|
|
|
188
167
|
# accepted: dom_diff routes html/html4/html5 input through
|
|
189
168
|
# Nokogiri::HTML5.fragment per #118.
|
|
190
169
|
def fragment_node?(node)
|
|
191
|
-
|
|
192
|
-
node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
|
|
193
|
-
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
170
|
+
XmlBackend.document_fragment?(node)
|
|
194
171
|
end
|
|
195
172
|
|
|
196
173
|
# Compare children of document fragments using the shared
|
|
@@ -232,62 +209,6 @@ module Canon
|
|
|
232
209
|
end
|
|
233
210
|
end
|
|
234
211
|
|
|
235
|
-
# Perform semantic tree diff using SemanticTreeMatchStrategy
|
|
236
|
-
#
|
|
237
|
-
# @param html1 [String, Nokogiri::HTML::Document] First HTML
|
|
238
|
-
# @param html2 [String, Nokogiri::HTML::Document] Second HTML
|
|
239
|
-
# @param opts [Hash] Comparison options
|
|
240
|
-
# @param match_opts_hash [Hash] Resolved match options
|
|
241
|
-
# @return [Boolean, ComparisonResult] Result of tree diff comparison
|
|
242
|
-
def perform_semantic_tree_diff(html1, html2, opts, match_opts_hash)
|
|
243
|
-
# Capture original HTML strings for display (see equivalent? for details).
|
|
244
|
-
original_str1 = opts.delete(:_original_str1) ||
|
|
245
|
-
extract_original_string(html1)
|
|
246
|
-
original_str2 = opts.delete(:_original_str2) ||
|
|
247
|
-
extract_original_string(html2)
|
|
248
|
-
|
|
249
|
-
# Parse to Canon::Xml::Node (preserves preprocessing)
|
|
250
|
-
# For HTML, we parse as XML to get Canon::Xml::Node structure
|
|
251
|
-
node1 = parse_node_for_semantic(html1,
|
|
252
|
-
match_opts_hash[:preprocessing])
|
|
253
|
-
node2 = parse_node_for_semantic(html2,
|
|
254
|
-
match_opts_hash[:preprocessing])
|
|
255
|
-
|
|
256
|
-
# Create strategy using factory
|
|
257
|
-
strategy = Strategies::MatchStrategyFactory.create(
|
|
258
|
-
format: :html,
|
|
259
|
-
match_options: match_opts_hash,
|
|
260
|
-
)
|
|
261
|
-
|
|
262
|
-
# Pass Canon::Xml::Node directly - adapter now handles it
|
|
263
|
-
differences = strategy.match(node1, node2)
|
|
264
|
-
|
|
265
|
-
# Return based on verbose mode
|
|
266
|
-
if opts[:verbose]
|
|
267
|
-
# Get preprocessed strings for display
|
|
268
|
-
preprocessed = strategy.preprocess_for_display(node1, node2)
|
|
269
|
-
|
|
270
|
-
# Detect HTML version (default to HTML5 for Canon nodes)
|
|
271
|
-
html_version = :html5
|
|
272
|
-
|
|
273
|
-
# Return ComparisonResult with strategy metadata
|
|
274
|
-
ComparisonResult.new(
|
|
275
|
-
differences: differences,
|
|
276
|
-
preprocessed_strings: preprocessed,
|
|
277
|
-
original_strings: [original_str1, original_str2],
|
|
278
|
-
format: :html,
|
|
279
|
-
html_version: html_version,
|
|
280
|
-
match_options: match_opts_hash.merge(strategy.metadata),
|
|
281
|
-
algorithm: :semantic,
|
|
282
|
-
parse_errors_expected: Comparison.parse_errors_for(node1),
|
|
283
|
-
parse_errors_received: Comparison.parse_errors_for(node2),
|
|
284
|
-
)
|
|
285
|
-
else
|
|
286
|
-
# Simple boolean result - equivalent if no normative differences
|
|
287
|
-
differences.none?(&:normative?)
|
|
288
|
-
end
|
|
289
|
-
end
|
|
290
|
-
|
|
291
212
|
# Parse node as fragment to preserve actual content
|
|
292
213
|
# Uses HTML4.fragment or HTML5.fragment based on content detection
|
|
293
214
|
#
|
|
@@ -296,15 +217,11 @@ module Canon
|
|
|
296
217
|
# @param match_opts [Hash] Match options
|
|
297
218
|
# @return [Nokogiri::HTML::DocumentFragment] Parsed fragment
|
|
298
219
|
def parse_node_as_fragment(node, preprocessing = :none, match_opts = {})
|
|
299
|
-
|
|
300
|
-
if node.is_a?(Nokogiri::XML::DocumentFragment)
|
|
220
|
+
if XmlBackend.document_fragment?(node)
|
|
301
221
|
return node
|
|
302
222
|
end
|
|
303
223
|
|
|
304
|
-
|
|
305
|
-
# This handles cases where pre-parsed HTML4/HTML5 fragments have auto-inserted meta
|
|
306
|
-
html_string = if node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
|
|
307
|
-
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
224
|
+
html_string = if XmlBackend.document_fragment?(node)
|
|
308
225
|
node.to_s # Use to_s to avoid re-inserting meta tags
|
|
309
226
|
elsif node.is_a?(String)
|
|
310
227
|
node
|
|
@@ -312,10 +229,7 @@ module Canon
|
|
|
312
229
|
node.to_html
|
|
313
230
|
end
|
|
314
231
|
|
|
315
|
-
|
|
316
|
-
# Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since XML
|
|
317
|
-
# parser only understands the five XML entities.
|
|
318
|
-
frag = Nokogiri::XML.fragment(
|
|
232
|
+
frag = XmlBackend.xml_fragment(
|
|
319
233
|
decode_html_named_entities(html_string),
|
|
320
234
|
)
|
|
321
235
|
|
|
@@ -402,24 +316,15 @@ module Canon
|
|
|
402
316
|
|
|
403
317
|
# Normalize HTML documents to fragments to avoid DTD differences
|
|
404
318
|
# This ensures comparing string with document works correctly
|
|
405
|
-
if
|
|
406
|
-
node.is_a?(Nokogiri::HTML4::Document) ||
|
|
407
|
-
node.is_a?(Nokogiri::HTML5::Document)
|
|
408
|
-
# Get root element and create fragment from its outer HTML
|
|
409
|
-
# This avoids DOCTYPE and other document-level nodes
|
|
319
|
+
if XmlBackend.html_document?(node)
|
|
410
320
|
root = node.at_css("html") || node.root
|
|
411
321
|
if root
|
|
412
|
-
node =
|
|
322
|
+
node = XmlBackend.xml_fragment(root.to_html)
|
|
413
323
|
end
|
|
414
324
|
end
|
|
415
325
|
|
|
416
|
-
# For preprocessing modes that require whitespace filtering,
|
|
417
|
-
# apply the same post-parsing normalization used for string inputs.
|
|
418
|
-
# This is needed because dom_diff() pre-parses HTML5 strings into
|
|
419
|
-
# Nokogiri fragments before calling HtmlComparator, bypassing the
|
|
420
|
-
# string-input path where these filters are normally applied.
|
|
421
326
|
if %i[normalize format rendered].include?(preprocessing)
|
|
422
|
-
frag =
|
|
327
|
+
frag = XmlBackend.document_fragment?(node) ? node : XmlBackend.xml_fragment(node.to_html)
|
|
423
328
|
normalize_html_style_script_comments(frag)
|
|
424
329
|
if preprocessing == :rendered
|
|
425
330
|
normalize_rendered_whitespace(frag, match_opts)
|
|
@@ -467,11 +372,7 @@ module Canon
|
|
|
467
372
|
node
|
|
468
373
|
end
|
|
469
374
|
|
|
470
|
-
|
|
471
|
-
# Use XML fragment parser to avoid auto-inserted meta tags.
|
|
472
|
-
# Decode HTML named entities (&nbsp; etc.) to UTF-8 characters since
|
|
473
|
-
# XML parser only understands the five XML entities.
|
|
474
|
-
frag = Nokogiri::XML.fragment(
|
|
375
|
+
frag = XmlBackend.xml_fragment(
|
|
475
376
|
decode_html_named_entities(html_string),
|
|
476
377
|
)
|
|
477
378
|
|
|
@@ -565,17 +466,7 @@ module Canon
|
|
|
565
466
|
# @param node [Canon::Xml::Node, Nokogiri::XML::Node] HTML node
|
|
566
467
|
# @return [Symbol] :html5 or :html4
|
|
567
468
|
def detect_html_version_from_node(node)
|
|
568
|
-
|
|
569
|
-
if node.is_a?(Nokogiri::HTML5::Document) ||
|
|
570
|
-
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
571
|
-
:html5
|
|
572
|
-
elsif node.is_a?(Nokogiri::HTML4::Document) ||
|
|
573
|
-
node.is_a?(Nokogiri::HTML4::DocumentFragment)
|
|
574
|
-
:html4
|
|
575
|
-
else
|
|
576
|
-
# Default to HTML5 for Canon::Xml::Node and unknown types
|
|
577
|
-
:html5
|
|
578
|
-
end
|
|
469
|
+
XmlBackend.html_version_from_node(node)
|
|
579
470
|
end
|
|
580
471
|
|
|
581
472
|
# Serialize node to string for diff display
|
|
@@ -585,7 +476,7 @@ module Canon
|
|
|
585
476
|
# @return [String] Serialized HTML string
|
|
586
477
|
def serialize_for_display(node)
|
|
587
478
|
if node.is_a?(Canon::Xml::Node)
|
|
588
|
-
|
|
479
|
+
Canon::Diff::NodeSerializer.serialize(node)
|
|
589
480
|
elsif Canon::XmlParsing.xml_node?(node)
|
|
590
481
|
Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
|
|
591
482
|
else
|
|
@@ -780,32 +671,18 @@ compare_profile = nil)
|
|
|
780
671
|
# XML documents typically have XML processing instructions or are
|
|
781
672
|
# instances of Nokogiri::XML::Document (not HTML variants)
|
|
782
673
|
def xml_document?(node)
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
node.is_a?(Nokogiri::HTML5::Document) ||
|
|
788
|
-
node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
|
|
789
|
-
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
790
|
-
|
|
791
|
-
# If it's an XML document, check for XML processing instruction
|
|
792
|
-
if node.is_a?(Nokogiri::XML::Document) && node.children.any? do |child|
|
|
793
|
-
child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
|
|
794
|
-
child.name == "xml"
|
|
674
|
+
return false if XmlBackend.html_document?(node) || XmlBackend.document_fragment?(node)
|
|
675
|
+
|
|
676
|
+
if XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Document) && node.children.any? do |child|
|
|
677
|
+
child.is_a?(Nokogiri::XML::ProcessingInstruction) && child.name == "xml"
|
|
795
678
|
end
|
|
796
|
-
# XML documents often start with <?xml ...?> processing instruction
|
|
797
679
|
return true
|
|
798
|
-
|
|
799
|
-
# Note: We don't blindly return true here because HTML documents
|
|
800
|
-
# also inherit from XML::Document. We only return true if there's
|
|
801
|
-
# an XML processing instruction above.
|
|
802
680
|
end
|
|
803
681
|
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
end
|
|
682
|
+
if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) &&
|
|
683
|
+
XmlBackend.nokogiri? && node.children.any? do |child|
|
|
684
|
+
child.is_a?(Nokogiri::XML::ProcessingInstruction) && child.name == "xml"
|
|
685
|
+
end
|
|
809
686
|
return true
|
|
810
687
|
end
|
|
811
688
|
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "compare_profile"
|
|
4
|
-
# Whitespace sensitivity module (single source of truth for sensitive elements)
|
|
5
|
-
require_relative "whitespace_sensitivity"
|
|
6
|
-
|
|
7
3
|
module Canon
|
|
8
4
|
module Comparison
|
|
9
5
|
# HtmlCompareProfile extends CompareProfile with HTML-specific comparison policies
|
|
@@ -29,34 +25,35 @@ module Canon
|
|
|
29
25
|
# Override for HTML-specific comment handling
|
|
30
26
|
#
|
|
31
27
|
# In HTML, comments are presentational content (not part of the DOM semantics)
|
|
32
|
-
# unless explicitly set to :strict.
|
|
33
|
-
# may carry semantic meaning.
|
|
34
|
-
#
|
|
35
|
-
# HTML default for comments is :ignore, so comments don't affect equivalence
|
|
36
|
-
# unless the user explicitly sets comments: :strict
|
|
28
|
+
# unless explicitly set to :strict.
|
|
37
29
|
#
|
|
38
30
|
# @param dimension [Symbol] The match dimension to check
|
|
39
31
|
# @return [Boolean] true if differences affect equivalence
|
|
40
32
|
def affects_equivalence?(dimension)
|
|
41
|
-
# Comments in HTML: default is :ignore (presentational)
|
|
42
|
-
# Only affect equivalence if explicitly set to :strict
|
|
43
33
|
if dimension == :comments
|
|
44
|
-
# Check if comments key exists in options
|
|
45
34
|
if match_options.is_a?(Hash)
|
|
46
|
-
# If comments key doesn't exist, default to false (HTML default: ignore)
|
|
47
35
|
return false unless match_options.key?(:comments)
|
|
48
36
|
|
|
49
|
-
# If key exists, check if it's :strict
|
|
50
37
|
return match_options[:comments] == :strict
|
|
51
38
|
elsif match_options.is_a?(ResolvedMatchOptions)
|
|
52
|
-
|
|
53
|
-
return behavior == :strict
|
|
39
|
+
return behavior_for(dimension) == :strict
|
|
54
40
|
end
|
|
55
|
-
# Default: comments don't affect equivalence in HTML
|
|
56
41
|
return false
|
|
57
42
|
end
|
|
58
43
|
|
|
59
|
-
|
|
44
|
+
super
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Override normative classification for HTML-specific comment handling.
|
|
48
|
+
#
|
|
49
|
+
# Delegates to the parent class for all dimensions, which in turn delegates
|
|
50
|
+
# to Dimension objects. For :comments, applies the same HTML-specific rule
|
|
51
|
+
# as affects_equivalence? — comments default to non-normative in HTML.
|
|
52
|
+
def normative_dimension?(dimension)
|
|
53
|
+
if dimension == :comments
|
|
54
|
+
return affects_equivalence?(:comments)
|
|
55
|
+
end
|
|
56
|
+
|
|
60
57
|
super
|
|
61
58
|
end
|
|
62
59
|
|
|
@@ -1,49 +1,30 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
-
require_relative "match_options"
|
|
5
|
-
require_relative "comparison_result"
|
|
6
4
|
|
|
7
5
|
module Canon
|
|
8
6
|
module Comparison
|
|
9
7
|
# JSON comparison class
|
|
10
|
-
#
|
|
8
|
+
# Delegates to RubyObjectComparator for actual comparison logic
|
|
11
9
|
class JsonComparator
|
|
12
|
-
# Default comparison options for JSON
|
|
13
10
|
DEFAULT_OPTS = {
|
|
14
|
-
# Output options
|
|
15
11
|
verbose: false,
|
|
16
|
-
|
|
17
|
-
# Match system options
|
|
18
12
|
match_profile: nil,
|
|
19
13
|
match: nil,
|
|
20
14
|
preprocessing: nil,
|
|
21
15
|
global_profile: nil,
|
|
22
16
|
global_options: nil,
|
|
23
|
-
|
|
24
|
-
# Diff display options
|
|
25
17
|
diff: nil,
|
|
26
18
|
}.freeze
|
|
27
19
|
|
|
28
20
|
class << self
|
|
29
|
-
# Parse JSON from string or return as-is
|
|
30
|
-
#
|
|
31
|
-
# @param obj [String, Hash, Array] JSON string or parsed object
|
|
32
|
-
# @return [Object] Parsed JSON object
|
|
33
21
|
def parse(obj)
|
|
34
22
|
parse_json(obj)
|
|
35
23
|
end
|
|
36
24
|
|
|
37
|
-
# Compare two JSON objects for equivalence
|
|
38
|
-
#
|
|
39
|
-
# @param json1 [String, Hash, Array] First JSON
|
|
40
|
-
# @param json2 [String, Hash, Array] Second JSON
|
|
41
|
-
# @param opts [Hash] Comparison options
|
|
42
|
-
# @return [Boolean, ComparisonResult] true if equivalent, or ComparisonResult if verbose
|
|
43
25
|
def equivalent?(json1, json2, opts = {})
|
|
44
26
|
opts = DEFAULT_OPTS.merge(opts)
|
|
45
27
|
|
|
46
|
-
# Resolve match options with format-specific defaults
|
|
47
28
|
match_opts_hash = MatchOptions::Json.resolve(
|
|
48
29
|
format: :json,
|
|
49
30
|
match_profile: opts[:match_profile],
|
|
@@ -53,24 +34,16 @@ module Canon
|
|
|
53
34
|
global_options: opts[:global_options],
|
|
54
35
|
)
|
|
55
36
|
|
|
56
|
-
# Wrap in ResolvedMatchOptions for consistency with XML/HTML
|
|
57
|
-
Canon::Comparison::ResolvedMatchOptions.new(
|
|
58
|
-
match_opts_hash,
|
|
59
|
-
format: :json,
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
# Store resolved match options for use in comparison logic
|
|
63
37
|
opts[:match_opts] = match_opts_hash
|
|
64
38
|
|
|
65
|
-
# Parse JSON if strings
|
|
66
39
|
obj1 = parse_json(json1)
|
|
67
40
|
obj2 = parse_json(json2)
|
|
68
41
|
|
|
69
42
|
differences = []
|
|
70
|
-
result =
|
|
43
|
+
result = RubyObjectComparator.compare_objects(obj1, obj2, opts,
|
|
44
|
+
differences, "")
|
|
71
45
|
|
|
72
46
|
if opts[:verbose]
|
|
73
|
-
# Format JSON for display
|
|
74
47
|
json_str1 = obj1.is_a?(String) ? obj1 : JSON.pretty_generate(obj1)
|
|
75
48
|
json_str2 = obj2.is_a?(String) ? obj2 : JSON.pretty_generate(obj2)
|
|
76
49
|
|
|
@@ -87,148 +60,14 @@ module Canon
|
|
|
87
60
|
|
|
88
61
|
private
|
|
89
62
|
|
|
90
|
-
# Parse JSON from string or return as-is
|
|
91
63
|
def parse_json(obj)
|
|
92
64
|
return obj unless obj.is_a?(String)
|
|
93
65
|
|
|
94
66
|
begin
|
|
95
67
|
JSON.parse(obj)
|
|
96
68
|
rescue JSON::ParserError
|
|
97
|
-
obj
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
# Compare Ruby objects (Hash, Array, primitives) for JSON/YAML
|
|
102
|
-
def compare_ruby_objects(obj1, obj2, opts, differences, path)
|
|
103
|
-
# Check for type mismatch
|
|
104
|
-
unless obj1.instance_of?(obj2.class)
|
|
105
|
-
add_ruby_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
|
|
106
|
-
opts, differences)
|
|
107
|
-
return Comparison::UNEQUAL_TYPES
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
case obj1
|
|
111
|
-
when Hash
|
|
112
|
-
compare_hashes(obj1, obj2, opts, differences, path)
|
|
113
|
-
when Array
|
|
114
|
-
compare_arrays(obj1, obj2, opts, differences, path)
|
|
115
|
-
when NilClass, TrueClass, FalseClass, Numeric, String, Symbol
|
|
116
|
-
compare_primitives(obj1, obj2, opts, differences, path)
|
|
117
|
-
else
|
|
118
|
-
# Fallback to equality comparison
|
|
119
|
-
if obj1 == obj2
|
|
120
|
-
Comparison::EQUIVALENT
|
|
121
|
-
else
|
|
122
|
-
add_ruby_difference(path, obj1, obj2,
|
|
123
|
-
Comparison::UNEQUAL_PRIMITIVES, opts,
|
|
124
|
-
differences)
|
|
125
|
-
Comparison::UNEQUAL_PRIMITIVES
|
|
126
|
-
end
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
# Compare two hashes
|
|
131
|
-
def compare_hashes(hash1, hash2, opts, differences, path)
|
|
132
|
-
keys1 = hash1.keys
|
|
133
|
-
keys2 = hash2.keys
|
|
134
|
-
|
|
135
|
-
# Sort keys if order should be ignored (based on match options)
|
|
136
|
-
match_opts = opts[:match_opts]
|
|
137
|
-
if match_opts[:key_order] != :strict
|
|
138
|
-
keys1 = keys1.sort_by(&:to_s)
|
|
139
|
-
keys2 = keys2.sort_by(&:to_s)
|
|
140
|
-
elsif keys1 != keys2
|
|
141
|
-
# Strict mode: key order matters
|
|
142
|
-
# Check if keys are in same order
|
|
143
|
-
# Keys are different or in different order
|
|
144
|
-
# First check if it's just ordering (same keys, different order)
|
|
145
|
-
if keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
|
|
146
|
-
# Same keys, different order - this is a key_order difference
|
|
147
|
-
key_path = path.empty? ? "(key order)" : "#{path}.(key order)"
|
|
148
|
-
add_ruby_difference(key_path, keys1, keys2,
|
|
149
|
-
Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
|
|
150
|
-
return Comparison::UNEQUAL_HASH_KEY_ORDER
|
|
151
|
-
end
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# Check for missing keys
|
|
155
|
-
missing_in_2 = keys1 - keys2
|
|
156
|
-
missing_in_1 = keys2 - keys1
|
|
157
|
-
|
|
158
|
-
missing_in_2.each do |key|
|
|
159
|
-
key_path = path.empty? ? key.to_s : "#{path}.#{key}"
|
|
160
|
-
add_ruby_difference(key_path, hash1[key], nil,
|
|
161
|
-
Comparison::MISSING_HASH_KEY, opts, differences)
|
|
162
|
-
end
|
|
163
|
-
|
|
164
|
-
missing_in_1.each do |key|
|
|
165
|
-
key_path = path.empty? ? key.to_s : "#{path}.#{key}"
|
|
166
|
-
add_ruby_difference(key_path, nil, hash2[key],
|
|
167
|
-
Comparison::MISSING_HASH_KEY, opts, differences)
|
|
168
|
-
end
|
|
169
|
-
|
|
170
|
-
has_missing_keys = !missing_in_1.empty? || !missing_in_2.empty?
|
|
171
|
-
|
|
172
|
-
# Compare common keys
|
|
173
|
-
common_keys = keys1 & keys2
|
|
174
|
-
all_equivalent = true
|
|
175
|
-
common_keys.each do |key|
|
|
176
|
-
key_path = path.empty? ? key.to_s : "#{path}.#{key}"
|
|
177
|
-
result = compare_ruby_objects(hash1[key], hash2[key], opts,
|
|
178
|
-
differences, key_path)
|
|
179
|
-
all_equivalent = false unless result == Comparison::EQUIVALENT
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
# Return appropriate status
|
|
183
|
-
return Comparison::MISSING_HASH_KEY if has_missing_keys && all_equivalent
|
|
184
|
-
return Comparison::UNEQUAL_HASH_VALUES unless all_equivalent
|
|
185
|
-
|
|
186
|
-
has_missing_keys ? Comparison::MISSING_HASH_KEY : Comparison::EQUIVALENT
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
# Compare two arrays
|
|
190
|
-
def compare_arrays(arr1, arr2, opts, differences, path)
|
|
191
|
-
unless arr1.length == arr2.length
|
|
192
|
-
add_ruby_difference(path, arr1, arr2,
|
|
193
|
-
Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
|
|
194
|
-
differences)
|
|
195
|
-
return Comparison::UNEQUAL_ARRAY_LENGTHS
|
|
69
|
+
obj
|
|
196
70
|
end
|
|
197
|
-
|
|
198
|
-
all_equivalent = true
|
|
199
|
-
arr1.each_with_index do |elem1, index|
|
|
200
|
-
elem2 = arr2[index]
|
|
201
|
-
elem_path = "#{path}[#{index}]"
|
|
202
|
-
result = compare_ruby_objects(elem1, elem2, opts, differences,
|
|
203
|
-
elem_path)
|
|
204
|
-
all_equivalent = false unless result == Comparison::EQUIVALENT
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ARRAY_ELEMENTS
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
# Compare primitive values
|
|
211
|
-
def compare_primitives(val1, val2, opts, differences, path)
|
|
212
|
-
if val1 == val2
|
|
213
|
-
Comparison::EQUIVALENT
|
|
214
|
-
else
|
|
215
|
-
add_ruby_difference(path, val1, val2,
|
|
216
|
-
Comparison::UNEQUAL_PRIMITIVES, opts,
|
|
217
|
-
differences)
|
|
218
|
-
Comparison::UNEQUAL_PRIMITIVES
|
|
219
|
-
end
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
# Add a Ruby object difference
|
|
223
|
-
def add_ruby_difference(path, obj1, obj2, diff_code, opts, differences)
|
|
224
|
-
return unless opts[:verbose]
|
|
225
|
-
|
|
226
|
-
differences << {
|
|
227
|
-
path: path,
|
|
228
|
-
value1: obj1,
|
|
229
|
-
value2: obj2,
|
|
230
|
-
diff_code: diff_code,
|
|
231
|
-
}
|
|
232
71
|
end
|
|
233
72
|
end
|
|
234
73
|
end
|
|
@@ -10,8 +10,6 @@ module Canon
|
|
|
10
10
|
# @param obj [String, Hash, Array] Object to parse
|
|
11
11
|
# @return [Hash, Array] Parsed Ruby object
|
|
12
12
|
def self.parse_json(obj)
|
|
13
|
-
# Delegate to JsonComparator's private method via public API
|
|
14
|
-
require_relative "json_comparator"
|
|
15
13
|
JsonComparator.parse_json(obj)
|
|
16
14
|
end
|
|
17
15
|
end
|