canon 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +16 -61
- data/README.adoc +5 -0
- data/Rakefile +17 -0
- data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/child_realignment.rb +140 -0
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +36 -75
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +150 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +32 -77
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +43 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +89 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +25 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +16 -42
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +7 -2
|
@@ -27,8 +27,8 @@ module Canon
|
|
|
27
27
|
def self.compare_nodes(node1, node2, opts, child_opts, diff_children,
|
|
28
28
|
differences)
|
|
29
29
|
# Handle DocumentFragment nodes - compare their children instead
|
|
30
|
-
if
|
|
31
|
-
|
|
30
|
+
if Canon::XmlParsing.document_fragment?(node1) &&
|
|
31
|
+
Canon::XmlParsing.document_fragment?(node2)
|
|
32
32
|
return compare_document_fragments(node1, node2, opts, child_opts,
|
|
33
33
|
diff_children, differences)
|
|
34
34
|
end
|
|
@@ -285,10 +285,14 @@ diff_children, differences)
|
|
|
285
285
|
return false if node1.class != node2.class
|
|
286
286
|
|
|
287
287
|
case node1
|
|
288
|
-
when Canon::Xml::Node
|
|
288
|
+
when Canon::Xml::Node
|
|
289
289
|
node1.node_type == node2.node_type
|
|
290
290
|
else
|
|
291
|
-
|
|
291
|
+
if Canon::XmlBackend.nokogiri?
|
|
292
|
+
node1.is_a?(Nokogiri::XML::Node) && node1.node_type == node2.node_type
|
|
293
|
+
else
|
|
294
|
+
Canon::XmlParsing.xml_node?(node1) && Canon::XmlParsing.node_type(node1) == Canon::XmlParsing.node_type(node2)
|
|
295
|
+
end
|
|
292
296
|
end
|
|
293
297
|
end
|
|
294
298
|
|
|
@@ -305,7 +309,7 @@ diff_children, differences)
|
|
|
305
309
|
def self.comment_node?(node, check_children: false)
|
|
306
310
|
return true if NodeInspector.comment_node?(node)
|
|
307
311
|
|
|
308
|
-
if check_children &&
|
|
312
|
+
if check_children && Canon::XmlParsing.element?(node) && !Canon::XmlParsing.children(node).empty?
|
|
309
313
|
node.children.any? { |child| NodeInspector.comment_node?(child) }
|
|
310
314
|
else
|
|
311
315
|
false
|
|
@@ -360,24 +364,20 @@ diff_children, differences)
|
|
|
360
364
|
# Dispatch by legacy Nokogiri/Moxml node type
|
|
361
365
|
def self.dispatch_legacy_node_type(node1, node2, opts, child_opts,
|
|
362
366
|
diff_children, differences)
|
|
363
|
-
# Import XmlComparator to use its comparison methods
|
|
364
367
|
require_relative "xml_comparator"
|
|
365
368
|
|
|
366
|
-
|
|
367
|
-
when Nokogiri::XML::Document
|
|
369
|
+
if Canon::XmlParsing.document?(node1)
|
|
368
370
|
XmlComparator.compare_document_nodes(node1, node2, opts, child_opts,
|
|
369
371
|
diff_children, differences)
|
|
370
|
-
|
|
371
|
-
if
|
|
372
|
+
elsif Canon::XmlParsing.xml_node?(node1)
|
|
373
|
+
if Canon::XmlParsing.element?(node1)
|
|
372
374
|
XmlComparator.compare_element_nodes(node1, node2, opts, child_opts,
|
|
373
375
|
diff_children, differences)
|
|
374
|
-
elsif node1.
|
|
376
|
+
elsif Canon::XmlParsing.text_node?(node1) || Canon::XmlParsing.cdata?(node1)
|
|
375
377
|
XmlComparator.compare_text_nodes(node1, node2, opts, differences)
|
|
376
|
-
elsif
|
|
378
|
+
elsif Canon::XmlParsing.comment?(node1)
|
|
377
379
|
XmlComparator.compare_comment_nodes(node1, node2, opts, differences)
|
|
378
|
-
elsif
|
|
379
|
-
XmlComparator.compare_text_nodes(node1, node2, opts, differences)
|
|
380
|
-
elsif node1.processing_instruction?
|
|
380
|
+
elsif Canon::XmlParsing.processing_instruction?(node1)
|
|
381
381
|
XmlComparator.compare_processing_instruction_nodes(node1, node2,
|
|
382
382
|
opts, differences)
|
|
383
383
|
else
|
|
@@ -27,6 +27,14 @@ module Canon
|
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
29
|
class << self
|
|
30
|
+
# Parse YAML from string or return as-is
|
|
31
|
+
#
|
|
32
|
+
# @param obj [String, Hash, Array] YAML string or parsed object
|
|
33
|
+
# @return [Object] Parsed YAML object
|
|
34
|
+
def parse(obj)
|
|
35
|
+
parse_yaml(obj)
|
|
36
|
+
end
|
|
37
|
+
|
|
30
38
|
# Compare two YAML objects for equivalence
|
|
31
39
|
#
|
|
32
40
|
# @param yaml1 [String, Hash, Array] First YAML
|
data/lib/canon/comparison.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "moxml"
|
|
4
|
-
require "nokogiri"
|
|
4
|
+
require "nokogiri" if Canon::XmlBackend.nokogiri?
|
|
5
5
|
require_relative "xml/whitespace_normalizer"
|
|
6
6
|
require_relative "comparison/xml_comparator"
|
|
7
7
|
require_relative "comparison/html_comparator"
|
|
@@ -104,6 +104,8 @@ module Canon
|
|
|
104
104
|
# - diff_code: Type of difference
|
|
105
105
|
#
|
|
106
106
|
module Comparison
|
|
107
|
+
autoload :ChildRealignment, "canon/comparison/child_realignment"
|
|
108
|
+
|
|
107
109
|
# Comparison result constants
|
|
108
110
|
EQUIVALENT = 1
|
|
109
111
|
MISSING_ATTRIBUTE = 2
|
|
@@ -314,7 +316,8 @@ module Canon
|
|
|
314
316
|
|
|
315
317
|
# Get global config options if not defined in opts
|
|
316
318
|
# This is needed because semantic_diff doesn't go through dom_diff's config handling
|
|
317
|
-
if !(opts[:match_profile] || opts[:global_options]) &&
|
|
319
|
+
if !(opts[:match_profile] || opts[:global_options]) && %i[xml html json
|
|
320
|
+
yaml string].include?(format1)
|
|
318
321
|
format_config = Canon::Config.instance.public_send(format1)
|
|
319
322
|
if format_config.match.profile
|
|
320
323
|
opts[:match_profile] =
|
|
@@ -331,7 +334,8 @@ module Canon
|
|
|
331
334
|
|
|
332
335
|
# Also read diff options from config (e.g., max_node_count for large documents)
|
|
333
336
|
# This is independent of match options and needs to be passed to TreeDiffIntegrator
|
|
334
|
-
if !match_opts_hash[:max_node_count] &&
|
|
337
|
+
if !match_opts_hash[:max_node_count] && %i[xml html json yaml
|
|
338
|
+
string].include?(format1)
|
|
335
339
|
diff_max_node = Canon::Config.instance.public_send(format1).diff.max_node_count
|
|
336
340
|
if diff_max_node > 10_000
|
|
337
341
|
match_opts_hash[:max_node_count] =
|
|
@@ -562,43 +566,39 @@ module Canon
|
|
|
562
566
|
|
|
563
567
|
case format
|
|
564
568
|
when :xml
|
|
565
|
-
# Delegate to XmlComparator's
|
|
566
|
-
# Adapter now handles Canon::Xml::Node directly
|
|
569
|
+
# Delegate to XmlComparator's parse - returns Canon::Xml::Node
|
|
567
570
|
doc1 = parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
568
|
-
XmlComparator.
|
|
571
|
+
XmlComparator.parse(doc, preprocessing)
|
|
569
572
|
end
|
|
570
573
|
doc2 = parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
571
|
-
XmlComparator.
|
|
574
|
+
XmlComparator.parse(doc, preprocessing)
|
|
572
575
|
end
|
|
573
576
|
[doc1, doc2]
|
|
574
577
|
when :html, :html4, :html5
|
|
575
|
-
# Delegate to HtmlComparator's parse_node_for_semantic for Canon::Xml::Node
|
|
576
578
|
[
|
|
577
579
|
parse_with_cache(obj1, format, preprocessing) do |doc|
|
|
578
|
-
HtmlComparator.
|
|
580
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
579
581
|
end,
|
|
580
582
|
parse_with_cache(obj2, format, preprocessing) do |doc|
|
|
581
|
-
HtmlComparator.
|
|
583
|
+
HtmlComparator.parse(doc, preprocessing)
|
|
582
584
|
end,
|
|
583
585
|
]
|
|
584
586
|
when :json
|
|
585
|
-
# Delegate to JsonComparator's parse_json
|
|
586
587
|
[
|
|
587
588
|
parse_with_cache(obj1, format, :none) do |doc|
|
|
588
|
-
JsonComparator.
|
|
589
|
+
JsonComparator.parse(doc)
|
|
589
590
|
end,
|
|
590
591
|
parse_with_cache(obj2, format, :none) do |doc|
|
|
591
|
-
JsonComparator.
|
|
592
|
+
JsonComparator.parse(doc)
|
|
592
593
|
end,
|
|
593
594
|
]
|
|
594
595
|
when :yaml
|
|
595
|
-
# Delegate to YamlComparator's parse_yaml
|
|
596
596
|
[
|
|
597
597
|
parse_with_cache(obj1, format, :none) do |doc|
|
|
598
|
-
YamlComparator.
|
|
598
|
+
YamlComparator.parse(doc)
|
|
599
599
|
end,
|
|
600
600
|
parse_with_cache(obj2, format, :none) do |doc|
|
|
601
|
-
YamlComparator.
|
|
601
|
+
YamlComparator.parse(doc)
|
|
602
602
|
end,
|
|
603
603
|
]
|
|
604
604
|
else
|
|
@@ -649,12 +649,10 @@ module Canon
|
|
|
649
649
|
obj
|
|
650
650
|
when Nokogiri::XML::Document, Nokogiri::HTML::Document,
|
|
651
651
|
Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
|
|
652
|
-
obj.
|
|
652
|
+
obj.to_html
|
|
653
653
|
else
|
|
654
|
-
if obj.
|
|
655
|
-
obj
|
|
656
|
-
elsif obj.respond_to?(:to_xml)
|
|
657
|
-
obj.to_xml
|
|
654
|
+
if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
|
|
655
|
+
Canon::XmlParsing.serialize(obj)
|
|
658
656
|
else
|
|
659
657
|
obj.to_s
|
|
660
658
|
end
|
|
@@ -665,7 +663,11 @@ module Canon
|
|
|
665
663
|
def serialize_document(doc, format)
|
|
666
664
|
case format
|
|
667
665
|
when :xml, :html, :html4, :html5
|
|
668
|
-
|
|
666
|
+
if Canon::XmlParsing.xml_node?(doc) || doc.is_a?(Canon::Xml::Node)
|
|
667
|
+
Canon::XmlParsing.serialize(doc)
|
|
668
|
+
else
|
|
669
|
+
doc.to_s
|
|
670
|
+
end
|
|
669
671
|
when :json
|
|
670
672
|
require "json"
|
|
671
673
|
JSON.pretty_generate(doc)
|
|
@@ -748,7 +750,7 @@ module Canon
|
|
|
748
750
|
|
|
749
751
|
# get match_profile if it is not defined in options
|
|
750
752
|
# but defined in config
|
|
751
|
-
if
|
|
753
|
+
if %i[xml html json yaml string].include?(comparison_format)
|
|
752
754
|
format_config = Canon::Config.instance.public_send(comparison_format)
|
|
753
755
|
if opts[:global_profile].nil? && format_config.match.profile
|
|
754
756
|
# Config-sourced profile has *global* priority (applied before
|
|
@@ -32,6 +32,19 @@ module Canon
|
|
|
32
32
|
@cache = nil
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
+
# Deep merge two hashes. Arrays are replaced (not concatenated).
|
|
36
|
+
def deep_merge(base, overlay)
|
|
37
|
+
result = base.dup
|
|
38
|
+
overlay.each do |key, value|
|
|
39
|
+
result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
|
|
40
|
+
deep_merge(result[key], value)
|
|
41
|
+
else
|
|
42
|
+
value
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
result
|
|
46
|
+
end
|
|
47
|
+
|
|
35
48
|
private
|
|
36
49
|
|
|
37
50
|
def cache
|
|
@@ -116,19 +129,6 @@ module Canon
|
|
|
116
129
|
content = File.read(path)
|
|
117
130
|
YAML.safe_load(content, permitted_classes: [Symbol]) || {}
|
|
118
131
|
end
|
|
119
|
-
|
|
120
|
-
# Deep merge two hashes. Arrays are replaced (not concatenated).
|
|
121
|
-
def deep_merge(base, overlay)
|
|
122
|
-
result = base.dup
|
|
123
|
-
overlay.each do |key, value|
|
|
124
|
-
result[key] = if result[key].is_a?(Hash) && value.is_a?(Hash)
|
|
125
|
-
deep_merge(result[key], value)
|
|
126
|
-
else
|
|
127
|
-
value
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
result
|
|
131
|
-
end
|
|
132
132
|
end
|
|
133
133
|
end
|
|
134
134
|
end
|
data/lib/canon/config.rb
CHANGED
|
@@ -25,15 +25,19 @@ module Canon
|
|
|
25
25
|
|
|
26
26
|
# Delegate to instance
|
|
27
27
|
def method_missing(method, ...)
|
|
28
|
-
if
|
|
29
|
-
|
|
28
|
+
if %i[xml html json yaml string profile profile= diff_mode diff_mode=
|
|
29
|
+
use_color use_color= xml_match_profile xml_match_profile=
|
|
30
|
+
html_match_profile html_match_profile= reset!].include?(method)
|
|
31
|
+
@instance.public_send(method, ...)
|
|
30
32
|
else
|
|
31
33
|
super
|
|
32
34
|
end
|
|
33
35
|
end
|
|
34
36
|
|
|
35
37
|
def respond_to_missing?(method, include_private = false)
|
|
36
|
-
|
|
38
|
+
%i[xml html json yaml string profile profile= diff_mode diff_mode=
|
|
39
|
+
use_color use_color= xml_match_profile xml_match_profile=
|
|
40
|
+
html_match_profile html_match_profile= reset!].include?(method) || super
|
|
37
41
|
end
|
|
38
42
|
end
|
|
39
43
|
|
|
@@ -700,6 +704,24 @@ module Canon
|
|
|
700
704
|
@resolver.set_programmatic(:theme, value)
|
|
701
705
|
end
|
|
702
706
|
|
|
707
|
+
# Theme inheritance (custom theme with base + overrides)
|
|
708
|
+
def theme_inheritance
|
|
709
|
+
@resolver.resolve(:theme_inheritance)
|
|
710
|
+
end
|
|
711
|
+
|
|
712
|
+
def theme_inheritance=(value)
|
|
713
|
+
@resolver.set_programmatic(:theme_inheritance, value)
|
|
714
|
+
end
|
|
715
|
+
|
|
716
|
+
# Full custom theme hash
|
|
717
|
+
def custom_theme
|
|
718
|
+
@resolver.resolve(:custom_theme)
|
|
719
|
+
end
|
|
720
|
+
|
|
721
|
+
def custom_theme=(value)
|
|
722
|
+
@resolver.set_programmatic(:custom_theme, value)
|
|
723
|
+
end
|
|
724
|
+
|
|
703
725
|
# File size limit in bytes (default 5MB)
|
|
704
726
|
def max_file_size
|
|
705
727
|
@resolver.resolve(:max_file_size)
|
|
@@ -806,6 +828,8 @@ module Canon
|
|
|
806
828
|
max_node_count: 10_000, # Maximum nodes in tree
|
|
807
829
|
max_diff_lines: 10_000, # Maximum diff output lines
|
|
808
830
|
theme: :dark, # Default theme
|
|
831
|
+
theme_inheritance: nil, # Custom theme with base + overrides
|
|
832
|
+
custom_theme: nil, # Full custom theme hash
|
|
809
833
|
}
|
|
810
834
|
|
|
811
835
|
env = format ? EnvProvider.load_diff_for_format(format) : {}
|
|
@@ -846,8 +870,8 @@ module Canon
|
|
|
846
870
|
formats = data["formats"] || {}
|
|
847
871
|
|
|
848
872
|
format_configs.each do |fmt_key, fmt_cfg|
|
|
849
|
-
fmt_data = ProfileLoader.
|
|
850
|
-
|
|
873
|
+
fmt_data = ProfileLoader.deep_merge(shared,
|
|
874
|
+
formats[fmt_key.to_s] || {})
|
|
851
875
|
fmt_cfg.apply_profile_data(fmt_data)
|
|
852
876
|
end
|
|
853
877
|
end
|
|
@@ -22,7 +22,7 @@ module Canon
|
|
|
22
22
|
@match_options = match_options
|
|
23
23
|
# Use the compare_profile from ResolvedMatchOptions if available (e.g., HtmlCompareProfile)
|
|
24
24
|
# Otherwise create a base CompareProfile
|
|
25
|
-
@profile = if match_options.
|
|
25
|
+
@profile = if match_options.is_a?(Canon::Comparison::ResolvedMatchOptions) && match_options.compare_profile
|
|
26
26
|
match_options.compare_profile
|
|
27
27
|
else
|
|
28
28
|
Canon::Comparison::CompareProfile.new(match_options)
|
|
@@ -65,7 +65,7 @@ module Canon
|
|
|
65
65
|
# (since the dimension affects equivalence), which would prevent formatting
|
|
66
66
|
# detection from being applied.
|
|
67
67
|
if diff_node.dimension == :text_content &&
|
|
68
|
-
profile.
|
|
68
|
+
profile.behavior_for(:text_content) == :normalize &&
|
|
69
69
|
!inside_preserve_element?(diff_node) &&
|
|
70
70
|
formatting_only_diff?(diff_node)
|
|
71
71
|
diff_node.formatting = true
|
|
@@ -74,7 +74,7 @@ module Canon
|
|
|
74
74
|
end
|
|
75
75
|
|
|
76
76
|
# :whitespace_adjacency is a report-only re-label of an
|
|
77
|
-
# asymmetric whitespace mismatch emitted by
|
|
77
|
+
# asymmetric whitespace mismatch emitted by ChildRealignment's
|
|
78
78
|
# two-cursor walk. Equivalence behaviour is unchanged — the
|
|
79
79
|
# underlying mismatch is normative regardless of match options.
|
|
80
80
|
if diff_node.dimension == :whitespace_adjacency
|
|
@@ -83,6 +83,14 @@ module Canon
|
|
|
83
83
|
return diff_node
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# :comments diffs from asymmetric comment nodes intentionally
|
|
87
|
+
# fall through to profile.normative_dimension? below. Unlike
|
|
88
|
+
# :whitespace_adjacency (always normative), the classification
|
|
89
|
+
# of comment diffs respects the :comments match option:
|
|
90
|
+
# :strict → normative, :ignore → informative. This is by
|
|
91
|
+
# design — callers can control whether asymmetric comments
|
|
92
|
+
# affect equivalence via the match profile.
|
|
93
|
+
|
|
86
94
|
# THIRD: Determine if this dimension is normative based on CompareProfile
|
|
87
95
|
# This respects the policy settings (strict/normalize/ignore)
|
|
88
96
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
@@ -176,16 +184,12 @@ module Canon
|
|
|
176
184
|
end
|
|
177
185
|
|
|
178
186
|
# HTML: non-breaking space (U+00A0) is never insignificant
|
|
179
|
-
text =
|
|
180
|
-
node.content
|
|
181
|
-
elsif node.respond_to?(:value)
|
|
182
|
-
node.value
|
|
183
|
-
end
|
|
187
|
+
text = Canon::Comparison::NodeInspector.text_content(node)
|
|
184
188
|
if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
|
|
185
189
|
return true
|
|
186
190
|
end
|
|
187
191
|
|
|
188
|
-
return false unless node.
|
|
192
|
+
return false unless Canon::XmlParsing.element?(node) || node.is_a?(Canon::Xml::Node)
|
|
189
193
|
|
|
190
194
|
parent = node.parent
|
|
191
195
|
return false unless parent
|
|
@@ -215,49 +219,19 @@ module Canon
|
|
|
215
219
|
end
|
|
216
220
|
|
|
217
221
|
# Extract text content from a node for formatting comparison
|
|
218
|
-
# @param node [Object] The node to extract text from
|
|
219
|
-
# @return [String, nil] The text content or nil
|
|
220
222
|
def extract_text_content(node)
|
|
221
223
|
return nil if node.nil?
|
|
222
224
|
|
|
223
|
-
|
|
224
|
-
when Canon::Xml::Nodes::TextNode
|
|
225
|
-
node.value
|
|
226
|
-
when Canon::Xml::Node
|
|
227
|
-
node.text_content
|
|
228
|
-
when Nokogiri::XML::Node
|
|
229
|
-
node.content.to_s
|
|
230
|
-
when Moxml::Node
|
|
231
|
-
node.content.to_s
|
|
232
|
-
when String
|
|
233
|
-
node
|
|
234
|
-
else
|
|
235
|
-
node.to_s
|
|
236
|
-
end
|
|
225
|
+
Canon::Comparison::NodeInspector.text_content(node)
|
|
237
226
|
rescue StandardError
|
|
238
227
|
nil
|
|
239
228
|
end
|
|
240
229
|
|
|
241
|
-
# Check if a node is a text node
|
|
242
|
-
# @param node [Object] The node to check
|
|
243
|
-
# @return [Boolean] true if the node is a text node
|
|
244
230
|
def text_node?(node)
|
|
245
231
|
return false if node.nil?
|
|
232
|
+
return true if node.is_a?(String)
|
|
246
233
|
|
|
247
|
-
|
|
248
|
-
when Canon::Xml::Nodes::TextNode
|
|
249
|
-
true
|
|
250
|
-
when Canon::Xml::Node
|
|
251
|
-
node.node_type == :text
|
|
252
|
-
when Nokogiri::XML::Node
|
|
253
|
-
node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
|
254
|
-
when Moxml::Node
|
|
255
|
-
node.text?
|
|
256
|
-
when String
|
|
257
|
-
true
|
|
258
|
-
else
|
|
259
|
-
false
|
|
260
|
-
end
|
|
234
|
+
Canon::Comparison::NodeInspector.text_node?(node)
|
|
261
235
|
end
|
|
262
236
|
end
|
|
263
237
|
end
|
data/lib/canon/diff/diff_line.rb
CHANGED
|
@@ -1047,8 +1047,8 @@ module Canon
|
|
|
1047
1047
|
end
|
|
1048
1048
|
|
|
1049
1049
|
# search_start now points inside the innermost element
|
|
1050
|
-
line_idx = SourceLocator.
|
|
1051
|
-
|
|
1050
|
+
line_idx = SourceLocator.find_line_for_offset(search_start,
|
|
1051
|
+
line_map)
|
|
1052
1052
|
return nil unless line_idx
|
|
1053
1053
|
|
|
1054
1054
|
col = search_start - line_map[line_idx][:start_offset]
|
|
@@ -1133,8 +1133,8 @@ range_start, range_end)
|
|
|
1133
1133
|
# Walk up ancestors to find one with an "id" attribute
|
|
1134
1134
|
ancestors = []
|
|
1135
1135
|
current = node
|
|
1136
|
-
while current.
|
|
1137
|
-
ancestors << current
|
|
1136
|
+
while current.is_a?(Canon::Xml::Node)
|
|
1137
|
+
ancestors << current
|
|
1138
1138
|
current = current.parent
|
|
1139
1139
|
end
|
|
1140
1140
|
|
|
@@ -1143,14 +1143,14 @@ range_start, range_end)
|
|
|
1143
1143
|
anchor_name = nil
|
|
1144
1144
|
anchor_id = nil
|
|
1145
1145
|
ancestors.each do |anc|
|
|
1146
|
-
next unless anc.
|
|
1146
|
+
next unless anc.attribute_nodes
|
|
1147
1147
|
|
|
1148
1148
|
anc.attribute_nodes.each do |attr|
|
|
1149
|
-
next unless attr.
|
|
1149
|
+
next unless attr.name == "id"
|
|
1150
1150
|
|
|
1151
1151
|
anchor = anc
|
|
1152
1152
|
anchor_name = anc.name
|
|
1153
|
-
anchor_id = attr.
|
|
1153
|
+
anchor_id = attr.value
|
|
1154
1154
|
break
|
|
1155
1155
|
end
|
|
1156
1156
|
break if anchor
|
|
@@ -1219,8 +1219,8 @@ range_start, range_end)
|
|
|
1219
1219
|
# Search for value inside leaf element
|
|
1220
1220
|
value_pos = text.index(value, leaf_tag_end + 1)
|
|
1221
1221
|
if value_pos && value_pos < leaf_close
|
|
1222
|
-
line_idx = SourceLocator.
|
|
1223
|
-
|
|
1222
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1223
|
+
line_map)
|
|
1224
1224
|
return nil unless line_idx
|
|
1225
1225
|
|
|
1226
1226
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1234,8 +1234,8 @@ range_start, range_end)
|
|
|
1234
1234
|
# Direct search: value might be directly in the anchor's content
|
|
1235
1235
|
value_pos = text.index(value, anchor_tag_end + 1)
|
|
1236
1236
|
if value_pos && value_pos < anchor_close
|
|
1237
|
-
line_idx = SourceLocator.
|
|
1238
|
-
|
|
1237
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1238
|
+
line_map)
|
|
1239
1239
|
return nil unless line_idx
|
|
1240
1240
|
|
|
1241
1241
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1255,10 +1255,10 @@ range_start, range_end)
|
|
|
1255
1255
|
# @param line_map [Array<Hash>] pre-built line offset map
|
|
1256
1256
|
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
|
|
1257
1257
|
def locate_textnode_parent(textnode, value, text, line_map)
|
|
1258
|
-
return nil unless textnode.
|
|
1258
|
+
return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
|
|
1259
1259
|
|
|
1260
1260
|
parent = textnode.parent
|
|
1261
|
-
return nil unless parent.
|
|
1261
|
+
return nil unless parent.name
|
|
1262
1262
|
|
|
1263
1263
|
parent_name = parent.name
|
|
1264
1264
|
parent_attrs = element_attribute_signature(parent)
|
|
@@ -1286,8 +1286,8 @@ range_start, range_end)
|
|
|
1286
1286
|
# Search for value within this element
|
|
1287
1287
|
value_pos = text.index(value, anchor_tag_end + 1)
|
|
1288
1288
|
if value_pos && value_pos < anchor_close
|
|
1289
|
-
line_idx = SourceLocator.
|
|
1290
|
-
|
|
1289
|
+
line_idx = SourceLocator.find_line_for_offset(value_pos,
|
|
1290
|
+
line_map)
|
|
1291
1291
|
return nil unless line_idx
|
|
1292
1292
|
|
|
1293
1293
|
col = value_pos - line_map[line_idx][:start_offset]
|
|
@@ -1310,10 +1310,10 @@ range_start, range_end)
|
|
|
1310
1310
|
# @param line_map [Array<Hash>] pre-built line offset map
|
|
1311
1311
|
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
|
|
1312
1312
|
def locate_element_in_text2(textnode, text, line_map)
|
|
1313
|
-
return nil unless textnode.
|
|
1313
|
+
return nil unless textnode.is_a?(Canon::Xml::Node) && textnode.parent
|
|
1314
1314
|
|
|
1315
1315
|
parent = textnode.parent
|
|
1316
|
-
return nil unless parent.
|
|
1316
|
+
return nil unless parent.name
|
|
1317
1317
|
|
|
1318
1318
|
parent_name = parent.name
|
|
1319
1319
|
parent_attrs = element_attribute_signature(parent)
|
|
@@ -1340,8 +1340,8 @@ range_start, range_end)
|
|
|
1340
1340
|
|
|
1341
1341
|
if is_self_closing
|
|
1342
1342
|
# Self-closing element - return position of <
|
|
1343
|
-
line_idx = SourceLocator.
|
|
1344
|
-
|
|
1343
|
+
line_idx = SourceLocator.find_line_for_offset(anchor_pos,
|
|
1344
|
+
line_map)
|
|
1345
1345
|
return nil unless line_idx
|
|
1346
1346
|
|
|
1347
1347
|
col = anchor_pos - line_map[line_idx][:start_offset]
|
|
@@ -1349,8 +1349,8 @@ range_start, range_end)
|
|
|
1349
1349
|
col: col }
|
|
1350
1350
|
else
|
|
1351
1351
|
# Regular element - return position of >
|
|
1352
|
-
line_idx = SourceLocator.
|
|
1353
|
-
|
|
1352
|
+
line_idx = SourceLocator.find_line_for_offset(tag_end_pos,
|
|
1353
|
+
line_map)
|
|
1354
1354
|
return nil unless line_idx
|
|
1355
1355
|
|
|
1356
1356
|
col = tag_end_pos - line_map[line_idx][:start_offset]
|
|
@@ -1368,10 +1368,8 @@ range_start, range_end)
|
|
|
1368
1368
|
# Build a string representation of an element's attributes for matching.
|
|
1369
1369
|
def element_attribute_signature(element)
|
|
1370
1370
|
sig = {}
|
|
1371
|
-
if element.
|
|
1371
|
+
if element.is_a?(Canon::Xml::Node) && element.attribute_nodes
|
|
1372
1372
|
element.attribute_nodes.each do |attr|
|
|
1373
|
-
next unless attr.respond_to?(:name) && attr.respond_to?(:value)
|
|
1374
|
-
|
|
1375
1373
|
sig[attr.name] = attr.value
|
|
1376
1374
|
end
|
|
1377
1375
|
end
|
|
@@ -51,16 +51,16 @@ module Canon
|
|
|
51
51
|
return node.children.map { |child| serialize(child) }.join
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
-
# Handle Nokogiri nodes
|
|
55
|
-
if
|
|
56
|
-
return node
|
|
54
|
+
# Handle Nokogiri/moxml nodes
|
|
55
|
+
if Canon::XmlParsing.xml_node?(node)
|
|
56
|
+
return Canon::XmlParsing.serialize(node)
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
# Handle tree diff nodes and other objects with serialization
|
|
60
|
+
if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
61
|
+
return serialize_treenode(node)
|
|
61
62
|
end
|
|
62
63
|
|
|
63
|
-
# Fallback to string
|
|
64
64
|
node.to_s
|
|
65
65
|
end
|
|
66
66
|
|
|
@@ -105,23 +105,20 @@ module Canon
|
|
|
105
105
|
return attrs
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
-
# Handle Nokogiri elements
|
|
109
|
-
if
|
|
108
|
+
# Handle Nokogiri/moxml elements via XmlParsing
|
|
109
|
+
if Canon::XmlParsing.element?(node)
|
|
110
110
|
attrs = {}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
value = if attr.respond_to?(:value)
|
|
114
|
-
attr.value
|
|
115
|
-
elsif attr.is_a?(String)
|
|
116
|
-
attr
|
|
117
|
-
else
|
|
118
|
-
attr.to_s
|
|
119
|
-
end
|
|
120
|
-
attrs[name] = value
|
|
111
|
+
Canon::XmlParsing.attributes(node).each do |attr|
|
|
112
|
+
attrs[attr.name] = attr.value
|
|
121
113
|
end
|
|
122
114
|
return attrs
|
|
123
115
|
end
|
|
124
116
|
|
|
117
|
+
# Handle other elements with attributes method
|
|
118
|
+
if node.is_a?(Canon::Xml::Node)
|
|
119
|
+
return {}
|
|
120
|
+
end
|
|
121
|
+
|
|
125
122
|
# Handle TreeNode attributes (already a hash)
|
|
126
123
|
if node.is_a?(Hash)
|
|
127
124
|
return node
|
|
@@ -143,10 +140,9 @@ module Canon
|
|
|
143
140
|
return node.name
|
|
144
141
|
end
|
|
145
142
|
|
|
146
|
-
# Handle Nokogiri elements
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
end
|
|
143
|
+
# Handle Nokogiri/moxml elements
|
|
144
|
+
name = Canon::XmlParsing.name(node)
|
|
145
|
+
return name.to_s if name
|
|
150
146
|
|
|
151
147
|
""
|
|
152
148
|
end
|
|
@@ -164,16 +160,13 @@ module Canon
|
|
|
164
160
|
return node.value.to_s
|
|
165
161
|
end
|
|
166
162
|
|
|
167
|
-
# Handle
|
|
168
|
-
if node.
|
|
169
|
-
return node.
|
|
163
|
+
# Handle Canon::Xml::Node
|
|
164
|
+
if node.is_a?(Canon::Xml::Node)
|
|
165
|
+
return node.text_content.to_s
|
|
170
166
|
end
|
|
171
167
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
""
|
|
168
|
+
# Handle Nokogiri/moxml nodes
|
|
169
|
+
Canon::XmlParsing.text_content(node).to_s
|
|
177
170
|
end
|
|
178
171
|
|
|
179
172
|
# Serialize attributes to string format
|