canon 0.2.9 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +21 -22
- data/Rakefile +25 -2
- data/lib/canon/cache.rb +18 -27
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +20 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/node_inspector.rb +13 -48
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +144 -267
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +2 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +10 -12
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +3 -3
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +26 -20
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +1 -11
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +7 -15
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +5 -10
- data/lib/canon/validators/html_validator.rb +2 -8
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +2 -8
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +5 -15
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +1 -8
- data/lib/canon/xml/whitespace_normalizer.rb +2 -2
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +32 -18
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +28 -15
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "../xml_backend"
|
|
5
|
-
require_relative "../xml/c14n"
|
|
6
|
-
require_relative "../pretty_printer/xml"
|
|
7
|
-
require_relative "../validators/xml_validator"
|
|
8
4
|
|
|
9
5
|
module Canon
|
|
10
6
|
module Formatters
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Format-specific canonicalizers and parsers for the top-level
|
|
5
|
+
# `Canon.format` / `Canon.parse` API. Distinct from {DiffFormatter},
|
|
6
|
+
# which renders comparison output.
|
|
7
|
+
module Formatters
|
|
8
|
+
autoload :Html4Formatter, "canon/formatters/html4_formatter"
|
|
9
|
+
autoload :Html5Formatter, "canon/formatters/html5_formatter"
|
|
10
|
+
autoload :HtmlFormatter, "canon/formatters/html_formatter"
|
|
11
|
+
autoload :HtmlFormatterBase, "canon/formatters/html_formatter_base"
|
|
12
|
+
autoload :JsonFormatter, "canon/formatters/json_formatter"
|
|
13
|
+
autoload :XmlFormatter, "canon/formatters/xml_formatter"
|
|
14
|
+
autoload :YamlFormatter, "canon/formatters/yaml_formatter"
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -1,14 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "../data_model"
|
|
5
|
-
require_relative "../xml/nodes/root_node"
|
|
6
|
-
require_relative "../xml/nodes/element_node"
|
|
7
|
-
require_relative "../xml/nodes/namespace_node"
|
|
8
|
-
require_relative "../xml/nodes/attribute_node"
|
|
9
|
-
require_relative "../xml/nodes/text_node"
|
|
10
|
-
require_relative "../xml/nodes/comment_node"
|
|
11
|
-
require_relative "../xml/nodes/processing_instruction_node"
|
|
12
4
|
|
|
13
5
|
module Canon
|
|
14
6
|
module Html
|
|
@@ -81,7 +73,6 @@ module Canon
|
|
|
81
73
|
def self.serialize(node)
|
|
82
74
|
# HTML nodes use the same serialization as XML
|
|
83
75
|
# Delegate to XML serialization implementation
|
|
84
|
-
require_relative "../xml/data_model"
|
|
85
76
|
Canon::Xml::DataModel.serialize(node)
|
|
86
77
|
end
|
|
87
78
|
|
|
@@ -89,7 +80,7 @@ module Canon
|
|
|
89
80
|
def self.build_from_nokogiri(nokogiri_doc)
|
|
90
81
|
root = Canon::Xml::Nodes::RootNode.new
|
|
91
82
|
|
|
92
|
-
if nokogiri_doc.
|
|
83
|
+
if nokogiri_doc.is_a?(Nokogiri::XML::Document) && nokogiri_doc.root
|
|
93
84
|
# For Documents (HTML4, HTML5): process the root element
|
|
94
85
|
root.add_child(build_element_node(nokogiri_doc.root))
|
|
95
86
|
|
|
@@ -222,7 +213,6 @@ module Canon
|
|
|
222
213
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
223
214
|
|
|
224
215
|
# Check if whitespace is between inline siblings
|
|
225
|
-
require_relative "../comparison/whitespace_sensitivity"
|
|
226
216
|
unless whitespace_sensitive_tags.include?(parent_name) ||
|
|
227
217
|
Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(nokogiri_text)
|
|
228
218
|
return nil
|
data/lib/canon/html.rb
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "html/data_model"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
|
-
# HTML-specific functionality for Canon
|
|
4
|
+
# HTML-specific functionality for Canon.
|
|
5
|
+
#
|
|
6
|
+
# Children are autoloaded — never `require_relative` them.
|
|
7
7
|
module Html
|
|
8
|
+
autoload :DataModel, "canon/html/data_model"
|
|
8
9
|
end
|
|
9
10
|
end
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "html_void_elements"
|
|
5
4
|
|
|
6
5
|
module Canon
|
|
7
6
|
module PrettyPrinter
|
|
@@ -398,7 +397,6 @@ module Canon
|
|
|
398
397
|
|
|
399
398
|
# Load the default visualization map from DiffFormatter constants.
|
|
400
399
|
def default_vis_map
|
|
401
|
-
require_relative "../diff_formatter"
|
|
402
400
|
Canon::DiffFormatter::DEFAULT_VISUALIZATION_MAP
|
|
403
401
|
rescue LoadError, NameError
|
|
404
402
|
{ " " => "░", "\t" => "⇥", "\n" => "↵", "\r" => "⏎", "\u00A0" => "␣" }
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Pretty-printers that emit format-aware, fixture-ready output.
|
|
5
|
+
module PrettyPrinter
|
|
6
|
+
autoload :Html, "canon/pretty_printer/html"
|
|
7
|
+
autoload :HtmlVoidElements, "canon/pretty_printer/html_void_elements"
|
|
8
|
+
autoload :Json, "canon/pretty_printer/json"
|
|
9
|
+
autoload :Xml, "canon/pretty_printer/xml"
|
|
10
|
+
autoload :XmlNormalized, "canon/pretty_printer/xml_normalized"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
@@ -193,7 +193,7 @@ module Canon
|
|
|
193
193
|
# @param element [Nokogiri::XML::Element] Element to check
|
|
194
194
|
# @return [Boolean] True if element is whitespace-sensitive
|
|
195
195
|
def whitespace_sensitive?(element)
|
|
196
|
-
return false unless
|
|
196
|
+
return false unless Canon::Comparison::NodeInspector.element_node?(element)
|
|
197
197
|
|
|
198
198
|
# List of HTML elements where whitespace is semantically significant
|
|
199
199
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Format-specific tree adapters that wrap parsed documents into
|
|
6
|
+
# Canon::TreeDiff::Core::TreeNode trees for the matcher.
|
|
7
|
+
module Adapters
|
|
8
|
+
autoload :HTMLAdapter, "canon/tree_diff/adapters/html_adapter"
|
|
9
|
+
autoload :JSONAdapter, "canon/tree_diff/adapters/json_adapter"
|
|
10
|
+
autoload :XMLAdapter, "canon/tree_diff/adapters/xml_adapter"
|
|
11
|
+
autoload :YAMLAdapter, "canon/tree_diff/adapters/yaml_adapter"
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -35,18 +35,12 @@ module Canon
|
|
|
35
35
|
# @param attrs2 [Hash] Second attribute hash
|
|
36
36
|
# @return [Boolean] True if attributes are considered equal
|
|
37
37
|
def equal?(attrs1, attrs2)
|
|
38
|
-
# Handle nil/empty cases
|
|
39
38
|
return true if attrs1.nil? && attrs2.nil?
|
|
40
39
|
return false if attrs1.nil? || attrs2.nil?
|
|
41
40
|
|
|
42
|
-
attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
|
|
43
|
-
attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
|
|
44
|
-
|
|
45
41
|
if attribute_order == :strict
|
|
46
|
-
# Strict mode: order matters
|
|
47
42
|
attrs1 == attrs2
|
|
48
43
|
else
|
|
49
|
-
# Ignore/normalize mode: sort keys for comparison
|
|
50
44
|
normalize_for_comparison(attrs1) == normalize_for_comparison(attrs2)
|
|
51
45
|
end
|
|
52
46
|
end
|
|
@@ -142,7 +142,7 @@ module Canon
|
|
|
142
142
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
143
143
|
|
|
144
144
|
# Check if this node is whitespace-sensitive
|
|
145
|
-
if node.
|
|
145
|
+
if node.is_a?(TreeNode)
|
|
146
146
|
label = node.label.to_s.downcase
|
|
147
147
|
return true if whitespace_sensitive_tags.include?(label)
|
|
148
148
|
end
|
|
@@ -350,15 +350,22 @@ module Canon
|
|
|
350
350
|
#
|
|
351
351
|
# @return [String] XPath expression
|
|
352
352
|
def xpath
|
|
353
|
-
|
|
354
|
-
if @source_node.respond_to?(:path)
|
|
355
|
-
return @source_node.path
|
|
356
|
-
end
|
|
353
|
+
return @source_node.path if nokogiri_source?(@source_node)
|
|
357
354
|
|
|
358
|
-
# Otherwise construct path from tree structure
|
|
359
355
|
construct_path
|
|
360
356
|
end
|
|
361
357
|
|
|
358
|
+
# True when the supplied source node is a Nokogiri node that
|
|
359
|
+
# exposes an XPath via +path+. Nokogiri is an optional
|
|
360
|
+
# backend so the constant is guarded.
|
|
361
|
+
def nokogiri_source?(node)
|
|
362
|
+
return false unless Canon::XmlBackend.nokogiri?
|
|
363
|
+
|
|
364
|
+
node.is_a?(Nokogiri::XML::Node)
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
private :nokogiri_source?
|
|
368
|
+
|
|
362
369
|
# Construct path from tree structure
|
|
363
370
|
#
|
|
364
371
|
# @return [String] Path expression
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Core tree-diff primitives: TreeNode, matching, signatures, weights,
|
|
6
|
+
# attribute comparison, and XML entity decoding. Children are
|
|
7
|
+
# autoloaded — never `require_relative` them.
|
|
8
|
+
module Core
|
|
9
|
+
autoload :AttributeComparator, "canon/tree_diff/core/attribute_comparator"
|
|
10
|
+
autoload :Matching, "canon/tree_diff/core/matching"
|
|
11
|
+
autoload :NodeSignature, "canon/tree_diff/core/node_signature"
|
|
12
|
+
autoload :NodeWeight, "canon/tree_diff/core/node_weight"
|
|
13
|
+
autoload :TreeNode, "canon/tree_diff/core/tree_node"
|
|
14
|
+
autoload :XmlEntityDecoder, "canon/tree_diff/core/xml_entity_decoder"
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../core/tree_node"
|
|
4
|
-
require_relative "../core/node_signature"
|
|
5
|
-
require_relative "../core/node_weight"
|
|
6
|
-
require_relative "../core/matching"
|
|
7
|
-
require_relative "../core/attribute_comparator"
|
|
8
|
-
require_relative "../core/xml_entity_decoder"
|
|
9
|
-
|
|
10
3
|
module Canon
|
|
11
4
|
module TreeDiff
|
|
12
5
|
module Matchers
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../core/tree_node"
|
|
4
|
-
require_relative "../core/node_signature"
|
|
5
|
-
require_relative "../core/matching"
|
|
6
|
-
|
|
7
3
|
module Canon
|
|
8
4
|
module TreeDiff
|
|
9
5
|
module Matchers
|
|
@@ -165,7 +161,7 @@ module Canon
|
|
|
165
161
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
166
162
|
|
|
167
163
|
# Check if this node is whitespace-sensitive
|
|
168
|
-
if node.
|
|
164
|
+
if node.is_a?(Core::TreeNode)
|
|
169
165
|
label = node.label.to_s.downcase
|
|
170
166
|
return true if whitespace_sensitive_tags.include?(label)
|
|
171
167
|
end
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../core/tree_node"
|
|
4
|
-
require_relative "../core/node_weight"
|
|
5
|
-
require_relative "../core/matching"
|
|
6
|
-
|
|
7
3
|
module Canon
|
|
8
4
|
module TreeDiff
|
|
9
5
|
module Matchers
|
|
@@ -218,7 +214,7 @@ module Canon
|
|
|
218
214
|
whitespace_sensitive_tags = %w[pre code textarea script style]
|
|
219
215
|
|
|
220
216
|
# Check if this node is whitespace-sensitive
|
|
221
|
-
if node.
|
|
217
|
+
if node.is_a?(Core::TreeNode)
|
|
222
218
|
label = node.label.to_s.downcase
|
|
223
219
|
return true if whitespace_sensitive_tags.include?(label)
|
|
224
220
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Tree-matching strategies — hash-based, similarity-based,
|
|
6
|
+
# structural propagation, and the universal fallback.
|
|
7
|
+
module Matchers
|
|
8
|
+
autoload :HashMatcher, "canon/tree_diff/matchers/hash_matcher"
|
|
9
|
+
autoload :SimilarityMatcher, "canon/tree_diff/matchers/similarity_matcher"
|
|
10
|
+
autoload :StructuralPropagator,
|
|
11
|
+
"canon/tree_diff/matchers/structural_propagator"
|
|
12
|
+
autoload :UniversalMatcher, "canon/tree_diff/matchers/universal_matcher"
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -1,13 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../diff/diff_node"
|
|
4
|
-
require_relative "../comparison/match_options"
|
|
5
|
-
# OperationConverter helper modules
|
|
6
|
-
require_relative "operation_converter_helpers/metadata_enricher"
|
|
7
|
-
require_relative "operation_converter_helpers/reason_builder"
|
|
8
|
-
require_relative "operation_converter_helpers/post_processor"
|
|
9
|
-
require_relative "operation_converter_helpers/update_change_handler"
|
|
10
|
-
|
|
11
3
|
module Canon
|
|
12
4
|
module TreeDiff
|
|
13
5
|
# Converts TreeDiff Operations to DiffNodes for integration with Canon's
|
|
@@ -124,6 +116,8 @@ module Canon
|
|
|
124
116
|
end
|
|
125
117
|
end
|
|
126
118
|
|
|
119
|
+
public :convert_operation
|
|
120
|
+
|
|
127
121
|
# Convert INSERT operation to DiffNode
|
|
128
122
|
#
|
|
129
123
|
# @param operation [Operation] Insert operation
|
|
@@ -153,7 +147,7 @@ module Canon
|
|
|
153
147
|
|
|
154
148
|
# Determine dimension for INSERT/DELETE operations based on node type
|
|
155
149
|
def dimension_for_insert_delete(tree_node)
|
|
156
|
-
label = tree_node.
|
|
150
|
+
label = tree_node.is_a?(Canon::TreeDiff::Core::TreeNode) ? tree_node.label : nil
|
|
157
151
|
return :comments if label == "comment"
|
|
158
152
|
|
|
159
153
|
:element_structure
|
|
@@ -359,7 +353,7 @@ module Canon
|
|
|
359
353
|
def extract_source_node(tree_node)
|
|
360
354
|
return nil if tree_node.nil?
|
|
361
355
|
|
|
362
|
-
tree_node.
|
|
356
|
+
tree_node.is_a?(Canon::TreeDiff::Core::TreeNode) ? tree_node.source_node : tree_node
|
|
363
357
|
end
|
|
364
358
|
|
|
365
359
|
# Determine if a diff is normative based on match options
|
|
@@ -383,12 +377,10 @@ module Canon
|
|
|
383
377
|
return false if node.nil?
|
|
384
378
|
|
|
385
379
|
# Get element name from node
|
|
386
|
-
element_name = if node.
|
|
387
|
-
node.label
|
|
388
|
-
elsif node.respond_to?(:name)
|
|
389
|
-
node.name # Nokogiri node
|
|
380
|
+
element_name = if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
381
|
+
node.label
|
|
390
382
|
else
|
|
391
|
-
|
|
383
|
+
Canon::Comparison::NodeInspector.name(node)
|
|
392
384
|
end
|
|
393
385
|
|
|
394
386
|
# Check if it's in our metadata elements list
|
|
@@ -1,8 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../../diff/path_builder"
|
|
4
|
-
require_relative "../../diff/node_serializer"
|
|
5
|
-
|
|
6
3
|
module Canon
|
|
7
4
|
module TreeDiff
|
|
8
5
|
module OperationConverterHelpers
|
|
@@ -45,13 +42,7 @@ module Canon
|
|
|
45
42
|
def self.serialize(tree_node)
|
|
46
43
|
return nil if tree_node.nil?
|
|
47
44
|
|
|
48
|
-
|
|
49
|
-
source = if tree_node.respond_to?(:source_node)
|
|
50
|
-
tree_node.source_node
|
|
51
|
-
else
|
|
52
|
-
tree_node
|
|
53
|
-
end
|
|
54
|
-
|
|
45
|
+
source = tree_node.is_a?(Core::TreeNode) ? tree_node.source_node : tree_node
|
|
55
46
|
Canon::Diff::NodeSerializer.serialize(source)
|
|
56
47
|
end
|
|
57
48
|
|
|
@@ -62,8 +53,7 @@ module Canon
|
|
|
62
53
|
def self.extract_attributes(tree_node)
|
|
63
54
|
return nil if tree_node.nil?
|
|
64
55
|
|
|
65
|
-
|
|
66
|
-
tree_node.respond_to?(:attributes) ? (tree_node.attributes || {}) : {}
|
|
56
|
+
tree_node.is_a?(Core::TreeNode) ? (tree_node.attributes || {}) : {}
|
|
67
57
|
end
|
|
68
58
|
end
|
|
69
59
|
end
|
|
@@ -20,7 +20,7 @@ module Canon
|
|
|
20
20
|
# For each DELETE, try to find a matching INSERT
|
|
21
21
|
deletes.each do |delete_node|
|
|
22
22
|
node1 = delete_node.node1
|
|
23
|
-
next unless
|
|
23
|
+
next unless backend_element?(node1)
|
|
24
24
|
|
|
25
25
|
# Skip if node has no attributes (can't be attribute order diff)
|
|
26
26
|
next if node1.attributes.nil? || node1.attributes.empty?
|
|
@@ -28,7 +28,7 @@ module Canon
|
|
|
28
28
|
# Find inserts with same element name at same position
|
|
29
29
|
matching_insert = inserts.find do |insert_node|
|
|
30
30
|
node2 = insert_node.node2
|
|
31
|
-
next false unless
|
|
31
|
+
next false unless backend_element?(node2)
|
|
32
32
|
next false unless node1.name == node2.name
|
|
33
33
|
|
|
34
34
|
# Must have attributes to differ in order
|
|
@@ -59,6 +59,17 @@ module Canon
|
|
|
59
59
|
diff_nodes
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
# True when +node+ is a backend element (Nokogiri or Moxml) that
|
|
63
|
+
# exposes its attributes via +attributes+. Canon-native
|
|
64
|
+
# +ElementNode+ uses +attribute_nodes+ and is therefore excluded.
|
|
65
|
+
def self.backend_element?(node)
|
|
66
|
+
return false unless node
|
|
67
|
+
return false unless Canon::Comparison::NodeInspector.element_node?(node)
|
|
68
|
+
|
|
69
|
+
Canon::XmlBackend.nokogiri? ? node.is_a?(Nokogiri::XML::Node) : false
|
|
70
|
+
end
|
|
71
|
+
private_class_method :backend_element?
|
|
72
|
+
|
|
62
73
|
# Check if two attribute hashes are equal ignoring order
|
|
63
74
|
#
|
|
64
75
|
# @param attrs1 [Hash] First attribute hash
|
|
@@ -68,11 +79,6 @@ module Canon
|
|
|
68
79
|
return true if attrs1.nil? && attrs2.nil?
|
|
69
80
|
return false if attrs1.nil? || attrs2.nil?
|
|
70
81
|
|
|
71
|
-
# Convert to hashes if needed
|
|
72
|
-
attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
|
|
73
|
-
attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)
|
|
74
|
-
|
|
75
|
-
# Compare as sets (order-independent)
|
|
76
82
|
attrs1.sort.to_h == attrs2.sort.to_h
|
|
77
83
|
end
|
|
78
84
|
|
|
@@ -16,7 +16,7 @@ module Canon
|
|
|
16
16
|
node = operation[:node]
|
|
17
17
|
content = operation[:content]
|
|
18
18
|
|
|
19
|
-
if node.
|
|
19
|
+
if node.is_a?(Core::TreeNode)
|
|
20
20
|
# Include content preview for clarity
|
|
21
21
|
"Element inserted: #{content || "<#{node.label}>"}"
|
|
22
22
|
else
|
|
@@ -32,7 +32,7 @@ module Canon
|
|
|
32
32
|
node = operation[:node]
|
|
33
33
|
content = operation[:content]
|
|
34
34
|
|
|
35
|
-
if node.
|
|
35
|
+
if node.is_a?(Core::TreeNode)
|
|
36
36
|
# Include content preview for clarity
|
|
37
37
|
"Element deleted: #{content || "<#{node.label}>"}"
|
|
38
38
|
else
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../operation_converter_helpers/reason_builder"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module TreeDiff
|
|
7
5
|
module OperationConverterHelpers
|
|
@@ -20,8 +18,8 @@ module Canon
|
|
|
20
18
|
def self.convert(operation, metadata, is_metadata, normative_determiner)
|
|
21
19
|
tree_node1 = operation[:node1] # TreeNode from adapter
|
|
22
20
|
tree_node2 = operation[:node2] # TreeNode from adapter
|
|
23
|
-
node1 = tree_node1.
|
|
24
|
-
node2 = tree_node2.
|
|
21
|
+
node1 = tree_node1.is_a?(Core::TreeNode) ? tree_node1.source_node : tree_node1
|
|
22
|
+
node2 = tree_node2.is_a?(Core::TreeNode) ? tree_node2.source_node : tree_node2
|
|
25
23
|
changes = operation[:changes]
|
|
26
24
|
|
|
27
25
|
# Handle case where changes is a boolean or non-hash value
|
|
@@ -150,8 +148,8 @@ is_metadata, normative_determiner, tree_node1, tree_node2)
|
|
|
150
148
|
# @param tree_node2 [Object] Second tree node
|
|
151
149
|
# @return [Symbol] The dimension to use (:text_content or :comments)
|
|
152
150
|
def self.dimension_for_value_change(tree_node1, tree_node2)
|
|
153
|
-
label1 = tree_node1.
|
|
154
|
-
label2 = tree_node2.
|
|
151
|
+
label1 = tree_node1.is_a?(Core::TreeNode) ? tree_node1.label : nil
|
|
152
|
+
label2 = tree_node2.is_a?(Core::TreeNode) ? tree_node2.label : nil
|
|
155
153
|
|
|
156
154
|
# If either node is a comment, use :comments dimension
|
|
157
155
|
return :comments if label1 == "comment" || label2 == "comment"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Helper modules consumed by OperationConverter when converting
|
|
6
|
+
# tree operations into DiffNodes.
|
|
7
|
+
module OperationConverterHelpers
|
|
8
|
+
autoload :MetadataEnricher,
|
|
9
|
+
"canon/tree_diff/operation_converter_helpers/metadata_enricher"
|
|
10
|
+
autoload :PostProcessor,
|
|
11
|
+
"canon/tree_diff/operation_converter_helpers/post_processor"
|
|
12
|
+
autoload :ReasonBuilder,
|
|
13
|
+
"canon/tree_diff/operation_converter_helpers/reason_builder"
|
|
14
|
+
autoload :UpdateChangeHandler,
|
|
15
|
+
"canon/tree_diff/operation_converter_helpers/update_change_handler"
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../core/xml_entity_decoder"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module TreeDiff
|
|
7
5
|
module Operations
|
|
@@ -600,6 +598,10 @@ module Canon
|
|
|
600
598
|
depth
|
|
601
599
|
end
|
|
602
600
|
|
|
601
|
+
public :normalize_text, :calculate_depth, :text_similarity,
|
|
602
|
+
:extract_text_content, :collect_all_nodes, :nodes_identical?,
|
|
603
|
+
:detect_changes
|
|
604
|
+
|
|
603
605
|
# Check if a node is in a whitespace-sensitive context
|
|
604
606
|
#
|
|
605
607
|
# HTML elements where whitespace is significant: <pre>, <code>, <textarea>, <script>, <style>
|
|
@@ -615,13 +617,12 @@ module Canon
|
|
|
615
617
|
# Check if this node or any ancestor is whitespace-sensitive
|
|
616
618
|
current = node
|
|
617
619
|
while current
|
|
618
|
-
if current.
|
|
620
|
+
if current.is_a?(Core::TreeNode)
|
|
619
621
|
label = current.label.to_s.downcase
|
|
620
622
|
return true if whitespace_sensitive_tags.include?(label)
|
|
621
623
|
end
|
|
622
624
|
|
|
623
|
-
|
|
624
|
-
current = current.parent if current.respond_to?(:parent)
|
|
625
|
+
current = current.is_a?(Core::TreeNode) ? current.parent : nil
|
|
625
626
|
break unless current
|
|
626
627
|
end
|
|
627
628
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Tree-diff operations: Operation (INSERT/DELETE/UPDATE/MOVE) and
|
|
6
|
+
# the OperationDetector that emits them.
|
|
7
|
+
module Operations
|
|
8
|
+
autoload :Operation, "canon/tree_diff/operations/operation"
|
|
9
|
+
autoload :OperationDetector,
|
|
10
|
+
"canon/tree_diff/operations/operation_detector"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|