canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
require_relative "../match_options"
|
|
5
|
-
|
|
6
|
-
module Canon
|
|
7
|
-
module Comparison
|
|
8
|
-
module Dimensions
|
|
9
|
-
# Attribute values dimension
#
# Compares the attribute values of two nodes. Extracted data is a Hash of
# attribute name => attribute value; every comparison walks the union of
# both hashes' keys and compares the values as strings.
#
# Because values are converted with +to_s+ before comparing, an attribute
# that is missing on one side (nil) compares equal to an attribute whose
# value is the empty string on the other side.
#
# Behaviors:
# - :strict    - Exact attribute value comparison
# - :strip     - Compare with leading/trailing whitespace removed
# - :compact   - Compare with internal whitespace runs collapsed to one space
# - :normalize - Compare after MatchOptions.normalize_text
# - :ignore    - Skip attribute value comparison (handled by BaseDimension)
class AttributeValuesDimension < BaseDimension
  # Extract attribute values from a node
  #
  # @param node [Moxml::Node, Nokogiri::XML::Node, nil] Node to extract from
  # @return [Hash] Attribute name to value mapping; empty for nil and for
  #   non-element nodes
  def extract_data(node)
    return {} unless node

    if Canon::XmlBackend.nokogiri?
      extract_from_nokogiri(node)
    else
      extract_from_moxml(node)
    end
  end

  # Strict attribute value comparison
  #
  # @param attrs1 [Hash] First attributes hash
  # @param attrs2 [Hash] Second attributes hash
  # @return [Boolean] true if all attribute values are exactly equal
  def compare_strict(attrs1, attrs2)
    values_match?(attrs1, attrs2) { |value| value }
  end

  # Strip comparison: leading/trailing whitespace is removed first.
  #
  # @param attrs1 [Hash] First attributes hash
  # @param attrs2 [Hash] Second attributes hash
  # @return [Boolean] true if stripped values are equal
  def compare_strip(attrs1, attrs2)
    values_match?(attrs1, attrs2, &:strip)
  end

  # Compact comparison: whitespace runs are collapsed, nothing is trimmed.
  #
  # @param attrs1 [Hash] First attributes hash
  # @param attrs2 [Hash] Second attributes hash
  # @return [Boolean] true if compacted values are equal
  def compare_compact(attrs1, attrs2)
    values_match?(attrs1, attrs2) { |value| compact_whitespace(value) }
  end

  # Normalized comparison: values go through MatchOptions.normalize_text.
  #
  # @param attrs1 [Hash] First attributes hash
  # @param attrs2 [Hash] Second attributes hash
  # @return [Boolean] true if normalized values are equal
  def compare_normalize(attrs1, attrs2)
    values_match?(attrs1, attrs2) { |value| normalize_text(value) }
  end

  # Compare with custom behavior
  #
  # Adds :strip and :compact on top of the behaviors dispatched by
  # BaseDimension#compare; every other behavior is forwarded to super.
  #
  # @param data1 [Object] First data
  # @param data2 [Object] Second data
  # @param behavior [Symbol] Comparison behavior
  # @return [Boolean] true if data matches according to behavior
  def compare(data1, data2, behavior)
    case behavior
    when :strip
      compare_strip(data1, data2)
    when :compact
      compare_compact(data1, data2)
    else
      super
    end
  end

  private

  # Shared comparison loop for every behavior.
  #
  # Yields each side's value (already converted with +to_s+) so the caller
  # can transform it, then compares the two transformed values.
  #
  # @param attrs1 [Hash] First attributes hash
  # @param attrs2 [Hash] Second attributes hash
  # @yieldparam value [String] Attribute value as a string
  # @yieldreturn [Object] Value to compare with ==
  # @return [Boolean] true if every key in either hash matches
  def values_match?(attrs1, attrs2)
    (attrs1.keys | attrs2.keys).all? do |key|
      yield(attrs1[key].to_s) == yield(attrs2[key].to_s)
    end
  end

  # Extract attributes from Moxml node
  #
  # @param node [Moxml::Node] Moxml node
  # @return [Hash] Attribute name to value mapping
  def extract_from_moxml(node)
    return {} unless node.node_type == :element

    attributes_to_hash(node.attributes)
  end

  # Extract attributes from Nokogiri node
  #
  # @param node [Nokogiri::XML::Node] Nokogiri node
  # @return [Hash] Attribute name to value mapping
  def extract_from_nokogiri(node)
    return {} unless node.node_type == Nokogiri::XML::Node::ELEMENT_NODE

    attributes_to_hash(node.attribute_nodes)
  end

  # Build a name => value hash from attribute objects.
  #
  # Keys are +attr.name+, so when two attributes report the same name the
  # later one overwrites the earlier one.
  #
  # @param attr_nodes [#each] Attribute objects responding to name/value
  # @return [Hash] Attribute name to value mapping
  def attributes_to_hash(attr_nodes)
    values = {}
    attr_nodes.each { |attr| values[attr.name] = attr.value }
    values
  end

  # Collapse each run of whitespace (including U+00A0) into a single
  # space without trimming the ends.
  #
  # @param text [String] Text to compact
  # @return [String] Compacted text
  def compact_whitespace(text)
    text.gsub(/[\p{Space}\u00a0]+/, " ")
  end

  # Delegate to MatchOptions.normalize_text.
  # NOTE(review): presumably collapses and trims whitespace - the helper
  # is defined outside this file; confirm there.
  #
  # @param text [String] Text to normalize
  # @return [String] Normalized text
  def normalize_text(text)
    MatchOptions.normalize_text(text)
  end
end
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
end
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Canon
|
|
4
|
-
module Comparison
|
|
5
|
-
module Dimensions
|
|
6
|
-
# Base class for comparison dimensions
#
# A dimension represents "WHAT to compare" - a specific aspect of a document
# that can be compared (e.g., text content, attributes, comments).
#
# Each dimension knows how to:
# - Extract relevant data from a node
# - Compare data according to a behavior (:strict, :normalize, :ignore)
#
# Subclasses must implement:
# - extract_data(node) - Extract relevant data from a node
# - compare_strict(data1, data2) - Strict comparison
# - compare_normalize(data1, data2) - Normalized comparison (optional;
#   falls back to compare_strict when not overridden)
#
# @abstract Subclass and implement abstract methods
class BaseDimension
  # Behavior constants
  STRICT = :strict
  NORMALIZE = :normalize
  IGNORE = :ignore

  # Get the dimension name
  #
  # Derived from the class name: the namespace and the trailing
  # "Dimension" are dropped and the remaining CamelCase words are turned
  # into snake_case, e.g. TextContentDimension => :text_content and
  # CommentsDimension => :comments. The result is memoized per instance.
  #
  # @return [Symbol] Dimension name in snake_case
  def dimension_name
    @dimension_name ||= self.class.name
      .split("::").last
      .delete_suffix("Dimension")
      # Insert "_" at every lower/digit -> upper boundary so multi-word
      # class names do not collapse into one run of letters.
      .gsub(/([a-z\d])([A-Z])/, '\1_\2')
      .downcase
      .to_sym
  end

  # Compare extracted data according to behavior
  #
  # @param data1 [Object] First data
  # @param data2 [Object] Second data
  # @param behavior [Symbol] Comparison behavior (:strict, :normalize, :ignore)
  # @return [Boolean] true if data matches according to behavior
  # @raise [Error] if behavior is not one of the three known behaviors
  def compare(data1, data2, behavior)
    case behavior
    when STRICT
      compare_strict(data1, data2)
    when NORMALIZE
      compare_normalize(data1, data2)
    when IGNORE
      # Ignored dimensions always match, whatever the data.
      true
    else
      raise Error, "Unknown behavior: #{behavior}"
    end
  end

  # Check if two nodes are equivalent for this dimension
  #
  # Extracts the data from both nodes and compares it.
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param behavior [Symbol] Comparison behavior
  # @return [Boolean] true if nodes match for this dimension
  def equivalent?(node1, node2, behavior)
    compare(extract_data(node1), extract_data(node2), behavior)
  end

  # Extract data from a node
  #
  # @param node [Object] Node to extract data from
  # @return [Object] Extracted data
  # @raise [NotImplementedError] always, unless overridden
  # @abstract Subclass must implement
  def extract_data(node)
    raise NotImplementedError, "#{self.class} must implement extract_data"
  end

  # Strict comparison
  #
  # @param data1 [Object] First data
  # @param data2 [Object] Second data
  # @return [Boolean] true if data matches strictly
  # @raise [NotImplementedError] always, unless overridden
  # @abstract Subclass must implement
  def compare_strict(data1, data2)
    raise NotImplementedError,
          "#{self.class} must implement compare_strict"
  end

  # Normalized comparison
  #
  # Default implementation: delegate to strict comparison.
  #
  # @param data1 [Object] First data
  # @param data2 [Object] Second data
  # @return [Boolean] true if data matches after normalization
  def compare_normalize(data1, data2)
    compare_strict(data1, data2)
  end

  # Check if this dimension supports normalization
  #
  # True when compare_normalize is defined somewhere other than
  # BaseDimension itself, i.e. the default has been overridden.
  #
  # @return [Boolean] true if normalization is supported
  def supports_normalization?
    method(:compare_normalize).owner != BaseDimension
  end
end
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
end
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
|
|
5
|
-
module Canon
|
|
6
|
-
module Comparison
|
|
7
|
-
module Dimensions
|
|
8
|
-
# Comments dimension
#
# Handles comparison of comment nodes. Extracted data is an Array of
# comment strings: the node's own content when the node is itself a
# comment, followed by the content of each direct child that is a comment.
# Deeper descendants are not visited.
#
# Behaviors:
# - :strict    - Exact comment comparison including whitespace
# - :normalize - Compare with whitespace collapsed and trimmed per comment
# - :ignore    - Skip comment comparison (handled by BaseDimension)
class CommentsDimension < BaseDimension
  # Extract comments from a node
  #
  # @param node [Moxml::Node, Nokogiri::XML::Node, nil] Node to extract from
  # @return [Array<String>] Array of comment strings; empty for nil
  def extract_data(node)
    return [] unless node

    if Canon::XmlBackend.nokogiri?
      extract_from_nokogiri(node)
    else
      extract_from_moxml(node)
    end
  end

  # Strict comment comparison
  #
  # @param comments1 [Array<String>] First comments array
  # @param comments2 [Array<String>] Second comments array
  # @return [Boolean] true if comments are exactly equal
  def compare_strict(comments1, comments2) # rubocop:disable Naming/PredicateMethod
    comments1 == comments2
  end

  # Normalized comment comparison
  #
  # Whitespace is collapsed and trimmed in each comment before comparing.
  #
  # @param comments1 [Array<String>] First comments array
  # @param comments2 [Array<String>] Second comments array
  # @return [Boolean] true if normalized comments are equal
  def compare_normalize(comments1, comments2) # rubocop:disable Naming/PredicateMethod
    normalize_comments(comments1) == normalize_comments(comments2)
  end

  private

  # Extract comments from Moxml node
  #
  # @param node [Moxml::Node] Moxml node
  # @return [Array<String>] Array of comment strings
  def extract_from_moxml(node)
    collect_comments(node, :comment)
  end

  # Extract comments from Nokogiri node
  #
  # @param node [Nokogiri::XML::Node] Nokogiri node
  # @return [Array<String>] Array of comment strings
  def extract_from_nokogiri(node)
    collect_comments(node, Nokogiri::XML::Node::COMMENT_NODE)
  end

  # Backend-independent comment collection.
  #
  # @param node [Object] Node responding to node_type, content, children
  # @param comment_type [Object] Value node_type returns for comment nodes
  #   in the active backend
  # @return [Array<String>] Own content (if a comment) then child comments
  def collect_comments(node, comment_type)
    comments = []
    comments << node.content if node.node_type == comment_type

    node.children.each do |child|
      comments << child.content if child.node_type == comment_type
    end

    comments
  end

  # Normalize comments by collapsing whitespace
  #
  # @param comments [Array<String>] Comments to normalize
  # @return [Array<String>] Normalized comments
  def normalize_comments(comments)
    comments.map { |comment| normalize_text(comment) }
  end

  # Normalize text by collapsing whitespace
  #
  # Each run of whitespace (including U+00A0) becomes a single space and
  # the ends are trimmed. nil is handled by to_s and yields "".
  #
  # @param text [String, nil] Text to normalize
  # @return [String] Normalized text
  def normalize_text(text)
    text.to_s
      .gsub(/[\p{Space}\u00a0]+/, " ")
      .strip
  end
end
|
|
115
|
-
end
|
|
116
|
-
end
|
|
117
|
-
end
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
|
|
5
|
-
module Canon
|
|
6
|
-
module Comparison
|
|
7
|
-
module Dimensions
|
|
8
|
-
# Element position dimension
#
# Handles comparison of a node's position within its parent. The position
# is the node's 0-based index among ALL children of its parent (no
# filtering by node type or element name is applied).
#
# Behaviors:
# - :strict - Nodes must appear at the same index
# - :ignore - Position doesn't matter (handled by BaseDimension)
class ElementPositionDimension < BaseDimension
  # Extract element position from a node
  #
  # @param node [Moxml::Node, Nokogiri::XML::Node, nil] Node to extract from
  # @return [Integer] Position index (0-based); 0 for nil, for a node
  #   without a parent, and for a node not found among its parent's children
  def extract_data(node)
    return 0 unless node

    if Canon::XmlBackend.nokogiri?
      extract_from_nokogiri(node)
    else
      extract_from_moxml(node)
    end
  end

  # Strict element position comparison
  #
  # @param pos1 [Integer] First position
  # @param pos2 [Integer] Second position
  # @return [Boolean] true if positions are equal
  def compare_strict(pos1, pos2) # rubocop:disable Naming/PredicateMethod
    pos1 == pos2
  end

  private

  # Extract position from Moxml node
  #
  # @param node [Moxml::Node] Moxml node
  # @return [Integer] Position index
  def extract_from_moxml(node)
    sibling_index(node)
  end

  # Extract position from Nokogiri node
  #
  # @param node [Nokogiri::XML::Node] Nokogiri node
  # @return [Integer] Position index
  def extract_from_nokogiri(node)
    sibling_index(node)
  end

  # Index of the node among its parent's children.
  #
  # Both backends expose parent/children, so one lookup serves both.
  # Siblings are compared with ==, sibling on the left-hand side.
  #
  # @param node [Object] Node responding to parent
  # @return [Integer] 0-based index, or 0 when there is no parent or the
  #   node is not found
  def sibling_index(node)
    parent = node.parent
    return 0 unless parent

    parent.children.find_index(node) || 0
  end
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
end
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
require_relative "../match_options"
|
|
5
|
-
|
|
6
|
-
module Canon
|
|
7
|
-
module Comparison
|
|
8
|
-
module Dimensions
|
|
9
|
-
# Structural whitespace dimension
#
# Handles comparison of structural whitespace: direct child text nodes
# whose content is empty once stripped. Extracted data is an Array holding
# the original (unstripped) content of each such text node, in order.
#
# Behaviors:
# - :strict    - Exact whitespace comparison
# - :normalize - Compare after MatchOptions.normalize_text on each entry
# - :ignore    - Skip structural whitespace comparison (BaseDimension)
class StructuralWhitespaceDimension < BaseDimension
  # Extract structural whitespace from a node
  #
  # @param node [Moxml::Node, Nokogiri::XML::Node, nil] Node to extract from
  # @return [Array<String>] Array of structural whitespace strings; empty
  #   for nil
  def extract_data(node)
    return [] unless node

    if Canon::XmlBackend.nokogiri?
      extract_from_nokogiri(node)
    else
      extract_from_moxml(node)
    end
  end

  # Strict structural whitespace comparison
  #
  # @param ws1 [Array<String>] First whitespace array
  # @param ws2 [Array<String>] Second whitespace array
  # @return [Boolean] true if structural whitespace is exactly equal
  def compare_strict(ws1, ws2) # rubocop:disable Naming/PredicateMethod
    ws1 == ws2
  end

  # Normalized structural whitespace comparison
  #
  # Normalizes each entry and compares the resulting arrays.
  #
  # @param ws1 [Array<String>] First whitespace array
  # @param ws2 [Array<String>] Second whitespace array
  # @return [Boolean] true if normalized structural whitespace is equal
  def compare_normalize(ws1, ws2) # rubocop:disable Naming/PredicateMethod
    normalize_whitespace(ws1) == normalize_whitespace(ws2)
  end

  private

  # Extract structural whitespace from Moxml node
  #
  # @param node [Moxml::Node] Moxml node
  # @return [Array<String>] Array of structural whitespace strings
  def extract_from_moxml(node)
    collect_whitespace(node, :text)
  end

  # Extract structural whitespace from Nokogiri node
  #
  # @param node [Nokogiri::XML::Node] Nokogiri node
  # @return [Array<String>] Array of structural whitespace strings
  def extract_from_nokogiri(node)
    collect_whitespace(node, Nokogiri::XML::Node::TEXT_NODE)
  end

  # Backend-independent whitespace collection.
  #
  # @param node [Object] Node responding to children
  # @param text_type [Object] Value node_type returns for text nodes in
  #   the active backend
  # @return [Array<String>] Content of whitespace-only child text nodes
  def collect_whitespace(node, text_type)
    whitespace = []

    node.children.each do |child|
      next unless child.node_type == text_type

      content = child.content
      # A text node is structural when nothing is left after stripping;
      # this already covers every string made only of \s characters.
      whitespace << content if content.strip.empty?
    end

    whitespace
  end

  # Normalize whitespace array
  #
  # @param whitespace [Array<String>] Whitespace strings
  # @return [Array<String>] Normalized whitespace strings
  def normalize_whitespace(whitespace)
    whitespace.map { |entry| normalize_text(entry) }
  end

  # Delegate to MatchOptions.normalize_text.
  # NOTE(review): the helper is defined outside this file; confirm its
  # exact collapsing/trimming rules there.
  #
  # @param text [String, nil] Text to normalize
  # @return [String] Normalized text
  def normalize_text(text)
    MatchOptions.normalize_text(text)
  end
end
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
end
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "base_dimension"
|
|
4
|
-
require_relative "../match_options"
|
|
5
|
-
|
|
6
|
-
module Canon
|
|
7
|
-
module Comparison
|
|
8
|
-
module Dimensions
|
|
9
|
-
# Text content dimension
#
# Handles comparison of text content in nodes. Text, CDATA and element
# nodes yield a string; every other node type yields nil.
#
# Behaviors:
# - :strict    - Exact text comparison including whitespace
# - :normalize - Compare after MatchOptions.normalize_text
# - :ignore    - Skip text content comparison (handled by BaseDimension)
class TextContentDimension < BaseDimension
  # Extract text content from a node
  #
  # @param node [Moxml::Node, Nokogiri::XML::Node, nil] Node to extract from
  # @return [String, nil] Text content, or nil for nil input and for node
  #   types other than text, CDATA and element
  def extract_data(node)
    return nil unless node

    if Canon::XmlBackend.nokogiri?
      extract_from_nokogiri(node)
    else
      extract_from_moxml(node)
    end
  end

  # Strict text comparison
  #
  # Both sides are converted with to_s first, so nil equals "".
  #
  # @param text1 [String, nil] First text
  # @param text2 [String, nil] Second text
  # @return [Boolean] true if texts are exactly equal
  def compare_strict(text1, text2) # rubocop:disable Naming/PredicateMethod
    text1.to_s == text2.to_s
  end

  # Normalized text comparison
  #
  # Normalizes both texts and compares the results. Two texts that both
  # normalize to the empty string are therefore equivalent.
  #
  # @param text1 [String, nil] First text
  # @param text2 [String, nil] Second text
  # @return [Boolean] true if normalized texts are equal
  def compare_normalize(text1, text2) # rubocop:disable Naming/PredicateMethod
    normalize_text(text1) == normalize_text(text2)
  end

  private

  # Extract text from Moxml node
  #
  # @param node [Moxml::Node] Moxml node
  # @return [String, nil] Text content
  def extract_from_moxml(node)
    case node.node_type
    when :text, :cdata
      node.content
    when :element
      # Element nodes are read through #text rather than #content.
      node.text
    end
  end

  # Extract text from Nokogiri node
  #
  # @param node [Nokogiri::XML::Node] Nokogiri node
  # @return [String, nil] Text content
  def extract_from_nokogiri(node)
    case node.node_type
    when Nokogiri::XML::Node::TEXT_NODE,
         Nokogiri::XML::Node::CDATA_SECTION_NODE,
         Nokogiri::XML::Node::ELEMENT_NODE
      node.content
    end
  end

  # Delegate to MatchOptions.normalize_text for consistency.
  # NOTE(review): the helper is defined outside this file; confirm its
  # exact collapsing/trimming rules there.
  #
  # @param text [String, nil] Text to normalize
  # @return [String] Normalized text
  def normalize_text(text)
    MatchOptions.normalize_text(text)
  end
end
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
end
|