canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,167 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_dimension"
4
- require_relative "../match_options"
5
-
6
- module Canon
7
- module Comparison
8
- module Dimensions
9
- # Attribute values dimension
10
- #
11
- # Handles comparison of attribute values.
12
- # Supports :strict, :strip, :compact, :normalize, and :ignore behaviors.
13
- #
14
- # Behaviors:
15
- # - :strict - Exact attribute value comparison
16
- # - :strip - Compare with leading/trailing whitespace removed
17
- # - :compact - Compare with internal whitespace collapsed
18
- # - :normalize - Compare with whitespace stripped and collapsed
19
- # - :ignore - Skip attribute value comparison
20
- class AttributeValuesDimension < BaseDimension
21
- # Extract attribute values from a node
22
- #
23
- # Returns a hash of attribute name to value.
24
- #
25
- # @param node [Moxml::Node, Nokogiri::XML::Node] Node to extract from
26
- # @return [Hash] Attribute name to value mapping
27
- def extract_data(node)
28
- return {} unless node
29
-
30
- if Canon::XmlBackend.nokogiri?
31
- extract_from_nokogiri(node)
32
- else
33
- extract_from_moxml(node)
34
- end
35
- end
36
-
37
- # Strict attribute value comparison
38
- #
39
- # @param attrs1 [Hash] First attributes hash
40
- # @param attrs2 [Hash] Second attributes hash
41
- # @return [Boolean] true if all attribute values are exactly equal
42
- def compare_strict(attrs1, attrs2)
43
- # Get all unique attribute names
44
- all_keys = (attrs1.keys | attrs2.keys)
45
-
46
- all_keys.all? do |key|
47
- attrs1[key].to_s == attrs2[key].to_s
48
- end
49
- end
50
-
51
- # Strip comparison
52
- #
53
- # Compare with leading/trailing whitespace removed.
54
- #
55
- # @param attrs1 [Hash] First attributes hash
56
- # @param attrs2 [Hash] Second attributes hash
57
- # @return [Boolean] true if stripped values are equal
58
- def compare_strip(attrs1, attrs2)
59
- all_keys = (attrs1.keys | attrs2.keys)
60
-
61
- all_keys.all? do |key|
62
- attrs1[key].to_s.strip == attrs2[key].to_s.strip
63
- end
64
- end
65
-
66
- # Compact comparison
67
- #
68
- # Compare with internal whitespace collapsed.
69
- #
70
- # @param attrs1 [Hash] First attributes hash
71
- # @param attrs2 [Hash] Second attributes hash
72
- # @return [Boolean] true if compacted values are equal
73
- def compare_compact(attrs1, attrs2)
74
- all_keys = (attrs1.keys | attrs2.keys)
75
-
76
- all_keys.all? do |key|
77
- compact_whitespace(attrs1[key].to_s) == compact_whitespace(attrs2[key].to_s)
78
- end
79
- end
80
-
81
- # Normalized comparison
82
- #
83
- # Compare with whitespace stripped and collapsed.
84
- #
85
- # @param attrs1 [Hash] First attributes hash
86
- # @param attrs2 [Hash] Second attributes hash
87
- # @return [Boolean] true if normalized values are equal
88
- def compare_normalize(attrs1, attrs2)
89
- all_keys = (attrs1.keys | attrs2.keys)
90
-
91
- all_keys.all? do |key|
92
- normalize_text(attrs1[key].to_s) == normalize_text(attrs2[key].to_s)
93
- end
94
- end
95
-
96
- # Compare with custom behavior
97
- #
98
- # Supports the extended behaviors for attribute values.
99
- #
100
- # @param data1 [Object] First data
101
- # @param data2 [Object] Second data
102
- # @param behavior [Symbol] Comparison behavior
103
- # @return [Boolean] true if data matches according to behavior
104
- def compare(data1, data2, behavior)
105
- case behavior
106
- when :strip
107
- compare_strip(data1, data2)
108
- when :compact
109
- compare_compact(data1, data2)
110
- else
111
- super
112
- end
113
- end
114
-
115
- private
116
-
117
- # Extract attributes from Moxml node
118
- #
119
- # @param node [Moxml::Node] Moxml node
120
- # @return [Hash] Attribute name to value mapping
121
- def extract_from_moxml(node)
122
- return {} unless node.node_type == :element
123
-
124
- attrs = {}
125
- node.attributes.each do |attr|
126
- attrs[attr.name] = attr.value
127
- end
128
- attrs
129
- end
130
-
131
- # Extract attributes from Nokogiri node
132
- #
133
- # @param node [Nokogiri::XML::Node] Nokogiri node
134
- # @return [Hash] Attribute name to value mapping
135
- def extract_from_nokogiri(node)
136
- return {} unless node.node_type == Nokogiri::XML::Node::ELEMENT_NODE
137
-
138
- attrs = {}
139
- node.attribute_nodes.each do |attr|
140
- attrs[attr.name] = attr.value
141
- end
142
- attrs
143
- end
144
-
145
- # Compact whitespace
146
- #
147
- # Collapses internal whitespace without trimming.
148
- #
149
- # @param text [String] Text to compact
150
- # @return [String] Compacted text
151
- def compact_whitespace(text)
152
- text.gsub(/[\p{Space}\u00a0]+/, " ")
153
- end
154
-
155
- # Normalize text
156
- #
157
- # Collapses and trims whitespace.
158
- #
159
- # @param text [String] Text to normalize
160
- # @return [String] Normalized text
161
- def normalize_text(text)
162
- MatchOptions.normalize_text(text)
163
- end
164
- end
165
- end
166
- end
167
- end
@@ -1,107 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Canon
4
- module Comparison
5
- module Dimensions
6
- # Base class for comparison dimensions
7
- #
8
- # A dimension represents "WHAT to compare" - a specific aspect of a document
9
- # that can be compared (e.g., text content, attributes, comments).
10
- #
11
- # Each dimension knows how to:
12
- # - Extract relevant data from a node
13
- # - Compare data according to a behavior (:strict, :normalize, :ignore)
14
- #
15
- # Subclasses must implement:
16
- # - extract_data(node) - Extract relevant data from a node
17
- # - compare_strict(data1, data2) - Strict comparison
18
- # - compare_normalize(data1, data2) - Normalized comparison (optional)
19
- #
20
- # @abstract Subclass and implement abstract methods
21
- class BaseDimension
22
- # Behavior constants
23
- STRICT = :strict
24
- NORMALIZE = :normalize
25
- IGNORE = :ignore
26
-
27
- # Get the dimension name
28
- #
29
- # @return [Symbol] Dimension name
30
- def dimension_name
31
- @dimension_name ||= self.class.name.split("::").last.gsub(
32
- /Dimension$/, ""
33
- ).downcase.to_sym
34
- end
35
-
36
- # Compare extracted data according to behavior
37
- #
38
- # @param data1 [Object] First data
39
- # @param data2 [Object] Second data
40
- # @param behavior [Symbol] Comparison behavior (:strict, :normalize, :ignore)
41
- # @return [Boolean] true if data matches according to behavior
42
- def compare(data1, data2, behavior)
43
- case behavior
44
- when STRICT
45
- compare_strict(data1, data2)
46
- when NORMALIZE
47
- compare_normalize(data1, data2)
48
- when IGNORE
49
- true
50
- else
51
- raise Error, "Unknown behavior: #{behavior}"
52
- end
53
- end
54
-
55
- # Check if two nodes are equivalent for this dimension
56
- #
57
- # @param node1 [Object] First node
58
- # @param node2 [Object] Second node
59
- # @param behavior [Symbol] Comparison behavior
60
- # @return [Boolean] true if nodes match for this dimension
61
- def equivalent?(node1, node2, behavior)
62
- data1 = extract_data(node1)
63
- data2 = extract_data(node2)
64
- compare(data1, data2, behavior)
65
- end
66
-
67
- # Extract data from a node
68
- #
69
- # @param node [Object] Node to extract data from
70
- # @return [Object] Extracted data
71
- # @abstract Subclass must implement
72
- def extract_data(node)
73
- raise NotImplementedError, "#{self.class} must implement extract_data"
74
- end
75
-
76
- # Strict comparison
77
- #
78
- # @param data1 [Object] First data
79
- # @param data2 [Object] Second data
80
- # @return [Boolean] true if data matches strictly
81
- # @abstract Subclass must implement
82
- def compare_strict(data1, data2)
83
- raise NotImplementedError,
84
- "#{self.class} must implement compare_strict"
85
- end
86
-
87
- # Normalized comparison
88
- #
89
- # @param data1 [Object] First data
90
- # @param data2 [Object] Second data
91
- # @return [Boolean] true if data matches after normalization
92
- def compare_normalize(data1, data2)
93
- # Default implementation: delegate to strict comparison
94
- compare_strict(data1, data2)
95
- end
96
-
97
- # Check if this dimension supports normalization
98
- #
99
- # @return [Boolean] true if normalization is supported
100
- def supports_normalization?
101
- # Check if compare_normalize is overridden (not the default implementation)
102
- method(:compare_normalize).owner != BaseDimension
103
- end
104
- end
105
- end
106
- end
107
- end
@@ -1,117 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_dimension"
4
-
5
- module Canon
6
- module Comparison
7
- module Dimensions
8
- # Comments dimension
9
- #
10
- # Handles comparison of comment nodes.
11
- # Supports :strict and :ignore behaviors.
12
- #
13
- # Behaviors:
14
- # - :strict - Exact comment comparison including whitespace
15
- # - :ignore - Skip comment comparison
16
- class CommentsDimension < BaseDimension
17
- # Extract comments from a node
18
- #
19
- # @param node [Moxml::Node, Nokogiri::XML::Node] Node to extract from
20
- # @return [Array<String>] Array of comment strings
21
- def extract_data(node)
22
- return [] unless node
23
-
24
- if Canon::XmlBackend.nokogiri?
25
- extract_from_nokogiri(node)
26
- else
27
- extract_from_moxml(node)
28
- end
29
- end
30
-
31
- # Strict comment comparison
32
- #
33
- # @param comments1 [Array<String>] First comments array
34
- # @param comments2 [Array<String>] Second comments array
35
- # @return [Boolean] true if comments are exactly equal
36
- def compare_strict(comments1, comments2) # rubocop:disable Naming/PredicateMethod
37
- comments1 == comments2
38
- end
39
-
40
- # Normalized comment comparison
41
- #
42
- # For comments, normalized comparison collapses whitespace in each comment.
43
- #
44
- # @param comments1 [Array<String>] First comments array
45
- # @param comments2 [Array<String>] Second comments array
46
- # @return [Boolean] true if normalized comments are equal
47
- def compare_normalize(comments1, comments2) # rubocop:disable Naming/PredicateMethod
48
- normalize_comments(comments1) == normalize_comments(comments2)
49
- end
50
-
51
- private
52
-
53
- # Extract comments from Moxml node
54
- #
55
- # @param node [Moxml::Node] Moxml node
56
- # @return [Array<String>] Array of comment strings
57
- def extract_from_moxml(node)
58
- comments = []
59
-
60
- # If node itself is a comment
61
- if node.node_type == :comment
62
- comments << node.content
63
- end
64
-
65
- # Extract child comments
66
- node.children.each do |child|
67
- comments << child.content if child.node_type == :comment
68
- end
69
-
70
- comments
71
- end
72
-
73
- # Extract comments from Nokogiri node
74
- #
75
- # @param node [Nokogiri::XML::Node] Nokogiri node
76
- # @return [Array<String>] Array of comment strings
77
- def extract_from_nokogiri(node)
78
- comments = []
79
-
80
- # If node itself is a comment
81
- if node.node_type == Nokogiri::XML::Node::COMMENT_NODE
82
- comments << node.content
83
- end
84
-
85
- # Extract child comments
86
- node.children.each do |child|
87
- if child.node_type == Nokogiri::XML::Node::COMMENT_NODE
88
- comments << child.content
89
- end
90
- end
91
-
92
- comments
93
- end
94
-
95
- # Normalize comments by collapsing whitespace
96
- #
97
- # @param comments [Array<String>] Comments to normalize
98
- # @return [Array<String>] Normalized comments
99
- def normalize_comments(comments)
100
- comments.map { |c| normalize_text(c) }
101
- end
102
-
103
- # Normalize text by collapsing whitespace
104
- #
105
- # @param text [String, nil] Text to normalize
106
- # @return [String] Normalized text
107
- def normalize_text(text)
108
- return "" if text.nil?
109
-
110
- text.to_s
111
- .gsub(/[\p{Space}\u00a0]+/, " ")
112
- .strip
113
- end
114
- end
115
- end
116
- end
117
- end
@@ -1,86 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_dimension"
4
-
5
- module Canon
6
- module Comparison
7
- module Dimensions
8
- # Element position dimension
9
- #
10
- # Handles comparison of element positions within their parent.
11
- # Supports :strict and :ignore behaviors.
12
- #
13
- # Behaviors:
14
- # - :strict - Elements must appear in the same position (index)
15
- # - :ignore - Element position doesn't matter
16
- class ElementPositionDimension < BaseDimension
17
- # Extract element position from a node
18
- #
19
- # Returns the index of this node among its siblings of the same type.
20
- #
21
- # @param node [Moxml::Node, Nokogiri::XML::Node] Node to extract from
22
- # @return [Integer] Position index (0-based)
23
- def extract_data(node)
24
- return 0 unless node
25
-
26
- if Canon::XmlBackend.nokogiri?
27
- extract_from_nokogiri(node)
28
- else
29
- extract_from_moxml(node)
30
- end
31
- end
32
-
33
- # Strict element position comparison
34
- #
35
- # @param pos1 [Integer] First position
36
- # @param pos2 [Integer] Second position
37
- # @return [Boolean] true if positions are equal
38
- def compare_strict(pos1, pos2) # rubocop:disable Naming/PredicateMethod
39
- pos1 == pos2
40
- end
41
-
42
- private
43
-
44
- # Extract position from Moxml node
45
- #
46
- # @param node [Moxml::Node] Moxml node
47
- # @return [Integer] Position index
48
- def extract_from_moxml(node)
49
- return 0 unless node.parent
50
-
51
- # Find position among siblings of the same element name
52
- siblings = node.parent.children
53
- node.name
54
-
55
- siblings.each_with_index do |sibling, index|
56
- if sibling == node
57
- return index
58
- end
59
- end
60
-
61
- 0
62
- end
63
-
64
- # Extract position from Nokogiri node
65
- #
66
- # @param node [Nokogiri::XML::Node] Nokogiri node
67
- # @return [Integer] Position index
68
- def extract_from_nokogiri(node)
69
- return 0 unless node.parent
70
-
71
- # Find position among siblings
72
- siblings = node.parent.children
73
- node.name
74
-
75
- siblings.each_with_index do |sibling, index|
76
- if sibling == node
77
- return index
78
- end
79
- end
80
-
81
- 0
82
- end
83
- end
84
- end
85
- end
86
- end
@@ -1,115 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_dimension"
4
- require_relative "../match_options"
5
-
6
- module Canon
7
- module Comparison
8
- module Dimensions
9
- # Structural whitespace dimension
10
- #
11
- # Handles comparison of structural whitespace (whitespace between elements).
12
- # Supports :strict, :normalize, and :ignore behaviors.
13
- #
14
- # Behaviors:
15
- # - :strict - Exact whitespace comparison
16
- # - :normalize - Collapse whitespace and compare
17
- # - :ignore - Skip structural whitespace comparison
18
- class StructuralWhitespaceDimension < BaseDimension
19
- # Extract structural whitespace from a node
20
- #
21
- # Returns whitespace text nodes that are between elements (structural).
22
- #
23
- # @param node [Moxml::Node, Nokogiri::XML::Node] Node to extract from
24
- # @return [Array<String>] Array of structural whitespace strings
25
- def extract_data(node)
26
- return [] unless node
27
-
28
- if Canon::XmlBackend.nokogiri?
29
- extract_from_nokogiri(node)
30
- else
31
- extract_from_moxml(node)
32
- end
33
- end
34
-
35
- # Strict structural whitespace comparison
36
- #
37
- # @param ws1 [Array<String>] First whitespace array
38
- # @param ws2 [Array<String>] Second whitespace array
39
- # @return [Boolean] true if structural whitespace is exactly equal
40
- def compare_strict(ws1, ws2) # rubocop:disable Naming/PredicateMethod
41
- ws1 == ws2
42
- end
43
-
44
- # Normalized structural whitespace comparison
45
- #
46
- # Collapses whitespace in each entry and compares.
47
- #
48
- # @param ws1 [Array<String>] First whitespace array
49
- # @param ws2 [Array<String>] Second whitespace array
50
- # @return [Boolean] true if normalized structural whitespace is equal
51
- def compare_normalize(ws1, ws2) # rubocop:disable Naming/PredicateMethod
52
- normalize_whitespace(ws1) == normalize_whitespace(ws2)
53
- end
54
-
55
- private
56
-
57
- # Extract structural whitespace from Moxml node
58
- #
59
- # @param node [Moxml::Node] Moxml node
60
- # @return [Array<String>] Array of structural whitespace strings
61
- def extract_from_moxml(node)
62
- whitespace = []
63
-
64
- node.children.each do |child|
65
- if child.node_type == :text
66
- text = child.content.strip
67
- # Check if this is purely whitespace (structural)
68
- if text.empty? || child.content =~ /\A\s*\z/
69
- whitespace << child.content
70
- end
71
- end
72
- end
73
-
74
- whitespace
75
- end
76
-
77
- # Extract structural whitespace from Nokogiri node
78
- #
79
- # @param node [Nokogiri::XML::Node] Nokogiri node
80
- # @return [Array<String>] Array of structural whitespace strings
81
- def extract_from_nokogiri(node)
82
- whitespace = []
83
-
84
- node.children.each do |child|
85
- if child.node_type == Nokogiri::XML::Node::TEXT_NODE
86
- text = child.content.strip
87
- # Check if this is purely whitespace (structural)
88
- if text.empty? || child.content =~ /\A\s*\z/
89
- whitespace << child.content
90
- end
91
- end
92
- end
93
-
94
- whitespace
95
- end
96
-
97
- # Normalize whitespace array
98
- #
99
- # @param whitespace [Array<String>] Whitespace strings
100
- # @return [Array<String>] Normalized whitespace strings
101
- def normalize_whitespace(whitespace)
102
- whitespace.map { |ws| normalize_text(ws) }
103
- end
104
-
105
- # Normalize text
106
- #
107
- # @param text [String, nil] Text to normalize
108
- # @return [String] Normalized text
109
- def normalize_text(text)
110
- MatchOptions.normalize_text(text)
111
- end
112
- end
113
- end
114
- end
115
- end
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "base_dimension"
4
- require_relative "../match_options"
5
-
6
- module Canon
7
- module Comparison
8
- module Dimensions
9
- # Text content dimension
10
- #
11
- # Handles comparison of text content in nodes.
12
- # Supports :strict, :normalize, and :ignore behaviors.
13
- #
14
- # Behaviors:
15
- # - :strict - Exact text comparison including whitespace
16
- # - :normalize - Collapse whitespace and compare
17
- # - :ignore - Skip text content comparison
18
- class TextContentDimension < BaseDimension
19
- # Extract text content from a node
20
- #
21
- # @param node [Moxml::Node, Nokogiri::XML::Node] Node to extract from
22
- # @return [String, nil] Text content or nil if not a text node
23
- def extract_data(node)
24
- return nil unless node
25
-
26
- if Canon::XmlBackend.nokogiri?
27
- extract_from_nokogiri(node)
28
- else
29
- extract_from_moxml(node)
30
- end
31
- end
32
-
33
- # Strict text comparison
34
- #
35
- # @param text1 [String, nil] First text
36
- # @param text2 [String, nil] Second text
37
- # @return [Boolean] true if texts are exactly equal
38
- def compare_strict(text1, text2) # rubocop:disable Naming/PredicateMethod
39
- text1.to_s == text2.to_s
40
- end
41
-
42
- # Normalized text comparison
43
- #
44
- # Collapses whitespace and compares.
45
- # Two whitespace-only strings that both normalize to empty are equivalent.
46
- #
47
- # @param text1 [String, nil] First text
48
- # @param text2 [String, nil] Second text
49
- # @return [Boolean] true if normalized texts are equal
50
- def compare_normalize(text1, text2) # rubocop:disable Naming/PredicateMethod
51
- normalized1 = normalize_text(text1)
52
- normalized2 = normalize_text(text2)
53
-
54
- # Both empty after normalization = equivalent
55
- # This handles whitespace-only text nodes that normalize to empty
56
- return true if normalized1.empty? && normalized2.empty?
57
-
58
- normalized1 == normalized2
59
- end
60
-
61
- private
62
-
63
- # Extract text from Moxml node
64
- #
65
- # @param node [Moxml::Node] Moxml node
66
- # @return [String, nil] Text content
67
- def extract_from_moxml(node)
68
- case node.node_type
69
- when :text, :cdata
70
- node.content
71
- when :element
72
- # For element nodes, extract concatenated text from children
73
- node.text
74
- end
75
- end
76
-
77
- # Extract text from Nokogiri node
78
- #
79
- # @param node [Nokogiri::XML::Node] Nokogiri node
80
- # @return [String, nil] Text content
81
- def extract_from_nokogiri(node)
82
- case node.node_type
83
- when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
84
- node.content
85
- when Nokogiri::XML::Node::ELEMENT_NODE
86
- node.content
87
- end
88
- end
89
-
90
- # Normalize text by collapsing whitespace
91
- #
92
- # Uses MatchOptions.normalize_text for consistency.
93
- #
94
- # @param text [String, nil] Text to normalize
95
- # @return [String] Normalized text
96
- def normalize_text(text)
97
- MatchOptions.normalize_text(text)
98
- end
99
- end
100
- end
101
- end
102
- end