canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "match_options"
4
+
5
+ module Canon
6
+ module Comparison
7
+ # Profile definition DSL with full validation
8
+ #
9
+ # Provides a clean, validated way to define custom comparison profiles.
10
+ # Catches errors at definition time with clear, actionable messages.
11
+ #
12
+ # @example Define a custom profile
13
+ # Canon::Comparison.define_profile(:my_custom) do
14
+ # text_content :normalize
15
+ # comments :ignore
16
+ # preprocessing :rendered
17
+ # end
18
+ class ProfileDefinition
19
+ # All valid dimensions for XML/HTML comparison
20
+ # These must match MatchOptions::Xml::MATCH_DIMENSIONS
21
+ VALID_DIMENSIONS = %i[
22
+ text_content
23
+ structural_whitespace
24
+ attribute_presence
25
+ attribute_order
26
+ attribute_values
27
+ element_position
28
+ comments
29
+ ].freeze
30
+
31
+ # Behaviors valid for each dimension
32
+ # Maps dimension name to array of valid behavior symbols
33
+ DIMENSION_BEHAVIORS = {
34
+ text_content: %i[strict normalize ignore],
35
+ structural_whitespace: %i[strict normalize ignore],
36
+ attribute_presence: %i[strict ignore],
37
+ attribute_order: %i[strict ignore],
38
+ attribute_values: %i[strict strip compact normalize ignore],
39
+ element_position: %i[strict ignore],
40
+ comments: %i[strict ignore],
41
+ }.freeze
42
+
43
+ attr_reader :name, :settings
44
+
45
+ # Initialize a new profile definition
46
+ #
47
+ # @param name [Symbol] Profile name
48
+ def initialize(name)
49
+ @name = name
50
+ @settings = {}
51
+ end
52
+
53
+ # Define a profile using DSL syntax
54
+ #
55
+ # @param name [Symbol] Profile name
56
+ # @yield [ProfileDefinition] DSL block for defining profile
57
+ # @return [Hash] Profile settings hash
58
+ # @raise [ProfileError] if profile definition is invalid
59
+ def self.define(name, &block)
60
+ definition = new(name)
61
+ definition.instance_eval(&block) if block
62
+ definition.validate!
63
+ definition.to_h
64
+ end
65
+
66
+ # Create DSL methods for each dimension
67
+ VALID_DIMENSIONS.each do |dimension|
68
+ define_method(dimension) do |behavior|
69
+ @settings[dimension] = behavior
70
+ end
71
+ end
72
+
73
+ # Set preprocessing mode
74
+ #
75
+ # @param mode [Symbol] Preprocessing mode
76
+ # @raise [ProfileError] if mode is invalid
77
+ def preprocessing(mode)
78
+ unless MatchOptions::PREPROCESSING_OPTIONS.include?(mode)
79
+ raise ProfileError,
80
+ "Invalid preprocessing mode: #{mode}. " \
81
+ "Valid options: #{MatchOptions::PREPROCESSING_OPTIONS.join(', ')}"
82
+ end
83
+
84
+ @settings[:preprocessing] = mode
85
+ end
86
+
87
+ # Enable/disable semantic diff
88
+ #
89
+ # @param enabled [Boolean] Whether to enable semantic diff (default: true)
90
+ def semantic_diff(enabled: true)
91
+ @settings[:semantic_diff] = enabled
92
+ end
93
+
94
+ # Set similarity threshold for semantic matching
95
+ #
96
+ # @param value [Numeric] Threshold between 0 and 1
97
+ # @raise [ProfileError] if value is out of range
98
+ def similarity_threshold(value)
99
+ unless value.is_a?(Numeric) && value >= 0 && value <= 1
100
+ raise ProfileError,
101
+ "Similarity threshold must be between 0 and 1, got: #{value}"
102
+ end
103
+
104
+ @settings[:similarity_threshold] = value
105
+ end
106
+
107
+ # Validate the profile definition
108
+ #
109
+ # @raise [ProfileError] if profile definition is invalid
110
+ def validate!
111
+ @settings.each do |key, value|
112
+ validate_dimension!(key, value) if VALID_DIMENSIONS.include?(key)
113
+ end
114
+ end
115
+
116
+ # Convert to hash
117
+ #
118
+ # @return [Hash] Profile settings
119
+ def to_h
120
+ @settings.dup
121
+ end
122
+
123
+ private
124
+
125
+ # Validate a dimension setting
126
+ #
127
+ # @param dimension [Symbol] Dimension name
128
+ # @param behavior [Symbol] Behavior value
129
+ # @raise [ProfileError] if dimension or behavior is invalid
130
+ def validate_dimension!(dimension, behavior)
131
+ unless DIMENSION_BEHAVIORS.key?(dimension)
132
+ raise ProfileError,
133
+ "Unknown dimension: #{dimension}. " \
134
+ "Valid dimensions: #{VALID_DIMENSIONS.join(', ')}"
135
+ end
136
+
137
+ valid_behaviors = DIMENSION_BEHAVIORS[dimension]
138
+ unless valid_behaviors.include?(behavior)
139
+ raise ProfileError,
140
+ "Invalid behavior '#{behavior}' for dimension '#{dimension}'. " \
141
+ "Valid behaviors: #{valid_behaviors.join(', ')}"
142
+ end
143
+ end
144
+ end
145
+
146
+ # Raised by ProfileDefinition for an invalid dimension behavior, preprocessing mode, or similarity threshold
147
+ class ProfileError < Error; end
148
+ end
149
+ end
@@ -0,0 +1,180 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Ruby Object Comparison Utilities
6
+ #
7
+ # Provides public comparison methods for Ruby objects (Hash, Array, primitives).
8
+ # This module extracts shared comparison logic that was previously
9
+ # accessed via send() from YamlComparator.
10
module RubyObjectComparator
  # Compare Ruby objects (Hash, Array, primitives) for JSON/YAML
  #
  # Both objects must be of exactly the same class; otherwise the pair is
  # reported as a type mismatch. Note that this makes Integer vs Float and
  # true vs false (TrueClass vs FalseClass) type mismatches.
  #
  # @param obj1 [Object] First object
  # @param obj2 [Object] Second object
  # @param opts [Hash] Comparison options (:verbose, :match_opts)
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_objects(obj1, obj2, opts, differences, path)
    unless obj1.instance_of?(obj2.class)
      add_difference(path, obj1, obj2, Comparison::UNEQUAL_TYPES,
                     opts, differences)
      return Comparison::UNEQUAL_TYPES
    end

    case obj1
    when Hash
      compare_hashes(obj1, obj2, opts, differences, path)
    when Array
      compare_arrays(obj1, obj2, opts, differences, path)
    else
      # Primitives and any other object are both compared with ==
      compare_primitives(obj1, obj2, opts, differences, path)
    end
  end

  # Compare two hashes
  #
  # With opts[:match_opts][:key_order] == :strict, hashes that hold the
  # same keys in a different order are reported as a key-order difference
  # and their values are not compared. Otherwise key order is ignored.
  #
  # @param hash1 [Hash] First hash
  # @param hash2 [Hash] Second hash
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_hashes(hash1, hash2, opts, differences, path)
    keys1 = hash1.keys
    keys2 = hash2.keys

    match_opts = opts[:match_opts]
    if match_opts
      if match_opts[:key_order] == :strict
        # Same key set but different order is a difference in its own right
        if keys1 != keys2 && keys1.sort_by(&:to_s) == keys2.sort_by(&:to_s)
          add_difference(child_path(path, "(key order)"), keys1, keys2,
                         Comparison::UNEQUAL_HASH_KEY_ORDER, opts, differences)
          return Comparison::UNEQUAL_HASH_KEY_ORDER
        end
      else
        # Order is irrelevant: walk keys in a stable, sorted order
        keys1 = keys1.sort_by(&:to_s)
        keys2 = keys2.sort_by(&:to_s)
      end
    end

    # Keys present on one side only
    missing_in_second = keys1 - keys2
    missing_in_first = keys2 - keys1

    missing_in_second.each do |key|
      add_difference(child_path(path, key), hash1[key], nil,
                     Comparison::MISSING_HASH_KEY, opts, differences)
    end

    missing_in_first.each do |key|
      add_difference(child_path(path, key), nil, hash2[key],
                     Comparison::MISSING_HASH_KEY, opts, differences)
    end

    # Compare every common key (no short-circuit, so that all differences
    # are recorded)
    all_equivalent = true
    (keys1 & keys2).each do |key|
      result = compare_objects(hash1[key], hash2[key], opts,
                               differences, child_path(path, key))
      all_equivalent = false unless result == Comparison::EQUIVALENT
    end

    # Unequal values take precedence over missing keys
    return Comparison::UNEQUAL_HASH_VALUES unless all_equivalent

    if missing_in_first.empty? && missing_in_second.empty?
      Comparison::EQUIVALENT
    else
      Comparison::MISSING_HASH_KEY
    end
  end

  # Compare two arrays
  #
  # Arrays of different length are reported as such without comparing
  # their elements; otherwise elements are compared pairwise by index.
  #
  # @param arr1 [Array] First array
  # @param arr2 [Array] Second array
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_arrays(arr1, arr2, opts, differences, path)
    unless arr1.length == arr2.length
      add_difference(path, arr1, arr2,
                     Comparison::UNEQUAL_ARRAY_LENGTHS, opts,
                     differences)
      return Comparison::UNEQUAL_ARRAY_LENGTHS
    end

    all_equivalent = true
    arr1.each_with_index do |elem1, index|
      result = compare_objects(elem1, arr2[index], opts, differences,
                               "#{path}[#{index}]")
      all_equivalent = false unless result == Comparison::EQUIVALENT
    end

    all_equivalent ? Comparison::EQUIVALENT : Comparison::UNEQUAL_ARRAY_ELEMENTS
  end

  # Compare primitive values with ==
  #
  # @param val1 [Object] First value
  # @param val2 [Object] Second value
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append differences to
  # @param path [String] Current path in the object structure
  # @return [Symbol] Comparison result constant
  def self.compare_primitives(val1, val2, opts, differences, path)
    return Comparison::EQUIVALENT if val1 == val2

    add_difference(path, val1, val2,
                   Comparison::UNEQUAL_PRIMITIVES, opts,
                   differences)
    Comparison::UNEQUAL_PRIMITIVES
  end

  # Add a Ruby object difference
  #
  # Differences are only recorded when opts[:verbose] is truthy.
  #
  # @param path [String] Path to the difference
  # @param obj1 [Object] First object
  # @param obj2 [Object] Second object
  # @param diff_code [Symbol] Difference code
  # @param opts [Hash] Comparison options
  # @param differences [Array] Array to append difference to
  def self.add_difference(path, obj1, obj2, diff_code, opts, differences)
    return unless opts[:verbose]

    differences << {
      path: path,
      value1: obj1,
      value2: obj2,
      difference: diff_code,
    }
  end

  # Build the dotted path of a hash entry below +path+
  #
  # @param path [String] Path of the enclosing hash ("" at the root)
  # @param key [Object] Hash key (or label) to append
  # @return [String] Path of the entry
  def self.child_path(path, key)
    path.empty? ? key.to_s : "#{path}.#{key}"
  end
  private_class_method :child_path
end
179
+ end
180
+ end
@@ -3,6 +3,7 @@
3
3
  require_relative "base_match_strategy"
4
4
  require_relative "../../tree_diff/tree_diff_integrator"
5
5
  require_relative "../../tree_diff/operation_converter"
6
+ require_relative "../xml_node_comparison"
6
7
 
7
8
  module Canon
8
9
  module Comparison
@@ -125,10 +126,9 @@ module Canon
125
126
  # @return [Array<String>] Preprocessed strings
126
127
  def preprocess_xml(doc1, doc2)
127
128
  # Serialize XML to string
128
- # Use XmlComparator's serializer for Canon::Xml::Node
129
+ # Use XmlNodeComparison's serializer for Canon::Xml::Node
129
130
  xml1 = if doc1.is_a?(Canon::Xml::Node)
130
- require_relative "../xml_comparator"
131
- XmlComparator.send(:serialize_node_to_xml, doc1)
131
+ XmlNodeComparison.serialize_node_to_xml(doc1)
132
132
  elsif doc1.respond_to?(:to_xml)
133
133
  doc1.to_xml
134
134
  else
@@ -136,8 +136,7 @@ module Canon
136
136
  end
137
137
 
138
138
  xml2 = if doc2.is_a?(Canon::Xml::Node)
139
- require_relative "../xml_comparator"
140
- XmlComparator.send(:serialize_node_to_xml, doc2)
139
+ XmlNodeComparison.serialize_node_to_xml(doc2)
141
140
  elsif doc2.respond_to?(:to_xml)
142
141
  doc2.to_xml
143
142
  else
@@ -161,12 +160,11 @@ module Canon
161
160
  # @param doc2 [Object] Second HTML document
162
161
  # @return [Array<String>] Preprocessed strings
163
162
  def preprocess_html(doc1, doc2)
164
- # For Canon::Xml::Node, use XmlComparator's serializer
163
+ # For Canon::Xml::Node, use XmlNodeComparison's serializer
165
164
  # For XML::DocumentFragment (from parse_node_as_fragment), use to_s
166
165
  # to avoid Nokogiri auto-inserting meta tags during to_html serialization
167
166
  html1 = if doc1.is_a?(Canon::Xml::Node)
168
- require_relative "../xml_comparator"
169
- XmlComparator.send(:serialize_node_to_xml, doc1)
167
+ XmlNodeComparison.serialize_node_to_xml(doc1)
170
168
  elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
171
169
  doc1.to_s
172
170
  elsif doc1.respond_to?(:to_html)
@@ -176,8 +174,7 @@ module Canon
176
174
  end
177
175
 
178
176
  html2 = if doc2.is_a?(Canon::Xml::Node)
179
- require_relative "../xml_comparator"
180
- XmlComparator.send(:serialize_node_to_xml, doc2)
177
+ XmlNodeComparison.serialize_node_to_xml(doc2)
181
178
  elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
182
179
  doc2.to_s
183
180
  elsif doc2.respond_to?(:to_html)
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Whitespace sensitivity utilities for element-level control
6
+ #
7
+ # This module provides logic to determine whether whitespace should be
8
+ # preserved during comparison based on:
9
+ # - Format-specific defaults (HTML has built-in sensitive elements)
10
+ # - User-configured whitelist (elements that care about whitespace)
11
+ # - User-configured blacklist (elements that don't care about whitespace)
12
+ # - xml:space attribute in the document itself
13
+ # - respect_xml_space flag (whether to honor or override xml:space)
14
+ #
15
+ # == Priority Order
16
+ #
17
+ # 1. respect_xml_space: false → User config only (ignore xml:space)
18
+ # 2. User whitelist → Use whitelist (user explicitly declared)
19
+ # 3. Format defaults → HTML: [:pre, :code, :textarea, :script, :style], XML: []
20
+ # 4. User blacklist → Remove from defaults/whitelist
21
+ # 5. xml:space="preserve" → Element is sensitive
22
+ # 6. xml:space="default" → Use steps 1-4
23
+ #
24
+ # == Usage
25
+ #
26
+ # WhitespaceSensitivity.element_sensitive?(node, opts)
27
+ # => true if whitespace should be preserved for this element
28
module WhitespaceSensitivity
  # Namespace URI of the reserved "xml" prefix (as in xml:space)
  XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace"

  # HTML elements whose whitespace is preserved by default
  HTML_SENSITIVE_ELEMENTS = %i[pre code textarea script style].freeze

  # XML (and any unknown format) has no default sensitive elements
  NO_SENSITIVE_ELEMENTS = [].freeze

  private_constant :XML_NAMESPACE_URI, :HTML_SENSITIVE_ELEMENTS,
                   :NO_SENSITIVE_ELEMENTS

  class << self
    # Check if an element is whitespace-sensitive based on configuration
    #
    # The decision is made for the parent element of +node+:
    # 1. respect_xml_space: false → only the user whitelist counts
    # 2. xml:space="preserve" on the parent → sensitive
    # 3. otherwise (no xml:space, or xml:space="default") → format
    #    defaults plus whitelist, minus blacklist
    #
    # @param node [Object] The node whose parent element is checked
    # @param opts [Hash] Comparison options containing match_opts
    # @return [Boolean] true if whitespace should be preserved for this element
    def element_sensitive?(node, opts)
      match_opts = opts[:match_opts]
      return false unless match_opts
      return false unless text_node_parent?(node)

      parent = node.parent

      # User override: ignore xml:space in the document entirely
      unless respect_xml_space?(match_opts)
        return user_config_sensitive?(parent, match_opts)
      end

      # Document declaration wins over configuration
      return true if xml_space_preserve?(parent)

      # xml:space="default" asks for the application's default handling,
      # so it is treated exactly like an absent attribute: fall through to
      # the configured behavior instead of forcing "not sensitive".
      configured_sensitive?(parent, match_opts)
    end

    # Check if whitespace-only text node should be filtered
    #
    # @param node [Object] The text node to check
    # @param opts [Hash] Comparison options
    # @return [Boolean] true if node should be preserved (not filtered)
    def preserve_whitespace_node?(node, opts)
      return false unless node.respond_to?(:parent)
      return false unless node.parent

      element_sensitive?(node, opts)
    end

    # Get format-specific default sensitive elements
    #
    # This is the SINGLE SOURCE OF TRUTH for default whitespace-sensitive
    # elements. All other code should use this method to get the list.
    #
    # @param match_opts [Hash] Resolved match options (:format defaults to :xml)
    # @return [Array<Symbol>] Default sensitive element names (frozen)
    def format_default_sensitive_elements(match_opts)
      case match_opts[:format] || :xml
      when :html, :html4, :html5
        HTML_SENSITIVE_ELEMENTS
      else
        # XML and unknown formats are purely user-controlled
        NO_SENSITIVE_ELEMENTS
      end
    end

    # Check if an element is in the default sensitive list for its format
    #
    # @param element_name [String, Symbol] The element name to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element is in default sensitive list
    def default_sensitive_element?(element_name, match_opts)
      format_default_sensitive_elements(match_opts)
        .include?(element_name.to_sym)
    end

    private

    # Check if we should respect xml:space attribute
    #
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] the :respect_xml_space option, true when not given
    def respect_xml_space?(match_opts)
      match_opts.fetch(:respect_xml_space, true)
    end

    # Check if xml:space="preserve" is set
    #
    # @param element [Object] The element to check
    # @return [Boolean] true if xml:space="preserve"
    def xml_space_preserve?(element)
      xml_space?(element, "preserve")
    end

    # Check if xml:space="default" is set
    #
    # Not consulted by element_sensitive?, which handles an explicit
    # "default" the same way as a missing attribute.
    #
    # @param element [Object] The element to check
    # @return [Boolean] true if xml:space="default"
    def xml_space_default?(element)
      xml_space?(element, "default")
    end

    # Check if the element carries xml:space with the given value
    #
    # @param element [Object] The element to check
    # @param expected [String] Attribute value to look for
    # @return [Boolean] true if xml:space equals +expected+
    def xml_space?(element, expected)
      if element.is_a?(Canon::Xml::Nodes::ElementNode)
        # xml:space is stored with name="space" and the XML namespace URI
        element.attribute_nodes.any? do |attr|
          attr.name == "space" &&
            attr.namespace_uri == XML_NAMESPACE_URI &&
            attr.value == expected
        end
      elsif element.respond_to?(:[])
        element["xml:space"] == expected
      else
        false
      end
    end

    # Check sensitivity based on user configuration only
    #
    # @param element [Object] The element to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element is in whitelist
    def user_config_sensitive?(element, match_opts)
      whitelist = match_opts[:whitespace_sensitive_elements]
      return false unless whitelist

      whitelist.include?(element.name.to_sym)
    end

    # Check sensitivity based on user config + format defaults
    #
    # Uses plain array set-operations, so it does not rely on the +set+
    # library having been loaded.
    #
    # @param element [Object] The element to check
    # @param match_opts [Hash] Resolved match options
    # @return [Boolean] true if element should be sensitive
    def configured_sensitive?(element, match_opts)
      # Format defaults plus whitelist ...
      sensitive = format_default_sensitive_elements(match_opts) |
        Array(match_opts[:whitespace_sensitive_elements])

      # ... minus blacklist, which removes from everything
      sensitive -= Array(match_opts[:whitespace_insensitive_elements])

      sensitive.include?(element.name.to_sym)
    end

    # Check if node has a parent that's an element (not document root)
    #
    # @param node [Object] The node to check
    # @return [Boolean] true if node has an element parent
    def text_node_parent?(node)
      return false unless node.respond_to?(:parent)

      parent = node.parent
      return false unless parent
      return true if parent.respond_to?(:element?) && parent.element?

      # Fallback for node classes exposing a symbolic node_type
      parent.respond_to?(:node_type) && parent.node_type == :element
    end
  end
end
207
+ end
208
+ end