canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../diff/path_builder"
4
+ require_relative "../../diff/node_serializer"
5
+
6
+ module Canon
7
+ module TreeDiff
8
+ module OperationConverterHelpers
9
+ # Metadata enrichment for DiffNodes
10
+ # Handles path building, serialization, and attribute extraction
11
+ module MetadataEnricher
12
# Assemble the presentation metadata hash for a DiffNode.
#
# The path is built from the first tree node when it is present and
# from the second one otherwise; serialized content and attributes are
# collected separately for each side.
#
# @param tree_node1 [Canon::TreeDiff::Core::TreeNode, nil] First tree node
# @param tree_node2 [Canon::TreeDiff::Core::TreeNode, nil] Second tree node
# @param format [Symbol] Document format
# @return [Hash] Hash with :path, :serialized_before, :serialized_after,
#   :attributes_before and :attributes_after
def self.enrich(tree_node1, tree_node2, format)
  anchor = tree_node1 || tree_node2

  metadata = { path: build_path(anchor, format) }
  metadata[:serialized_before] = serialize(tree_node1)
  metadata[:serialized_after] = serialize(tree_node2)
  metadata[:attributes_before] = extract_attributes(tree_node1)
  metadata[:attributes_after] = extract_attributes(tree_node2)
  metadata
end
28
+
29
# Build the canonical path of a TreeNode via Canon::Diff::PathBuilder.
#
# The :xml document format is passed on as :document; every other
# format is passed on as :fragment.
#
# @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
# @param format [Symbol] Document format
# @return [String, nil] Canonical path, or nil when no node is given
def self.build_path(tree_node, format)
  return if tree_node.nil?

  path_format = format == :xml ? :document : :fragment
  Canon::Diff::PathBuilder.build(tree_node, format: path_format)
end
40
+
41
# Serialize the node behind a TreeNode to a string.
#
# When the given object responds to #source_node that node is
# serialized; otherwise the object itself is handed to the serializer.
#
# @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
# @return [String, nil] Serialized content, or nil when no node is given
def self.serialize(tree_node)
  return if tree_node.nil?

  source = tree_node.respond_to?(:source_node) ? tree_node.source_node : tree_node
  Canon::Diff::NodeSerializer.serialize(source)
end
57
+
58
# Read the attributes of a TreeNode.
#
# @param tree_node [Canon::TreeDiff::Core::TreeNode, nil] Tree node
# @return [Hash, nil] nil when no node is given; an empty hash when the
#   node has no #attributes method or its attributes are nil; otherwise
#   the node's attributes as returned by the node
def self.extract_attributes(tree_node)
  return if tree_node.nil?
  return {} unless tree_node.respond_to?(:attributes)

  tree_node.attributes || {}
end
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ module OperationConverterHelpers
6
+ # Post-processing of DiffNodes
7
+ # Handles detection of attribute-order-only differences and other optimizations
8
+ module PostProcessor
9
# Reclassify DELETE/INSERT pairs whose elements differ only in
# attribute order.
#
# A DELETE (node1 only) and an INSERT (node2 only) are paired when both
# nodes respond to #name and #attributes, share the same name, carry
# non-empty attributes that are equal ignoring order, and satisfy
# nodes_same_except_attr_order?. Document position is not compared.
#
# Both DiffNodes of a pair are mutated in place: dimension becomes
# :attribute_order, reason becomes "attribute order changed", and
# normative is taken from the determiner.
#
# Each INSERT is paired with at most one DELETE. Without that, a second
# identical element that really was deleted would also be matched
# against the already-paired INSERT and be misreported as a mere
# attribute-order change.
#
# @param diff_nodes [Array<DiffNode>] Diff nodes to process
# @param normative_determiner [#call] Called with :attribute_order to
#   determine normative status
# @return [Array<DiffNode>] The same array that was passed in
def self.detect_attribute_order_diffs(diff_nodes, normative_determiner)
  deletes = diff_nodes.select { |dn| dn.node1 && !dn.node2 }
  # INSERT candidates that have not been paired yet
  unpaired_inserts = diff_nodes.select { |dn| !dn.node1 && dn.node2 }

  deletes.each do |delete_node|
    break if unpaired_inserts.empty?

    node1 = delete_node.node1
    next unless node1.respond_to?(:name) && node1.respond_to?(:attributes)

    # Without attributes there is no attribute order to differ in
    next if node1.attributes.nil? || node1.attributes.empty?

    match_index = unpaired_inserts.index do |insert_node|
      node2 = insert_node.node2
      next false unless node2.respond_to?(:name) && node2.respond_to?(:attributes)
      next false unless node1.name == node2.name
      next false if node2.attributes.nil? || node2.attributes.empty?
      next false unless attributes_equal_ignoring_order?(
        node1.attributes, node2.attributes
      )

      # Text and child structure must agree as well
      nodes_same_except_attr_order?(node1, node2)
    end
    next unless match_index

    # Consume the INSERT (by position, not by ==) so it cannot be reused
    matching_insert = unpaired_inserts.delete_at(match_index)

    [delete_node, matching_insert].each do |paired|
      paired.dimension = :attribute_order
      paired.reason = "attribute order changed"
      paired.normative = normative_determiner.call(:attribute_order)
    end
  end

  diff_nodes
end
61
+
62
# Check whether two attribute collections hold the same name/value
# pairs, regardless of the order in which they are listed.
#
# Hash equality in Ruby already ignores insertion order, so the hashes
# are compared directly. Sorting the pairs first is unnecessary and
# raises ArgumentError when the keys are not mutually comparable
# (for example a mix of Symbol and String keys).
#
# @param attrs1 [Hash, #to_h, nil] First attribute collection
# @param attrs2 [Hash, #to_h, nil] Second attribute collection
# @return [Boolean] True if both are nil, or both contain the same pairs
def self.attributes_equal_ignoring_order?(attrs1, attrs2)
  return true if attrs1.nil? && attrs2.nil?
  return false if attrs1.nil? || attrs2.nil?

  # Normalize hash-like inputs to plain hashes
  attrs1 = attrs1.to_h if attrs1.respond_to?(:to_h)
  attrs2 = attrs2.to_h if attrs2.respond_to?(:to_h)

  attrs1 == attrs2
end
78
+
79
# Check whether two nodes agree in text and in child structure.
#
# The nodes must have equal #text, the same number of children, and
# children whose names match pairwise by position. Only child names are
# compared here, not the children's own attributes or descendants.
#
# @param node1 [Nokogiri::XML::Node] First node
# @param node2 [Nokogiri::XML::Node] Second node
# @return [Boolean] True if text, child count and child names all match
def self.nodes_same_except_attr_order?(node1, node2)
  return false unless node1.text == node2.text
  return false unless node1.children.length == node2.children.length

  # Childless nodes are trivially the same in structure
  return true if node1.children.none?

  node1.children.zip(node2.children).all? do |left, right|
    left.name == right.name
  end
end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Canon
6
+ module TreeDiff
7
+ module OperationConverterHelpers
8
+ # Reason string builders for operations
9
+ # Handles creation of human-readable reason messages for DiffNodes
10
+ module ReasonBuilder
11
# Describe an INSERT operation.
#
# When the operation's :node responds to #label, the message carries a
# preview: the operation's :content if present, otherwise the label
# wrapped in angle brackets.
#
# @param operation [Operation] Operation, read via [:node] and [:content]
# @return [String] Reason description
def self.build_insert_reason(operation)
  node = operation[:node]
  return "Element inserted" unless node.respond_to?(:label)

  preview = operation[:content] || "<#{node.label}>"
  "Element inserted: #{preview}"
end
26
+
27
# Describe a DELETE operation.
#
# When the operation's :node responds to #label, the message carries a
# preview: the operation's :content if present, otherwise the label
# wrapped in angle brackets.
#
# @param operation [Operation] Operation, read via [:node] and [:content]
# @return [String] Reason description
def self.build_delete_reason(operation)
  node = operation[:node]
  return "Element deleted" unless node.respond_to?(:label)

  preview = operation[:content] || "<#{node.label}>"
  "Element deleted: #{preview}"
end
42
+
43
# Describe an UPDATE operation by its change type.
#
# @param operation [Operation] Operation, read via [:change_type]
# @return [String] "updated <change type>", using "content" when the
#   operation carries no change type
def self.build_update_reason(operation)
  kind = operation[:change_type]
  kind ||= "content"
  "updated #{kind}"
end
51
+
52
# Describe a MOVE operation.
#
# Positions are included only when both :from_position and
# :to_position are present on the operation.
#
# @param operation [Operation] Operation, read via [:from_position]
#   and [:to_position]
# @return [String] Reason description
def self.build_move_reason(operation)
  origin = operation[:from_position]
  target = operation[:to_position]
  return "moved to different position" unless origin && target

  "moved from position #{origin} to #{target}"
end
66
+
67
# Build a detailed reason for attribute differences.
#
# Lists attribute names only present on the old side ("Missing"), only
# present on the new side ("Extra"), and names present on both sides
# whose values differ ("Changed", with both values truncated to 20
# characters). Entries appear in the order of the side they come from;
# "Changed" follows the order of the old attributes.
#
# A nil side is treated as having no attributes, because
# build_attribute_value_reason forwards changes[:old] / changes[:new]
# after checking only that the :old key exists.
#
# @param old_attrs [Hash, nil] Old attributes
# @param new_attrs [Hash, nil] New attributes
# @return [String] Detailed reason, or "Attribute values differ" when
#   no difference is found
def self.build_attribute_diff_details(old_attrs, new_attrs)
  old_attrs ||= {}
  new_attrs ||= {}

  missing = old_attrs.keys - new_attrs.keys
  extra = new_attrs.keys - old_attrs.keys
  changed = old_attrs.keys.select do |key|
    new_attrs.key?(key) && old_attrs[key] != new_attrs[key]
  end

  parts = []
  parts << "Missing: #{missing.join(', ')}" if missing.any?
  parts << "Extra: #{extra.join(', ')}" if extra.any?
  if changed.any?
    rendered = changed.map do |key|
      "#{key}=\"#{truncate(old_attrs[key], 20)}\" → \"#{truncate(new_attrs[key], 20)}\""
    end
    parts << "Changed: #{rendered.join(', ')}"
  end

  parts.any? ? "Attributes differ (#{parts.join('; ')})" : "Attribute values differ"
end
94
+
95
# Describe an attribute value change.
#
# The changes argument is either a plain flag (for example true) or a
# detailed hash of the form { old: ..., new: ... }. Only a hash that
# has an :old key produces the detailed message.
#
# @param changes [Hash, Object] Changes hash or flag
# @return [String] Reason description
def self.build_attribute_value_reason(changes)
  detailed = changes.is_a?(Hash) && changes.key?(:old)
  return "attribute values differ" unless detailed

  build_attribute_diff_details(changes[:old], changes[:new])
end
107
+
108
# Describe an attribute order change.
#
# The changes argument is either a plain flag or a detailed hash of the
# form { old: [...], new: [...] }. Only a hash that has an :old key
# produces the detailed message. A nil or missing side is rendered as
# an empty list instead of raising NoMethodError on nil.
#
# @param changes [Hash, Object] Changes hash or flag
# @return [String] Reason description
def self.build_attribute_order_reason(changes)
  return "attribute order differs" unless changes.is_a?(Hash) && changes.key?(:old)

  # Array() turns nil into [] and leaves arrays untouched
  old_order = Array(changes[:old])
  new_order = Array(changes[:new])
  "Attribute order changed: [#{old_order.join(', ')}] → [#{new_order.join(', ')}]"
end
121
+
122
# Describe a text content change.
#
# The changes argument is either a plain flag or a detailed hash of the
# form { old: ..., new: ... }. Only a hash that has an :old key
# produces the detailed message, which previews both values truncated
# to 40 characters; a nil value is previewed as an empty string.
#
# @param changes [Hash, Object] Changes hash or flag
# @return [String] Reason description
def self.build_text_content_reason(changes)
  return "text content differs" unless changes.is_a?(Hash) && changes.key?(:old)

  before, after = %i[old new].map do |side|
    truncate((changes[side] || "").to_s, 40)
  end
  "Text content changed: \"#{before}\" → \"#{after}\""
end
137
+
138
# Describe an element name change.
#
# The changes argument is either a plain flag or a detailed hash of the
# form { old: ..., new: ... }. Only a hash that has an :old key
# produces the detailed message showing both names.
#
# @param changes [Hash, Object] Changes hash or flag
# @return [String] Reason description
def self.build_element_name_reason(changes)
  return "element name differs" unless changes.is_a?(Hash) && changes.key?(:old)

  previous_name = changes[:old]
  current_name = changes[:new]
  "Element name changed: <#{previous_name}> → <#{current_name}>"
end
151
+
152
# Truncate text for reason messages.
#
# Text longer than max_length is cut and ends in "..." so that the
# result is exactly max_length characters long. When max_length is
# smaller than 3 there is no room for the ellipsis, so the text is
# simply cut to max_length characters (a negative limit yields "");
# subtracting 3 in that case would produce a negative slice end and a
# result longer than the limit.
#
# @param text [#to_s, nil] Text to truncate
# @param max_length [Integer] Maximum length of the result
# @return [String] Truncated text; "" when text is nil
def self.truncate(text, max_length)
  return "" if text.nil?

  text = text.to_s
  return text if text.length <= max_length
  return text[0, [max_length, 0].max] if max_length < 3

  "#{text[0...max_length - 3]}..."
end
165
+ end
166
+ end
167
+ end
168
+ end
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../operation_converter_helpers/reason_builder"
4
+
5
+ module Canon
6
+ module TreeDiff
7
+ module OperationConverterHelpers
8
# Converts UPDATE operations into DiffNodes.
#
# An UPDATE operation carries a :changes hash whose keys (:attributes,
# :attribute_order, :value, :label) name what changed. One DiffNode is
# created per changed key so every dimension can be classified on its
# own. All DiffNodes are created by one shared private helper; the
# public create_* methods differ only in dimension and reason text.
module UpdateChangeHandler
  # Maps a key of the operation's :changes hash to the public builder
  # that handles it. The order of this table is the order of the
  # resulting DiffNodes.
  CHANGE_BUILDERS = {
    attributes: :create_attribute_value_diff,
    attribute_order: :create_attribute_order_diff,
    value: :create_text_content_diff,
    label: :create_element_name_diff,
  }.freeze

  # Convert an UPDATE operation to DiffNode(s).
  #
  # Returns one DiffNode per recognised key in operation[:changes]. When
  # :changes is not a Hash, or contains none of the recognised keys, a
  # single generic update DiffNode is returned instead.
  #
  # @param operation [Operation] Update operation, read via [:node1],
  #   [:node2] and [:changes]
  # @param metadata [Hash] Enriched metadata, passed on to DiffNode.new
  #   as keyword arguments
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc/object to determine normative status
  # @return [Array<DiffNode>] Diff nodes representing updates
  def self.convert(operation, metadata, is_metadata, normative_determiner)
    node1 = unwrap_source(operation[:node1])
    node2 = unwrap_source(operation[:node2])

    # :changes may be a boolean flag or another non-hash value
    changes = operation[:changes]
    changes = {} unless changes.is_a?(Hash)

    diff_nodes = []
    CHANGE_BUILDERS.each do |change_key, builder|
      next unless changes.key?(change_key)

      diff_nodes << public_send(
        builder, node1, node2, changes[change_key],
        metadata, is_metadata, normative_determiner
      )
    end

    # Nothing specific was reported: fall back to a generic update
    if diff_nodes.empty?
      diff_nodes << create_generic_update_diff(
        node1, node2, metadata, is_metadata, normative_determiner
      )
    end

    diff_nodes
  end

  # Create DiffNode for attribute value differences
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param changes [Object] Attribute changes
  # @param metadata [Hash] Enriched metadata
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] Diff node in the :attribute_values dimension
  def self.create_attribute_value_diff(node1, node2, changes, metadata,
                                       is_metadata, normative_determiner)
    build_diff_node(
      node1, node2, :attribute_values,
      ReasonBuilder.build_attribute_value_reason(changes),
      metadata, is_metadata, normative_determiner
    )
  end

  # Create DiffNode for attribute order differences
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param changes [Object] Attribute order changes
  # @param metadata [Hash] Enriched metadata
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] Diff node in the :attribute_order dimension
  def self.create_attribute_order_diff(node1, node2, changes, metadata,
                                       is_metadata, normative_determiner)
    build_diff_node(
      node1, node2, :attribute_order,
      ReasonBuilder.build_attribute_order_reason(changes),
      metadata, is_metadata, normative_determiner
    )
  end

  # Create DiffNode for text content differences
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param changes [Object] Value changes
  # @param metadata [Hash] Enriched metadata
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] Diff node in the :text_content dimension
  def self.create_text_content_diff(node1, node2, changes, metadata,
                                    is_metadata, normative_determiner)
    build_diff_node(
      node1, node2, :text_content,
      ReasonBuilder.build_text_content_reason(changes),
      metadata, is_metadata, normative_determiner
    )
  end

  # Create DiffNode for element name differences
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param changes [Object] Label changes
  # @param metadata [Hash] Enriched metadata
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] Diff node in the :element_structure dimension
  def self.create_element_name_diff(node1, node2, changes, metadata,
                                    is_metadata, normative_determiner)
    build_diff_node(
      node1, node2, :element_structure,
      ReasonBuilder.build_element_name_reason(changes),
      metadata, is_metadata, normative_determiner
    )
  end

  # Create generic update DiffNode, used when no specific change was
  # reported. It is filed under :text_content with the fixed reason
  # "content differs".
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param metadata [Hash] Enriched metadata
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] Generic update diff node
  def self.create_generic_update_diff(node1, node2, metadata,
                                      is_metadata, normative_determiner)
    build_diff_node(
      node1, node2, :text_content, "content differs",
      metadata, is_metadata, normative_determiner
    )
  end

  # Return the source node behind a tree node, or the object itself
  # when it does not respond to #source_node.
  #
  # @param tree_node [Object] TreeNode from the adapter, or a raw node
  # @return [Object] Source node
  def self.unwrap_source(tree_node)
    tree_node.respond_to?(:source_node) ? tree_node.source_node : tree_node
  end

  # Build a DiffNode for one dimension and set its normative flag.
  #
  # Differences on metadata elements are never normative; for all other
  # nodes the determiner is asked about the given dimension.
  #
  # @param node1 [Object] First node
  # @param node2 [Object] Second node
  # @param dimension [Symbol] Dimension the difference belongs to
  # @param reason [String] Human-readable reason
  # @param metadata [Hash] Enriched metadata, splatted into DiffNode.new
  # @param is_metadata [Boolean] Whether nodes are metadata elements
  # @param normative_determiner [#call] Proc to determine normative status
  # @return [DiffNode] The new diff node
  def self.build_diff_node(node1, node2, dimension, reason, metadata,
                           is_metadata, normative_determiner)
    diff_node = Canon::Diff::DiffNode.new(
      node1: node1,
      node2: node2,
      dimension: dimension,
      reason: reason,
      **metadata,
    )
    diff_node.normative = is_metadata ? false : normative_determiner.call(dimension)
    diff_node
  end

  private_class_method :unwrap_source, :build_diff_node
end
186
+ end
187
+ end
188
+ end
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.1.7"
4
+ VERSION = "0.1.9"
5
5
  end
@@ -18,8 +18,9 @@ module Canon
18
18
  # Build XPath data model from XML string
19
19
  #
20
20
  # @param xml_string [String] XML content to parse
21
+ # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
21
22
  # @return [Nodes::RootNode] Root of the data model tree
22
- def self.from_xml(xml_string)
23
+ def self.from_xml(xml_string, preserve_whitespace: false)
23
24
  # Parse with Nokogiri
24
25
  doc = Nokogiri::XML(xml_string) do |config|
25
26
  config.nonet # Disable network access
@@ -30,7 +31,7 @@ module Canon
30
31
  check_for_relative_namespace_uris(doc)
31
32
 
32
33
  # Convert to XPath data model
33
- build_from_nokogiri(doc)
34
+ build_from_nokogiri(doc, preserve_whitespace: preserve_whitespace)
34
35
  end
35
36
 
36
37
  # Alias for compatibility with base class interface
@@ -74,19 +75,21 @@ module Canon
74
75
 
75
76
  # Build XPath data model from Nokogiri document or fragment
76
77
  # rubocop:disable Metrics/MethodLength
77
- def self.build_from_nokogiri(nokogiri_doc)
78
+ def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
78
79
  root = Nodes::RootNode.new
79
80
 
80
81
  if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
81
82
  # For Documents (XML, HTML4, HTML5, Moxml): process the root element
82
- root.add_child(build_element_node(nokogiri_doc.root))
83
+ root.add_child(build_element_node(nokogiri_doc.root,
84
+ preserve_whitespace: preserve_whitespace))
83
85
 
84
86
  # Process PIs and comments outside doc element
85
87
  nokogiri_doc.children.each do |child|
86
88
  next if child == nokogiri_doc.root
87
89
  next if child.is_a?(Nokogiri::XML::DTD)
88
90
 
89
- node = build_node_from_nokogiri(child)
91
+ node = build_node_from_nokogiri(child,
92
+ preserve_whitespace: preserve_whitespace)
90
93
  root.add_child(node) if node
91
94
  end
92
95
  else
@@ -95,7 +98,8 @@ module Canon
95
98
  nokogiri_doc.children.each do |child|
96
99
  next if child.is_a?(Nokogiri::XML::DTD)
97
100
 
98
- node = build_node_from_nokogiri(child)
101
+ node = build_node_from_nokogiri(child,
102
+ preserve_whitespace: preserve_whitespace)
99
103
  root.add_child(node) if node
100
104
  end
101
105
  end
@@ -104,12 +108,15 @@ module Canon
104
108
  end
105
109
 
106
110
  # Build node from Nokogiri node
107
- def self.build_node_from_nokogiri(nokogiri_node)
111
+ def self.build_node_from_nokogiri(nokogiri_node,
112
+ preserve_whitespace: false)
108
113
  case nokogiri_node
109
114
  when Nokogiri::XML::Element
110
- build_element_node(nokogiri_node)
115
+ build_element_node(nokogiri_node,
116
+ preserve_whitespace: preserve_whitespace)
111
117
  when Nokogiri::XML::Text
112
- build_text_node(nokogiri_node)
118
+ build_text_node(nokogiri_node,
119
+ preserve_whitespace: preserve_whitespace)
113
120
  when Nokogiri::XML::Comment
114
121
  build_comment_node(nokogiri_node)
115
122
  when Nokogiri::XML::ProcessingInstruction
@@ -119,7 +126,7 @@ module Canon
119
126
 
120
127
  # Build element node from Nokogiri element
121
128
  # rubocop:disable Metrics/MethodLength
122
- def self.build_element_node(nokogiri_element)
129
+ def self.build_element_node(nokogiri_element, preserve_whitespace: false)
123
130
  element = Nodes::ElementNode.new(
124
131
  name: nokogiri_element.name,
125
132
  namespace_uri: nokogiri_element.namespace&.href,
@@ -134,7 +141,8 @@ module Canon
134
141
 
135
142
  # Build child nodes
136
143
  nokogiri_element.children.each do |child|
137
- node = build_node_from_nokogiri(child)
144
+ node = build_node_from_nokogiri(child,
145
+ preserve_whitespace: preserve_whitespace)
138
146
  element.add_child(node) if node
139
147
  end
140
148
 
@@ -195,13 +203,16 @@ module Canon
195
203
  end
196
204
 
197
205
  # Build text node from Nokogiri text node
198
- def self.build_text_node(nokogiri_text)
206
+ def self.build_text_node(nokogiri_text, preserve_whitespace: false)
199
207
  # XML text nodes: preserve all content including whitespace
200
208
  # Unlike HTML, XML treats all whitespace as significant
201
209
  content = nokogiri_text.content
202
210
 
203
211
  # Skip empty text nodes between elements (common formatting whitespace)
204
- return nil if content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
212
+ # UNLESS preserve_whitespace is true (for structural_whitespace: :strict)
213
+ if !preserve_whitespace && content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
214
+ return nil
215
+ end
205
216
 
206
217
  # Nokogiri already handles CDATA conversion and entity resolution
207
218
  Nodes::TextNode.new(value: content)