canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Canon
6
+ module TreeDiff
7
+ module Adapters
8
+ # JSONAdapter converts JSON objects to TreeNode structures and back,
9
+ # enabling semantic tree diffing on JSON documents.
10
+ #
11
+ # This adapter:
12
+ # - Converts Hash/Array JSON structures to TreeNode tree
13
+ # - Handles nested objects, arrays, and primitive values
14
+ # - Preserves type information for round-trip conversion
15
+ # - Maps JSON structure to tree representation
16
+ #
17
+ # JSON to TreeNode mapping:
18
+ # - Objects (Hash): TreeNode with label "object", children for each key
19
+ # - Arrays: TreeNode with label "array", indexed children
20
+ # - Primitives: TreeNode with label "value", value stored directly
21
+ #
22
+ # @example Convert JSON to TreeNode
23
+ # json = { "name" => "John", "age" => 30 }
24
+ # adapter = JSONAdapter.new
25
+ # tree = adapter.to_tree(json)
26
+ #
27
+ class JSONAdapter
28
+ attr_reader :match_options
29
+
30
# Create an adapter and remember the supplied match options.
#
# The options are stored unchanged and exposed through the
# +match_options+ reader.
#
# @param match_options [Hash] Match options (reserved for future use)
def initialize(match_options: {})
  @match_options = match_options
end
36
+
37
# Turn a parsed JSON value into a TreeNode (sub)tree.
#
# Hashes and Arrays are converted recursively; every other value
# becomes a leaf "value" node.
#
# @param data [Hash, Array, String, Numeric, Boolean, nil] JSON data
# @param key [String, nil] Key (or array index) under which +data+ is stored
# @return [Core::TreeNode] Root tree node for +data+
def to_tree(data, key = nil)
  return convert_object(data, key) if data.is_a?(Hash)
  return convert_array(data, key) if data.is_a?(Array)

  convert_value(data, key)
end
52
+
53
# Rebuild a plain Ruby JSON structure from a TreeNode.
#
# The node label selects the builder; a node with any other label
# simply yields its stored value.
#
# @param tree_node [Core::TreeNode] Root tree node
# @return [Hash, Array, Object] JSON structure
def from_tree(tree_node)
  case tree_node.label
  when "object" then build_object(tree_node)
  when "array" then build_array(tree_node)
  when "value" then parse_value(tree_node)
  else tree_node.value # custom labels: hand back the raw value
  end
end
70
+
71
+ private
72
+
73
# Build an "object" TreeNode for a Hash, with one child per entry.
#
# Each entry is converted through #to_tree with its key stringified,
# and children are added in the Hash's iteration order.
#
# @param hash [Hash] JSON object
# @param key [String, nil] Key name if this object is nested
# @return [Core::TreeNode] Tree node
def convert_object(hash, key = nil)
  node_attributes = {}
  node_attributes["key"] = key if key

  object_node = Core::TreeNode.new(
    label: "object",
    value: nil,
    attributes: node_attributes,
  )

  hash.each_pair do |name, member|
    object_node.add_child(to_tree(member, name.to_s))
  end

  object_node
end
94
+
95
# Build an "array" TreeNode for an Array, with one child per element.
#
# Each element is converted through #to_tree using its position
# (as a String) as the key.
#
# @param array [Array] JSON array
# @param key [String, nil] Key name if this array is nested
# @return [Core::TreeNode] Tree node
def convert_array(array, key = nil)
  node_attributes = {}
  node_attributes["key"] = key if key

  array_node = Core::TreeNode.new(
    label: "array",
    value: nil,
    attributes: node_attributes,
  )

  array.each.with_index do |element, position|
    array_node.add_child(to_tree(element, position.to_s))
  end

  array_node
end
116
+
117
# Build a leaf "value" TreeNode for a primitive.
#
# The value is stored as its String form; the original type name is
# recorded in the "type" attribute so #parse_value can restore it.
#
# @param value [String, Numeric, Boolean, nil] Primitive value
# @param key [String, nil] Key name
# @return [Core::TreeNode] Tree node
def convert_value(value, key = nil)
  type_entry = { "type" => value_type(value) }
  # Keep "key" ahead of "type" so attribute order matches insertion order
  node_attributes = key ? { "key" => key }.merge(type_entry) : type_entry

  Core::TreeNode.new(label: "value", value: value.to_s, attributes: node_attributes)
end
133
+
134
# Name the JSON type of a primitive value.
#
# @param value [Object] Value
# @return [String] One of "string", "integer", "float", "boolean",
#   "null", or "unknown" for anything else
def value_type(value)
  type_names = {
    String => "string",
    Integer => "integer",
    Float => "float",
    TrueClass => "boolean",
    FalseClass => "boolean",
    NilClass => "null",
  }

  matched = type_names.find { |klass, _name| value.is_a?(klass) }
  matched ? matched.last : "unknown"
end
148
+
149
# Rebuild a Hash from an "object" TreeNode.
#
# Children without a "key" attribute are skipped.
#
# @param tree_node [Core::TreeNode] Object tree node
# @return [Hash] Reconstructed hash
def build_object(tree_node)
  tree_node.children.each_with_object({}) do |member, result|
    name = member.attributes["key"]
    next unless name

    result[name] = from_tree(member)
  end
end
163
+
164
# Rebuild an Array from an "array" TreeNode, one element per child
# in child order.
#
# @param tree_node [Core::TreeNode] Array tree node
# @return [Array] Reconstructed array
def build_array(tree_node)
  tree_node.children.map { |element| from_tree(element) }
end
177
+
178
# Restore a primitive from a "value" TreeNode.
#
# The node's "type" attribute selects how the stored String is
# converted back. Unrecognised types return the String unchanged.
#
# @param tree_node [Core::TreeNode] Value tree node
# @return [Object] Parsed value
def parse_value(tree_node)
  type = tree_node.attributes["type"]
  value_str = tree_node.value

  case type
  when "string" then value_str
  when "integer" then value_str.to_i
  when "float" then parse_float(value_str)
  when "boolean" then value_str == "true"
  when "null" then nil
  else value_str
  end
end

# Convert the String form of a Float back into a Float.
#
# Float#to_s writes non-finite values as "Infinity", "-Infinity" and
# "NaN", all of which String#to_f would read back as 0.0, so they are
# mapped explicitly.
#
# @param text [String] Output of Float#to_s
# @return [Float] Parsed float
def parse_float(text)
  case text
  when "Infinity" then Float::INFINITY
  when "-Infinity" then -Float::INFINITY
  when "NaN" then Float::NAN
  else text.to_f
  end
end
201
+ end
202
+ end
203
+ end
204
+ end
@@ -0,0 +1,285 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
+ module Canon
6
+ module TreeDiff
7
+ module Adapters
8
+ # XMLAdapter converts Nokogiri XML documents to TreeNode structures
9
+ # and back, enabling semantic tree diffing on XML documents.
10
+ #
11
+ # This adapter:
12
+ # - Converts Nokogiri::XML::Document to TreeNode tree
13
+ # - Preserves element names, text content, and attributes
14
+ # - Handles namespaces appropriately
15
+ # - Maintains document structure for round-trip conversion
16
+ #
17
+ # @example Convert XML to TreeNode
18
+ # xml = Nokogiri::XML("<root><child>text</child></root>")
19
+ # adapter = XMLAdapter.new
20
+ # tree = adapter.to_tree(xml)
21
+ #
22
+ class XMLAdapter
23
+ attr_reader :match_options
24
+
25
# Create an adapter and remember the supplied match options.
#
# The options are stored unchanged and exposed through the
# +match_options+ reader.
#
# @param match_options [Hash] Match options for text/attribute normalization
def initialize(match_options: {})
  @match_options = match_options
end
31
+
32
# Convert an XML node into a TreeNode (sub)tree.
#
# Canon::Xml node types are tried first; Nokogiri documents and
# elements are accepted as a legacy fallback. A Nokogiri document is
# converted starting at its root element.
#
# @param node [Nokogiri::XML::Document, Nokogiri::XML::Element, Canon::Xml::Node, nil] XML node
# @return [Core::TreeNode, nil] Root tree node, or nil when +node+ is nil
# @raise [ArgumentError] If a Nokogiri document has no root element or
#   the node type is not supported
def to_tree(node)
  return nil if node.nil?

  case node
  when Canon::Xml::Nodes::RootNode then to_tree_from_canon_root(node)
  when Canon::Xml::Nodes::ElementNode then to_tree_from_canon_element(node)
  when Canon::Xml::Nodes::TextNode then to_tree_from_canon_text(node)
  when Canon::Xml::Nodes::CommentNode then to_tree_from_canon_comment(node)
  when Nokogiri::XML::Document
    document_element = node.root
    raise ArgumentError, "Document has no root element" if document_element.nil?

    to_tree(document_element)
  when Nokogiri::XML::Element then convert_element(node)
  else raise ArgumentError, "Unsupported node type: #{node.class}"
  end
end
66
+
67
# Convert a TreeNode back into Nokogiri XML.
#
# When the target document has no root yet, the built element becomes
# its root and the document is returned. Otherwise the detached
# element is returned and the document is left untouched.
#
# @param tree_node [Core::TreeNode] Root tree node
# @param doc [Nokogiri::XML::Document, nil] Optional document to build into
# @return [Nokogiri::XML::Document, Nokogiri::XML::Element]
def from_tree(tree_node, doc = nil)
  doc ||= Nokogiri::XML::Document.new
  element = build_element(tree_node, doc)
  return element unless doc.root.nil?

  doc.root = element
  doc
end
84
+
85
+ private
86
+
87
# Convert a Nokogiri element (and its element descendants) to a TreeNode.
#
# The label is "{namespace_uri}name" when the element has a non-empty
# namespace, so elements with the same name in different namespaces
# are treated as different nodes; otherwise it is just the name.
# Attributes are sorted by name so that attribute order does not
# affect hash matching.
#
# @param element [Nokogiri::XML::Element] XML element
# @return [Core::TreeNode] Tree node
def convert_element(element)
  namespace_uri = element.namespace&.href
  label = if namespace_uri && !namespace_uri.empty?
            "{#{namespace_uri}}#{element.name}"
          else
            element.name
          end

  # Name => value pairs, sorted to normalize attribute order
  attributes = element.attributes
    .map { |name, attr| [name, attr.value] }
    .sort
    .to_h

  tree_node = Core::TreeNode.new(
    label: label,
    value: extract_text_value(element), # direct text only, not from children
    attributes: attributes,
    source_node: element, # keep a reference to the original Nokogiri node
  )

  element.element_children.each do |child|
    tree_node.add_child(convert_element(child))
  end

  tree_node
end
134
+
135
# Collect the text that belongs directly to an element.
#
# Only the element's own text nodes are used, never text from nested
# elements. The text is returned un-normalized; according to the
# original notes, normalization is applied later during comparison
# based on match_options.
#
# For mixed content (text nodes alongside child elements) the pieces
# are joined with a single space so that "Text<br/>More" yields
# "Text More" rather than "TextMore".
#
# @param element [Nokogiri::XML::Element] XML element
# @return [String, nil] Original text, or nil when it is empty or
#   whitespace-only
def extract_text_value(element)
  direct_text = element.children.select(&:text?).map(&:text)
  joiner = element.element_children.any? ? " " : ""
  combined = direct_text.join(joiner)
  return nil if combined.strip.empty?

  combined
end
163
+
164
# Build a Nokogiri element (with its descendants) from a TreeNode.
#
# The node label is used as the element name, attributes are copied,
# a non-empty value becomes the element content, and each child is
# built recursively and appended.
#
# NOTE(review): convert_element produces labels of the form
# "{namespace_uri}name" for namespaced elements; such a label is passed
# to Element.new unchanged here - confirm whether namespaced trees are
# expected to round-trip.
#
# @param tree_node [Core::TreeNode] Tree node
# @param doc [Nokogiri::XML::Document] Document that owns the new element
# @return [Nokogiri::XML::Element] XML element
def build_element(tree_node, doc)
  element = Nokogiri::XML::Element.new(tree_node.label, doc)

  tree_node.attributes.each { |attr_name, attr_value| element[attr_name] = attr_value }

  text = tree_node.value
  element.content = text if text && !text.empty?

  tree_node.children.each { |child| element.add_child(build_element(child, doc)) }

  element
end
190
+
191
# Convert Canon::Xml::Nodes::RootNode to TreeNode.
#
# The tree is built from the document element, i.e. the first child
# that is an ElementNode. The first child is not necessarily that
# element (a comment may precede it), so it is searched for rather
# than assumed. When the root has no element child at all, the first
# child is converted instead.
#
# @param root_node [Canon::Xml::Nodes::RootNode] Root node
# @return [Core::TreeNode, nil] Tree node for the document element, or
#   nil when the root has no children
def to_tree_from_canon_root(root_node)
  children = root_node.children
  return nil if children.empty?

  document_element = children.find do |child|
    child.is_a?(Canon::Xml::Nodes::ElementNode)
  end

  to_tree(document_element || children.first)
end
201
+
202
# Convert Canon::Xml::Nodes::ElementNode to TreeNode.
#
# The label is "{namespace_uri}name" when the element has a non-empty
# namespace URI (so same-named elements in different namespaces stay
# distinct) and the bare name otherwise. The element node itself
# carries no value. Children are converted through #to_tree, and
# children that convert to nil are left out.
#
# @param element_node [Canon::Xml::Nodes::ElementNode] Element node
# @return [Core::TreeNode] Tree node
def to_tree_from_canon_element(element_node)
  uri = element_node.namespace_uri
  namespaced = uri && !uri.empty?
  label = namespaced ? "{#{uri}}#{element_node.name}" : element_node.name

  tree_node = Core::TreeNode.new(
    label: label,
    value: nil,
    attributes: extract_canon_attributes(element_node),
    children: [],
    source_node: element_node, # keep a reference to the Canon node
  )

  element_node.children.each do |child|
    subtree = to_tree(child)
    next unless subtree

    tree_node.add_child(subtree)
  end

  tree_node
end
234
+
235
# Convert Canon::Xml::Nodes::TextNode to TreeNode.
#
# The text is kept exactly as stored (no stripping); only text that is
# empty or consists purely of whitespace is dropped.
#
# @param text_node [Canon::Xml::Nodes::TextNode] Text node
# @return [Core::TreeNode, nil] "text" tree node, or nil for
#   whitespace-only text
def to_tree_from_canon_text(text_node)
  content = text_node.value.to_s
  return if content.strip.empty?

  Core::TreeNode.new(
    label: "text",
    value: content,
    attributes: {},
    children: [],
    source_node: text_node,
  )
end
254
+
255
# Convert Canon::Xml::Nodes::CommentNode to TreeNode.
#
# The comment's value is stored unchanged on a childless "comment" node.
#
# @param comment_node [Canon::Xml::Nodes::CommentNode] Comment node
# @return [Core::TreeNode] Tree node
def to_tree_from_canon_comment(comment_node)
  comment_text = comment_node.value

  Core::TreeNode.new(
    label: "comment", value: comment_text,
    attributes: {}, children: [],
    source_node: comment_node
  )
end
268
+
269
# Collect an element's attributes as a name => value Hash.
#
# Reads the element's +attribute_nodes+ and returns the pairs sorted
# so that attribute order is normalized.
#
# @param element_node [Canon::Xml::Nodes::ElementNode] Element node
# @return [Hash] Attributes hash sorted by key
def extract_canon_attributes(element_node)
  unsorted = element_node.attribute_nodes.each_with_object({}) do |attribute, collected|
    collected[attribute.name] = attribute.value
  end

  unsorted.sort.to_h
end
282
+ end
283
+ end
284
+ end
285
+ end
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
require "date"
require "time"
require "yaml"
4
+
5
+ module Canon
6
+ module TreeDiff
7
+ module Adapters
8
+ # YAMLAdapter converts YAML structures to TreeNode structures and back,
9
+ # enabling semantic tree diffing on YAML documents.
10
+ #
11
+ # This adapter:
12
+ # - Converts Hash/Array YAML structures to TreeNode tree
13
+ # - Handles nested objects, arrays, and primitive values
14
+ # - Preserves type information for round-trip conversion
15
+ # - Maps YAML structure to tree representation
16
+ #
17
+ # YAML to TreeNode mapping (similar to JSON):
18
+ # - Objects (Hash): TreeNode with label "object", children for each key
19
+ # - Arrays: TreeNode with label "array", indexed children
20
+ # - Primitives: TreeNode with label "value", value stored directly
21
+ #
22
+ # @example Convert YAML to TreeNode
23
+ # yaml = { "name" => "John", "age" => 30 }
24
+ # adapter = YAMLAdapter.new
25
+ # tree = adapter.to_tree(yaml)
26
+ #
27
+ class YAMLAdapter
28
+ attr_reader :match_options
29
+
30
# Create an adapter and remember the supplied match options.
#
# The options are stored unchanged and exposed through the
# +match_options+ reader.
#
# @param match_options [Hash] Match options (reserved for future use)
def initialize(match_options: {})
  @match_options = match_options
end
36
+
37
# Turn a loaded YAML value into a TreeNode (sub)tree.
#
# Hashes and Arrays are converted recursively; every other value
# becomes a leaf "value" node.
#
# @param data [Hash, Array, String, Numeric, Boolean, nil] YAML data
# @param key [String, nil] Key (or array index) under which +data+ is stored
# @return [Core::TreeNode] Root tree node for +data+
def to_tree(data, key = nil)
  if data.is_a?(Hash)
    convert_object(data, key)
  elsif data.is_a?(Array)
    convert_array(data, key)
  else
    convert_value(data, key)
  end
end
52
+
53
# Rebuild a plain Ruby YAML structure from a TreeNode.
#
# The node label selects the builder; a node with any other label
# simply yields its stored value.
#
# @param tree_node [Core::TreeNode] Root tree node
# @return [Hash, Array, Object] YAML structure
def from_tree(tree_node)
  case tree_node.label
  when "object" then build_object(tree_node)
  when "array" then build_array(tree_node)
  when "value" then parse_value(tree_node)
  else tree_node.value # custom labels: hand back the raw value
  end
end
70
+
71
+ private
72
+
73
# Build an "object" TreeNode for a Hash, with one child per entry.
#
# Each entry is converted through #to_tree with its key stringified,
# and children are added in the Hash's iteration order.
#
# @param hash [Hash] YAML object
# @param key [String, nil] Key name if this object is nested
# @return [Core::TreeNode] Tree node
def convert_object(hash, key = nil)
  node_attributes = {}
  node_attributes["key"] = key if key

  object_node = Core::TreeNode.new(
    label: "object",
    value: nil,
    attributes: node_attributes,
  )

  hash.each_pair do |name, member|
    object_node.add_child(to_tree(member, name.to_s))
  end

  object_node
end
94
+
95
# Build an "array" TreeNode for an Array, with one child per element.
#
# Each element is converted through #to_tree using its position
# (as a String) as the key.
#
# @param array [Array] YAML array
# @param key [String, nil] Key name if this array is nested
# @return [Core::TreeNode] Tree node
def convert_array(array, key = nil)
  node_attributes = {}
  node_attributes["key"] = key if key

  array_node = Core::TreeNode.new(
    label: "array",
    value: nil,
    attributes: node_attributes,
  )

  array.each.with_index do |element, position|
    array_node.add_child(to_tree(element, position.to_s))
  end

  array_node
end
116
+
117
# Build a leaf "value" TreeNode for a primitive.
#
# The value is stored as its String form; the original type name is
# recorded in the "type" attribute so #parse_value can restore it.
#
# @param value [String, Numeric, Boolean, nil] Primitive value
# @param key [String, nil] Key name
# @return [Core::TreeNode] Tree node
def convert_value(value, key = nil)
  type_entry = { "type" => value_type(value) }
  # Keep "key" ahead of "type" so attribute order matches insertion order
  node_attributes = key ? { "key" => key }.merge(type_entry) : type_entry

  Core::TreeNode.new(label: "value", value: value.to_s, attributes: node_attributes)
end
133
+
134
# Name the YAML type of a primitive value.
#
# @param value [Object] Value
# @return [String] One of "string", "integer", "float", "boolean",
#   "null", "symbol", "time", "date", or "unknown" for anything else
def value_type(value)
  case value
  when String then "string"
  when Integer then "integer"
  when Float then "float"
  when TrueClass, FalseClass then "boolean"
  when NilClass then "null"
  when Symbol then "symbol"
  # DateTime is a subclass of Date, so it must be tested before Date;
  # otherwise it would be typed "date" and lose its time of day when
  # parsed back.
  when Time, DateTime then "time"
  when Date then "date"
  else "unknown"
  end
end
151
+
152
# Rebuild a Hash from an "object" TreeNode.
#
# Children without a "key" attribute are skipped.
#
# @param tree_node [Core::TreeNode] Object tree node
# @return [Hash] Reconstructed hash
def build_object(tree_node)
  tree_node.children.each_with_object({}) do |member, result|
    name = member.attributes["key"]
    next unless name

    result[name] = from_tree(member)
  end
end
166
+
167
# Rebuild an Array from an "array" TreeNode, one element per child
# in child order.
#
# @param tree_node [Core::TreeNode] Array tree node
# @return [Array] Reconstructed array
def build_array(tree_node)
  tree_node.children.map { |element| from_tree(element) }
end
180
+
181
# Restore a primitive from a "value" TreeNode.
#
# The node's "type" attribute selects how the stored String is
# converted back. Unrecognised types return the String unchanged.
# "date" and "time" values are parsed with Date.parse and Time.parse,
# which come from the "date" and "time" standard libraries.
#
# @param tree_node [Core::TreeNode] Value tree node
# @return [Object] Parsed value
def parse_value(tree_node)
  type = tree_node.attributes["type"]
  value_str = tree_node.value

  case type
  when "string" then value_str
  when "integer" then value_str.to_i
  when "float" then parse_float(value_str)
  when "boolean" then value_str == "true"
  when "null" then nil
  when "symbol" then value_str.to_sym
  when "date" then Date.parse(value_str)
  when "time" then Time.parse(value_str)
  else value_str
  end
end

# Convert the String form of a Float back into a Float.
#
# Float#to_s writes non-finite values (YAML .inf, -.inf, .nan once
# loaded) as "Infinity", "-Infinity" and "NaN", all of which
# String#to_f would read back as 0.0, so they are mapped explicitly.
#
# @param text [String] Output of Float#to_s
# @return [Float] Parsed float
def parse_float(text)
  case text
  when "Infinity" then Float::INFINITY
  when "-Infinity" then -Float::INFINITY
  when "NaN" then Float::NAN
  else text.to_f
  end
end
210
+ end
211
+ end
212
+ end
213
+ end