canon 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module TreeDiff
|
|
7
|
+
module Adapters
|
|
8
|
+
# JSONAdapter translates plain JSON data (Hash / Array / scalar) into
# Core::TreeNode trees and rebuilds the plain data from such trees, so that
# JSON documents can take part in semantic tree diffing.
#
# Node shapes produced by #to_tree:
# - Hash   => label "object", value nil, one child per entry
# - Array  => label "array", value nil, one child per element (in order)
# - scalar => label "value", value is the scalar rendered with #to_s and
#             the "type" attribute records how to parse it back
#
# A node created for a Hash entry or an Array element carries a "key"
# attribute holding the stringified hash key or array index.
#
# @example Convert JSON to TreeNode
#   json = { "name" => "John", "age" => 30 }
#   adapter = JSONAdapter.new
#   tree = adapter.to_tree(json)
#
class JSONAdapter
  attr_reader :match_options

  # Build an adapter.
  #
  # @param match_options [Hash] Match options; stored and exposed through
  #   the reader, not consulted by this class itself
  def initialize(match_options: {})
    @match_options = match_options
  end

  # Turn JSON data into a tree, dispatching on the Ruby type of +data+.
  #
  # @param data [Hash, Array, String, Numeric, Boolean, nil] JSON data
  # @param key [String, nil] Key (or index) under which +data+ is stored
  #   in its parent, nil for the document root
  # @return [Core::TreeNode] Root of the generated tree
  def to_tree(data, key = nil)
    case data
    when Hash then convert_object(data, key)
    when Array then convert_array(data, key)
    else convert_value(data, key)
    end
  end

  # Rebuild plain JSON data from a tree, dispatching on the node label.
  #
  # @param tree_node [Core::TreeNode] Root tree node
  # @return [Hash, Array, Object] Reconstructed data; nodes with a label
  #   other than "object", "array" or "value" yield their raw value
  def from_tree(tree_node)
    case tree_node.label
    when "object" then build_object(tree_node)
    when "array" then build_array(tree_node)
    when "value" then parse_value(tree_node)
    else tree_node.value
    end
  end

  private

  # Attributes shared by every node kind: just the parent key, if any.
  #
  # @param key [String, nil] Key name
  # @return [Hash] Fresh hash, empty when +key+ is nil/false
  def key_attributes(key)
    key ? { "key" => key } : {}
  end

  # Convert a Hash into an "object" node whose children are its entries.
  #
  # @param hash [Hash] JSON object
  # @param key [String, nil] Key name if this object is nested
  # @return [Core::TreeNode] Tree node
  def convert_object(hash, key = nil)
    object_node = Core::TreeNode.new(
      label: "object",
      value: nil,
      attributes: key_attributes(key),
    )

    hash.each_with_object(object_node) do |(name, item), parent|
      # Keys are stringified so the "key" attribute is always a String
      parent.add_child(to_tree(item, name.to_s))
    end
  end

  # Convert an Array into an "array" node whose children are its elements,
  # each keyed by its stringified index.
  #
  # @param array [Array] JSON array
  # @param key [String, nil] Key name if this array is nested
  # @return [Core::TreeNode] Tree node
  def convert_array(array, key = nil)
    array_node = Core::TreeNode.new(
      label: "array",
      value: nil,
      attributes: key_attributes(key),
    )

    array.each_with_index do |item, position|
      array_node.add_child(to_tree(item, position.to_s))
    end

    array_node
  end

  # Convert a scalar into a "value" leaf node.
  #
  # @param value [String, Numeric, Boolean, nil] Primitive value
  # @param key [String, nil] Key name
  # @return [Core::TreeNode] Tree node holding +value.to_s+ and a "type"
  #   attribute
  def convert_value(value, key = nil)
    Core::TreeNode.new(
      label: "value",
      value: value.to_s,
      attributes: key_attributes(key).merge("type" => value_type(value)),
    )
  end

  # Name the JSON type of a scalar.
  #
  # @param value [Object] Value
  # @return [String] "string", "integer", "float", "boolean", "null" or
  #   "unknown"
  def value_type(value)
    case value
    when String
      "string"
    when Integer
      "integer"
    when Float
      "float"
    when TrueClass, FalseClass
      "boolean"
    when NilClass
      "null"
    else
      "unknown"
    end
  end

  # Rebuild a Hash from an "object" node. Children without a "key"
  # attribute are skipped.
  #
  # @param tree_node [Core::TreeNode] Object tree node
  # @return [Hash] Reconstructed hash
  def build_object(tree_node)
    tree_node.children.each_with_object({}) do |child, result|
      name = child.attributes["key"]
      result[name] = from_tree(child) if name
    end
  end

  # Rebuild an Array from an "array" node, keeping child order.
  #
  # @param tree_node [Core::TreeNode] Array tree node
  # @return [Array] Reconstructed array
  def build_array(tree_node)
    tree_node.children.map { |child| from_tree(child) }
  end

  # Parse the stored string of a "value" node back into a Ruby scalar,
  # guided by the node's "type" attribute.
  #
  # @param tree_node [Core::TreeNode] Value tree node
  # @return [Object] Parsed value
  def parse_value(tree_node)
    type = tree_node.attributes["type"]
    raw = tree_node.value

    case type
    when "integer" then raw.to_i
    when "float" then raw.to_f
    when "boolean" then raw == "true"
    when "null" then nil
    else raw # "string" and any unrecognised type come back verbatim
    end
  end
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module TreeDiff
|
|
7
|
+
module Adapters
|
|
8
|
+
# XMLAdapter converts XML nodes to Core::TreeNode structures and back,
# enabling semantic tree diffing on XML documents.
#
# Accepted inputs for #to_tree are Canon::Xml::Nodes root/element/text/
# comment nodes and, as a legacy path, Nokogiri documents and elements.
#
# Element labels use Clark notation: "{namespace_uri}name" when the element
# is in a namespace, plain "name" otherwise, so that same-named elements in
# different namespaces never compare as equal. #from_tree understands the
# same notation when rebuilding Nokogiri elements.
#
# @example Convert XML to TreeNode
#   xml = Nokogiri::XML("<root><child>text</child></root>")
#   adapter = XMLAdapter.new
#   tree = adapter.to_tree(xml)
#
class XMLAdapter
  # Matches a Clark-notation label: capture 1 is the namespace URI (may be
  # empty), capture 2 the local element name.
  CLARK_NOTATION = /\A\{([^}]*)\}(.+)\z/m
  private_constant :CLARK_NOTATION

  attr_reader :match_options

  # Initialize adapter with match options
  #
  # @param match_options [Hash] Match options; stored and exposed through
  #   the reader, not consulted during conversion in this class
  def initialize(match_options: {})
    @match_options = match_options
  end

  # Convert a Nokogiri XML document/element or Canon::Xml node to TreeNode
  #
  # @param node [Nokogiri::XML::Document, Nokogiri::XML::Element,
  #   Canon::Xml::Node, nil] XML node
  # @return [Core::TreeNode, nil] Root tree node; nil when +node+ is nil,
  #   when a Canon root has no children, or for whitespace-only Canon text
  # @raise [ArgumentError] if a Nokogiri document has no root element or
  #   the node type is not supported
  def to_tree(node)
    return nil if node.nil?

    case node
    # Canon::Xml node types are checked first
    when Canon::Xml::Nodes::RootNode then to_tree_from_canon_root(node)
    when Canon::Xml::Nodes::ElementNode then to_tree_from_canon_element(node)
    when Canon::Xml::Nodes::TextNode then to_tree_from_canon_text(node)
    when Canon::Xml::Nodes::CommentNode then to_tree_from_canon_comment(node)
    # Fallback to Nokogiri (legacy support)
    when Nokogiri::XML::Document
      root = node.root
      raise ArgumentError, "Document has no root element" if root.nil?

      to_tree(root)
    when Nokogiri::XML::Element
      convert_element(node)
    else
      raise ArgumentError, "Unsupported node type: #{node.class}"
    end
  end

  # Convert TreeNode back to Nokogiri XML
  #
  # @param tree_node [Core::TreeNode] Root tree node
  # @param doc [Nokogiri::XML::Document, nil] Optional document to use;
  #   a new one is created when omitted
  # @return [Nokogiri::XML::Document, Nokogiri::XML::Element] the document
  #   when the built element became its root, otherwise (the document
  #   already had a root) just the detached element
  def from_tree(tree_node, doc = nil)
    doc ||= Nokogiri::XML::Document.new
    element = build_element(tree_node, doc)

    return element unless doc.root.nil?

    doc.root = element
    doc
  end

  private

  # Build the tree label for an element name in an optional namespace.
  #
  # @param namespace_uri [String, nil] Namespace URI
  # @param name [String] Element name
  # @return [String] "{namespace_uri}name", or +name+ when the URI is
  #   nil or empty
  def qualified_label(namespace_uri, name)
    return name if namespace_uri.nil? || namespace_uri.empty?

    "{#{namespace_uri}}#{name}"
  end

  # Split a tree label back into namespace URI and element name
  # (inverse of #qualified_label).
  #
  # @param label [String] Tree node label
  # @return [Array(String, String), Array(nil, Object)] URI (nil when the
  #   label carries none) and element name
  def split_label(label)
    match = CLARK_NOTATION.match(label.to_s)
    return [nil, label] unless match

    uri = match[1]
    [uri.empty? ? nil : uri, match[2]]
  end

  # Convert a Nokogiri element to TreeNode
  #
  # @param element [Nokogiri::XML::Element] XML element
  # @return [Core::TreeNode] Tree node whose children are the element's
  #   child elements (text is folded into the node value)
  def convert_element(element)
    # Attributes are sorted by name so that attribute order does not
    # affect hash matching (matches attribute_order: :ignore).
    # NOTE(review): Nokogiri documents Node#attributes as keyed by the
    # attribute name without namespace, so same-named attributes in
    # different namespaces would collapse here - confirm whether
    # attribute_nodes is needed.
    attributes = element.attributes.transform_values(&:value).sort.to_h

    tree_node = Core::TreeNode.new(
      label: qualified_label(element.namespace&.href, element.name),
      value: extract_text_value(element),
      attributes: attributes,
      source_node: element, # Preserve reference to original Nokogiri node
    )

    element.element_children.each do |child|
      tree_node.add_child(convert_element(child))
    end

    tree_node
  end

  # Extract direct text content from element
  #
  # The text is returned unnormalized (no stripping); per the original
  # design note, normalization is applied later during comparison based
  # on match_options, not during tree conversion.
  #
  # For mixed content (text nodes + child elements) the text nodes are
  # joined with a space so that text separated by an element such as
  # <br/> does not run together: "Text<br/>More" => "Text More".
  #
  # @param element [Nokogiri::XML::Element] XML element
  # @return [String, nil] Text content, or nil when there is no text or
  #   only whitespace
  def extract_text_value(element)
    # Only direct text nodes, not text from nested elements
    direct_text = element.children.select(&:text?).map(&:text)
    separator = element.element_children.any? ? " " : ""
    text = direct_text.join(separator)

    text.strip.empty? ? nil : text
  end

  # Build Nokogiri element from TreeNode
  #
  # A Clark-notation label ("{uri}name") is split so that the element is
  # created with the bare name and bound to a default-namespace
  # declaration for the URI; passing the raw label to Nokogiri would
  # produce an invalid element name.
  #
  # NOTE(review): "text" and "comment" nodes produced from Canon nodes
  # are indistinguishable by label from real <text>/<comment> elements
  # and are therefore still rebuilt as elements - confirm whether
  # from_tree must support trees built from Canon nodes.
  #
  # @param tree_node [Core::TreeNode] Tree node
  # @param doc [Nokogiri::XML::Document] Document
  # @return [Nokogiri::XML::Element] XML element
  def build_element(tree_node, doc)
    namespace_uri, local_name = split_label(tree_node.label)
    element = Nokogiri::XML::Element.new(local_name, doc)

    if namespace_uri
      # nil prefix => xmlns="uri" declaration on this element
      element.namespace = element.add_namespace_definition(nil, namespace_uri)
    end

    tree_node.attributes.each do |name, value|
      element[name] = value
    end

    text = tree_node.value
    element.content = text if text && !text.empty?

    tree_node.children.each do |child|
      element.add_child(build_element(child, doc))
    end

    element
  end

  # Convert Canon::Xml::Nodes::RootNode to TreeNode
  #
  # @param root_node [Canon::Xml::Nodes::RootNode] Root node
  # @return [Core::TreeNode, nil] Tree for the root's first child, which
  #   is taken to be the document element; nil when there are no children
  def to_tree_from_canon_root(root_node)
    return nil if root_node.children.empty?

    to_tree(root_node.children.first)
  end

  # Convert Canon::Xml::Nodes::ElementNode to TreeNode
  #
  # @param element_node [Canon::Xml::Nodes::ElementNode] Element node
  # @return [Core::TreeNode] Tree node; children that convert to nil
  #   (e.g. whitespace-only text) are omitted
  def to_tree_from_canon_element(element_node)
    tree_node = Core::TreeNode.new(
      label: qualified_label(element_node.namespace_uri, element_node.name),
      value: nil, # Element text lives in separate "text" child nodes
      attributes: extract_canon_attributes(element_node),
      children: [],
      source_node: element_node, # Preserve reference to Canon node
    )

    element_node.children.each do |child|
      child_tree = to_tree(child)
      tree_node.add_child(child_tree) if child_tree
    end

    tree_node
  end

  # Convert Canon::Xml::Nodes::TextNode to TreeNode
  #
  # @param text_node [Canon::Xml::Nodes::TextNode] Text node
  # @return [Core::TreeNode, nil] "text" node holding the unstripped
  #   text, or nil for whitespace-only text
  def to_tree_from_canon_text(text_node)
    text_value = text_node.value.to_s
    return nil if text_value.strip.empty?

    Core::TreeNode.new(
      label: "text",
      value: text_value,
      attributes: {},
      children: [],
      source_node: text_node,
    )
  end

  # Convert Canon::Xml::Nodes::CommentNode to TreeNode
  #
  # @param comment_node [Canon::Xml::Nodes::CommentNode] Comment node
  # @return [Core::TreeNode] "comment" node holding the comment value
  def to_tree_from_canon_comment(comment_node)
    Core::TreeNode.new(
      label: "comment",
      value: comment_node.value,
      attributes: {},
      children: [],
      source_node: comment_node,
    )
  end

  # Extract attributes from Canon::Xml::Nodes::ElementNode
  #
  # @param element_node [Canon::Xml::Nodes::ElementNode] Element node
  # @return [Hash] name => value, sorted by name to normalize order
  def extract_canon_attributes(element_node)
    attrs = element_node.attribute_nodes.each_with_object({}) do |attr, acc|
      acc[attr.name] = attr.value
    end

    attrs.sort.to_h
  end
end
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
end
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
require "time"
require "yaml"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module TreeDiff
|
|
7
|
+
module Adapters
|
|
8
|
+
# YAMLAdapter converts YAML structures to TreeNode structures and back,
# enabling semantic tree diffing on YAML documents.
#
# YAML to TreeNode mapping (same shape as the JSON adapter):
# - Objects (Hash): TreeNode with label "object", one child per entry
# - Arrays: TreeNode with label "array", one child per element, in order
# - Scalars: TreeNode with label "value"; the value is stored as a String
#   (value.to_s) and the "type" attribute records how to parse it back
#
# A node created for a Hash entry or an Array element carries a "key"
# attribute holding the stringified hash key or array index.
#
# Besides the JSON scalar types this adapter also recognises Symbol,
# Date and Time/DateTime values.
#
# @example Convert YAML to TreeNode
#   yaml = { "name" => "John", "age" => 30 }
#   adapter = YAMLAdapter.new
#   tree = adapter.to_tree(yaml)
#
class YAMLAdapter
  attr_reader :match_options

  # Initialize adapter with match options
  #
  # @param match_options [Hash] Match options; stored and exposed through
  #   the reader, not consulted by this class itself
  def initialize(match_options: {})
    @match_options = match_options
  end

  # Convert YAML structure to TreeNode
  #
  # @param data [Hash, Array, String, Numeric, Boolean, Symbol, Date,
  #   Time, nil] YAML data
  # @param key [String, nil] Key name if this is a hash value
  # @return [Core::TreeNode] Root tree node
  def to_tree(data, key = nil)
    case data
    when Hash
      convert_object(data, key)
    when Array
      convert_array(data, key)
    else
      convert_value(data, key)
    end
  end

  # Convert TreeNode back to YAML structure
  #
  # @param tree_node [Core::TreeNode] Root tree node
  # @return [Hash, Array, Object] YAML structure; nodes with a label other
  #   than "object", "array" or "value" yield their raw value
  def from_tree(tree_node)
    case tree_node.label
    when "object"
      build_object(tree_node)
    when "array"
      build_array(tree_node)
    when "value"
      parse_value(tree_node)
    else
      # Fallback for custom labels
      tree_node.value
    end
  end

  private

  # Convert YAML object (Hash) to TreeNode
  #
  # @param hash [Hash] YAML object
  # @param key [String, nil] Key name if this is nested
  # @return [Core::TreeNode] Tree node
  def convert_object(hash, key = nil)
    attributes = key ? { "key" => key } : {}

    tree_node = Core::TreeNode.new(
      label: "object",
      value: nil,
      attributes: attributes,
    )

    hash.each do |k, v|
      # Keys are stringified, so non-String keys (e.g. Symbols) come back
      # as Strings from #from_tree
      tree_node.add_child(to_tree(v, k.to_s))
    end

    tree_node
  end

  # Convert YAML array to TreeNode
  #
  # @param array [Array] YAML array
  # @param key [String, nil] Key name if this is nested
  # @return [Core::TreeNode] Tree node
  def convert_array(array, key = nil)
    attributes = key ? { "key" => key } : {}

    tree_node = Core::TreeNode.new(
      label: "array",
      value: nil,
      attributes: attributes,
    )

    array.each_with_index do |item, index|
      tree_node.add_child(to_tree(item, index.to_s))
    end

    tree_node
  end

  # Convert primitive value to TreeNode
  #
  # @param value [String, Numeric, Boolean, Symbol, Date, Time, nil]
  #   Primitive value
  # @param key [String, nil] Key name
  # @return [Core::TreeNode] Tree node holding +value.to_s+ and a "type"
  #   attribute
  def convert_value(value, key = nil)
    attributes = {}
    attributes["key"] = key if key
    attributes["type"] = value_type(value)

    Core::TreeNode.new(
      label: "value",
      value: value.to_s,
      attributes: attributes,
    )
  end

  # Determine value type
  #
  # @param value [Object] Value
  # @return [String] "string", "integer", "float", "boolean", "null",
  #   "symbol", "time", "date" or "unknown"
  def value_type(value)
    case value
    when String then "string"
    when Integer then "integer"
    when Float then "float"
    when TrueClass, FalseClass then "boolean"
    when NilClass then "null"
    when Symbol then "symbol"
    # DateTime is a subclass of Date, so it must be tested before Date;
    # otherwise a DateTime is tagged "date" and loses its time of day
    # when parsed back.
    when Time, DateTime then "time"
    when Date then "date"
    else "unknown"
    end
  end

  # Build Hash from object TreeNode. Children without a "key" attribute
  # are skipped.
  #
  # @param tree_node [Core::TreeNode] Object tree node
  # @return [Hash] Reconstructed hash
  def build_object(tree_node)
    tree_node.children.each_with_object({}) do |child, hash|
      key = child.attributes["key"]
      hash[key] = from_tree(child) if key
    end
  end

  # Build Array from array TreeNode, keeping child order
  #
  # @param tree_node [Core::TreeNode] Array tree node
  # @return [Array] Reconstructed array
  def build_array(tree_node)
    tree_node.children.map { |child| from_tree(child) }
  end

  # Parse value from value TreeNode, guided by its "type" attribute
  #
  # @param tree_node [Core::TreeNode] Value tree node
  # @return [Object] Parsed value; "time" values (including those that
  #   were originally DateTime) come back as Time
  def parse_value(tree_node)
    type = tree_node.attributes["type"]
    value_str = tree_node.value

    case type
    when "integer" then value_str.to_i
    when "float" then value_str.to_f
    when "boolean" then value_str == "true"
    when "null" then nil
    when "symbol" then value_str.to_sym
    when "date" then Date.parse(value_str)
    # Time.parse is provided by the stdlib "time" extension.
    # NOTE(review): the stored string comes from #to_s, which has whole
    # second resolution - sub-second precision is not restored.
    when "time" then Time.parse(value_str)
    else value_str # "string" and any unrecognised type come back verbatim
    end
  end
end
|
|
211
|
+
end
|
|
212
|
+
end
|
|
213
|
+
end
|