canon 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +69 -92
- data/README.adoc +13 -13
- data/docs/.lycheeignore +69 -0
- data/docs/Gemfile +1 -0
- data/docs/_config.yml +90 -1
- data/docs/advanced/diff-classification.adoc +82 -2
- data/docs/advanced/extending-canon.adoc +193 -0
- data/docs/features/match-options/index.adoc +239 -1
- data/docs/internals/diffnode-enrichment.adoc +611 -0
- data/docs/internals/index.adoc +251 -0
- data/docs/lychee.toml +13 -6
- data/docs/understanding/architecture.adoc +749 -33
- data/docs/understanding/comparison-pipeline.adoc +122 -0
- data/lib/canon/cache.rb +129 -0
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
- data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
- data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
- data/lib/canon/comparison/dimensions/registry.rb +77 -0
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
- data/lib/canon/comparison/dimensions.rb +54 -0
- data/lib/canon/comparison/format_detector.rb +87 -0
- data/lib/canon/comparison/html_comparator.rb +70 -26
- data/lib/canon/comparison/html_compare_profile.rb +8 -2
- data/lib/canon/comparison/html_parser.rb +80 -0
- data/lib/canon/comparison/json_comparator.rb +12 -0
- data/lib/canon/comparison/json_parser.rb +19 -0
- data/lib/canon/comparison/markup_comparator.rb +293 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
- data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
- data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
- data/lib/canon/comparison/match_options.rb +68 -463
- data/lib/canon/comparison/profile_definition.rb +149 -0
- data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
- data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
- data/lib/canon/comparison/xml_comparator.rb +97 -684
- data/lib/canon/comparison/xml_node_comparison.rb +319 -0
- data/lib/canon/comparison/xml_parser.rb +19 -0
- data/lib/canon/comparison/yaml_comparator.rb +3 -3
- data/lib/canon/comparison.rb +265 -110
- data/lib/canon/diff/diff_classifier.rb +101 -2
- data/lib/canon/diff/diff_node.rb +32 -2
- data/lib/canon/diff/formatting_detector.rb +1 -1
- data/lib/canon/diff/node_serializer.rb +191 -0
- data/lib/canon/diff/path_builder.rb +143 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
- data/lib/canon/diff_formatter.rb +1 -1
- data/lib/canon/rspec_matchers.rb +38 -9
- data/lib/canon/tree_diff/operation_converter.rb +92 -338
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +24 -13
- metadata +48 -2
|
@@ -0,0 +1,611 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: DiffNode Enrichment
|
|
3
|
+
parent: Internals
|
|
4
|
+
nav_order: 1
|
|
5
|
+
---
|
|
6
|
+
= DiffNode Enrichment
|
|
7
|
+
|
|
8
|
+
== Purpose
|
|
9
|
+
|
|
10
|
+
This document explains how DiffNode objects carry complete information about differences through Canon's comparison pipeline, including location paths, serialized content, and normalized attributes.
|
|
11
|
+
|
|
12
|
+
== DiffNode structure
|
|
13
|
+
|
|
14
|
+
=== Properties
|
|
15
|
+
|
|
16
|
+
DiffNode objects contain all information needed to understand and display a difference:
|
|
17
|
+
|
|
18
|
+
[source,ruby]
|
|
19
|
+
----
|
|
20
|
+
class DiffNode
|
|
21
|
+
# Core properties
|
|
22
|
+
attr_reader :node1, :node2 # Raw node references
|
|
23
|
+
attr_accessor :dimension, :reason # What changed and why
|
|
24
|
+
attr_accessor :normative, :formatting # Classification
|
|
25
|
+
|
|
26
|
+
# Location and display information
|
|
27
|
+
attr_accessor :path # Canonical path with ordinal indices
|
|
28
|
+
attr_accessor :serialized_before # Serialized "before" content
|
|
29
|
+
attr_accessor :serialized_after # Serialized "after" content
|
|
30
|
+
attr_accessor :attributes_before # Normalized "before" attributes
|
|
31
|
+
attr_accessor :attributes_after # Normalized "after" attributes
|
|
32
|
+
end
|
|
33
|
+
----
|
|
34
|
+
|
|
35
|
+
=== Property categories
|
|
36
|
+
|
|
37
|
+
**Core properties** - Describe what changed:
|
|
38
|
+
|
|
39
|
+
* `node1, node2` - Raw node references from original documents
|
|
40
|
+
* `dimension` - Type of difference (`:text_content`, `:attribute_values`, `:element_structure`, etc.)
|
|
41
|
+
* `reason` - Human-readable explanation
|
|
42
|
+
* `normative` - Affects semantic equivalence (true) or formatting only (false)
|
|
43
|
+
* `formatting` - Purely cosmetic whitespace difference
|
|
44
|
+
|
|
45
|
+
**Location and display properties** - Enable accurate rendering:
|
|
46
|
+
|
|
47
|
+
* `path` - Canonical path with ordinal indices
|
|
48
|
+
* `serialized_before/after` - Serialized content captured at comparison time
|
|
49
|
+
* `attributes_before/after` - Normalized attribute hashes
|
|
50
|
+
|
|
51
|
+
== Architecture
|
|
52
|
+
|
|
53
|
+
=== Enrichment flow
|
|
54
|
+
|
|
55
|
+
[mermaid]
|
|
56
|
+
----
|
|
57
|
+
graph TD
|
|
58
|
+
A[Raw Nodes] --> B{Algorithm Layer}
|
|
59
|
+
B -->|DOM| C[XmlComparator]
|
|
60
|
+
B -->|Semantic| D[OperationConverter]
|
|
61
|
+
C --> E[enrich_diff_metadata]
|
|
62
|
+
D --> E
|
|
63
|
+
E --> F[PathBuilder]
|
|
64
|
+
E --> G[NodeSerializer]
|
|
65
|
+
F --> H[DiffNode.path]
|
|
66
|
+
G --> I[DiffNode.serialized_before/after]
|
|
67
|
+
G --> J[DiffNode.attributes_before/after]
|
|
68
|
+
H --> K[Enriched DiffNode]
|
|
69
|
+
I --> K
|
|
70
|
+
J --> K
|
|
71
|
+
K --> L[Layer 4: Rendering]
|
|
72
|
+
|
|
73
|
+
style B fill:#fff4e1
|
|
74
|
+
style E fill:#e1f5ff
|
|
75
|
+
style K fill:#e1ffe1
|
|
76
|
+
----
|
|
77
|
+
|
|
78
|
+
=== Library-agnostic design
|
|
79
|
+
|
|
80
|
+
Canon works with nodes from multiple sources (Nokogiri, Moxml, and its own Canon::Xml::Node model) and must remain library-agnostic to support future libraries. The enrichment utilities handle this by:
|
|
81
|
+
|
|
82
|
+
1. **Detecting node type** using `respond_to?` checks
|
|
83
|
+
2. **Calling appropriate methods** for each library
|
|
84
|
+
3. **Normalizing output** to library-agnostic format
|
|
85
|
+
|
|
86
|
+
This allows Layer 4 to work with enriched metadata without knowing which parsing library created the nodes.
|
|
87
|
+
|
|
88
|
+
== PathBuilder: Canonical paths with ordinal indices
|
|
89
|
+
|
|
90
|
+
=== Purpose
|
|
91
|
+
|
|
92
|
+
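Generate unambiguous XPath-like paths that uniquely identify nodes regardless of parsing library. Ordinal indices are zero-based and count only siblings with the same label (unlike XPath positions, which are one-based).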
|
|
93
|
+
|
|
94
|
+
=== Location
|
|
95
|
+
|
|
96
|
+
`lib/canon/diff/path_builder.rb`
|
|
97
|
+
|
|
98
|
+
=== API
|
|
99
|
+
|
|
100
|
+
[source,ruby]
|
|
101
|
+
----
|
|
102
|
+
# Build canonical path from node
|
|
103
|
+
path = Canon::Diff::PathBuilder.build(node, format: :document)
|
|
104
|
+
# => "/#document/html[0]/body[0]/p[1]/span[2]"
|
|
105
|
+
|
|
106
|
+
# Build human-readable path
|
|
107
|
+
human = Canon::Diff::PathBuilder.human_path(node)
|
|
108
|
+
# => "#document → html[0] → body[0] → p[1] → span[2]"
|
|
109
|
+
----
|
|
110
|
+
|
|
111
|
+
=== Implementation
|
|
112
|
+
|
|
113
|
+
==== segment_for_node
|
|
114
|
+
|
|
115
|
+
Generates a single path segment with ordinal index:
|
|
116
|
+
|
|
117
|
+
[source,ruby]
|
|
118
|
+
----
|
|
119
|
+
def self.segment_for_node(tree_node)
|
|
120
|
+
# Get label/name - handles TreeNodes and raw nodes
|
|
121
|
+
label = if tree_node.respond_to?(:label)
|
|
122
|
+
tree_node.label # TreeNode (semantic diff)
|
|
123
|
+
elsif tree_node.respond_to?(:name)
|
|
124
|
+
tree_node.name # Canon::Xml::Node or Nokogiri
|
|
125
|
+
else
|
|
126
|
+
"unknown"
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Get ordinal index among siblings with same label
|
|
130
|
+
index = ordinal_index(tree_node)
|
|
131
|
+
|
|
132
|
+
"#{label}[#{index}]"
|
|
133
|
+
end
|
|
134
|
+
----
|
|
135
|
+
|
|
136
|
+
==== ordinal_index
|
|
137
|
+
|
|
138
|
+
Calculates the zero-based position of the node among its siblings that share the same label:
|
|
139
|
+
|
|
140
|
+
[source,ruby]
|
|
141
|
+
----
|
|
142
|
+
def self.ordinal_index(tree_node)
|
|
143
|
+
return 0 unless tree_node.respond_to?(:parent)
|
|
144
|
+
return 0 unless tree_node.parent
|
|
145
|
+
return 0 unless tree_node.parent.respond_to?(:children)
|
|
146
|
+
|
|
147
|
+
siblings = tree_node.parent.children
|
|
148
|
+
return 0 unless siblings
|
|
149
|
+
|
|
150
|
+
# Handle Nokogiri NodeSet
|
|
151
|
+
siblings = siblings.to_a unless siblings.is_a?(Array)
|
|
152
|
+
|
|
153
|
+
# Get my label for comparison
|
|
154
|
+
my_label = if tree_node.respond_to?(:label)
|
|
155
|
+
tree_node.label
|
|
156
|
+
elsif tree_node.respond_to?(:name)
|
|
157
|
+
tree_node.name
|
|
158
|
+
else
|
|
159
|
+
nil
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
return 0 unless my_label
|
|
163
|
+
|
|
164
|
+
# Find position among same-label siblings
|
|
165
|
+
same_label_siblings = siblings.select do |s|
|
|
166
|
+
sibling_label = if s.respond_to?(:label)
|
|
167
|
+
s.label
|
|
168
|
+
elsif s.respond_to?(:name)
|
|
169
|
+
s.name
|
|
170
|
+
else
|
|
171
|
+
nil
|
|
172
|
+
end
|
|
173
|
+
sibling_label == my_label
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
same_label_siblings.index(tree_node) || 0
|
|
177
|
+
end
|
|
178
|
+
----
|
|
179
|
+
|
|
180
|
+
=== Example
|
|
181
|
+
|
|
182
|
+
Given this HTML:
|
|
183
|
+
|
|
184
|
+
[source,html]
|
|
185
|
+
----
|
|
186
|
+
<html>
|
|
187
|
+
<body>
|
|
188
|
+
<div>
|
|
189
|
+
<p>First paragraph</p>
|
|
190
|
+
<p>Second paragraph</p>
|
|
191
|
+
<span>A span</span>
|
|
192
|
+
<span>Another span</span>
|
|
193
|
+
</div>
|
|
194
|
+
</body>
|
|
195
|
+
</html>
|
|
196
|
+
----
|
|
197
|
+
|
|
198
|
+
PathBuilder generates:
|
|
199
|
+
|
|
200
|
+
[source,text]
|
|
201
|
+
----
|
|
202
|
+
/#document/html[0]/body[0]/div[0]/p[0] # First paragraph
|
|
203
|
+
/#document/html[0]/body[0]/div[0]/p[1] # Second paragraph
|
|
204
|
+
/#document/html[0]/body[0]/div[0]/span[0] # First span
|
|
205
|
+
/#document/html[0]/body[0]/div[0]/span[1] # Second span
|
|
206
|
+
----
|
|
207
|
+
|
|
208
|
+
== NodeSerializer: Library-agnostic serialization
|
|
209
|
+
|
|
210
|
+
=== Purpose
|
|
211
|
+
|
|
212
|
+
Serialize nodes and extract attributes in a library-agnostic way.
|
|
213
|
+
|
|
214
|
+
=== Location
|
|
215
|
+
|
|
216
|
+
`lib/canon/diff/node_serializer.rb`
|
|
217
|
+
|
|
218
|
+
=== API
|
|
219
|
+
|
|
220
|
+
[source,ruby]
|
|
221
|
+
----
|
|
222
|
+
# Serialize any node
|
|
223
|
+
serialized = Canon::Diff::NodeSerializer.serialize(node)
|
|
224
|
+
|
|
225
|
+
# Extract attributes as hash
|
|
226
|
+
attrs = Canon::Diff::NodeSerializer.extract_attributes(node)
|
|
227
|
+
# => {"lang" => "EN-GB", "xml:lang" => "EN-GB", "id" => "example"}
|
|
228
|
+
----
|
|
229
|
+
|
|
230
|
+
=== Implementation
|
|
231
|
+
|
|
232
|
+
==== serialize
|
|
233
|
+
|
|
234
|
+
Handles different node types:
|
|
235
|
+
|
|
236
|
+
[source,ruby]
|
|
237
|
+
----
|
|
238
|
+
def self.serialize(node)
|
|
239
|
+
return "" if node.nil?
|
|
240
|
+
|
|
241
|
+
# Canon::Xml::Node - use DataModel serializer
|
|
242
|
+
if node.is_a?(Canon::Xml::Node)
|
|
243
|
+
return Canon::Xml::DataModel.serialize(node)
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
# Nokogiri HTML nodes
|
|
247
|
+
if node.respond_to?(:to_html)
|
|
248
|
+
return node.to_html
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
# Nokogiri/Moxml XML nodes
|
|
252
|
+
if node.respond_to?(:to_xml)
|
|
253
|
+
return node.to_xml
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
# Fallback
|
|
257
|
+
node.to_s
|
|
258
|
+
end
|
|
259
|
+
----
|
|
260
|
+
|
|
261
|
+
==== extract_attributes
|
|
262
|
+
|
|
263
|
+
Extracts normalized attribute hash:
|
|
264
|
+
|
|
265
|
+
[source,ruby]
|
|
266
|
+
----
|
|
267
|
+
def self.extract_attributes(node)
|
|
268
|
+
return {} if node.nil?
|
|
269
|
+
|
|
270
|
+
# Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
|
|
271
|
+
if node.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
272
|
+
attrs = {}
|
|
273
|
+
node.attribute_nodes.each do |attr|
|
|
274
|
+
attrs[attr.name] = attr.value
|
|
275
|
+
end
|
|
276
|
+
return attrs
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
# Nokogiri/Moxml (attributes is Hash-like)
|
|
280
|
+
if node.respond_to?(:attributes) && node.attributes.is_a?(Hash)
|
|
281
|
+
attrs = {}
|
|
282
|
+
node.attributes.each do |name, attr|
|
|
283
|
+
value = if attr.respond_to?(:value)
|
|
284
|
+
attr.value
|
|
285
|
+
else
|
|
286
|
+
attr.to_s
|
|
287
|
+
end
|
|
288
|
+
attrs[name] = value
|
|
289
|
+
end
|
|
290
|
+
return attrs
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
{}
|
|
294
|
+
end
|
|
295
|
+
----
|
|
296
|
+
|
|
297
|
+
=== Example
|
|
298
|
+
|
|
299
|
+
Given this element:
|
|
300
|
+
|
|
301
|
+
[source,html]
|
|
302
|
+
----
|
|
303
|
+
<span lang="EN-GB" xml:lang="EN-GB" id="example">Text</span>
|
|
304
|
+
----
|
|
305
|
+
|
|
306
|
+
NodeSerializer extracts:
|
|
307
|
+
|
|
308
|
+
[source,ruby]
|
|
309
|
+
----
|
|
310
|
+
Canon::Diff::NodeSerializer.extract_attributes(node)
|
|
311
|
+
# => {"lang" => "EN-GB", "xml:lang" => "EN-GB", "id" => "example"}
|
|
312
|
+
----
|
|
313
|
+
|
|
314
|
+
== Algorithm integration
|
|
315
|
+
|
|
316
|
+
=== DOM algorithm enrichment
|
|
317
|
+
|
|
318
|
+
In `lib/canon/comparison/xml_comparator.rb`:
|
|
319
|
+
|
|
320
|
+
[source,ruby]
|
|
321
|
+
----
|
|
322
|
+
module Canon
|
|
323
|
+
module Comparison
|
|
324
|
+
class XmlComparator
|
|
325
|
+
private
|
|
326
|
+
|
|
327
|
+
def add_difference(node1, node2, diff1, diff2, dimension, _opts,
|
|
328
|
+
differences)
|
|
329
|
+
# Build reason
|
|
330
|
+
reason = build_difference_reason(node1, node2, diff1, diff2, dimension)
|
|
331
|
+
|
|
332
|
+
# Enrich with metadata for Layer 4
|
|
333
|
+
metadata = enrich_diff_metadata(node1, node2)
|
|
334
|
+
|
|
335
|
+
# Create DiffNode with enriched metadata
|
|
336
|
+
diff_node = Canon::Diff::DiffNode.new(
|
|
337
|
+
node1: node1,
|
|
338
|
+
node2: node2,
|
|
339
|
+
dimension: dimension,
|
|
340
|
+
reason: reason,
|
|
341
|
+
**metadata # Spreads enriched metadata
|
|
342
|
+
)
|
|
343
|
+
differences << diff_node
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
def enrich_diff_metadata(node1, node2)
|
|
347
|
+
{
|
|
348
|
+
path: build_path_for_node(node1 || node2),
|
|
349
|
+
serialized_before: serialize_node(node1),
|
|
350
|
+
serialized_after: serialize_node(node2),
|
|
351
|
+
attributes_before: extract_attributes(node1),
|
|
352
|
+
attributes_after: extract_attributes(node2),
|
|
353
|
+
}
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
def build_path_for_node(node)
|
|
357
|
+
return nil if node.nil?
|
|
358
|
+
Canon::Diff::PathBuilder.build(node, format: :document)
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
def serialize_node(node)
|
|
362
|
+
return nil if node.nil?
|
|
363
|
+
Canon::Diff::NodeSerializer.serialize(node)
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
def extract_attributes(node)
|
|
367
|
+
return nil if node.nil?
|
|
368
|
+
Canon::Diff::NodeSerializer.extract_attributes(node)
|
|
369
|
+
end
|
|
370
|
+
end
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
----
|
|
374
|
+
|
|
375
|
+
=== Semantic algorithm enrichment
|
|
376
|
+
|
|
377
|
+
In `lib/canon/tree_diff/operation_converter.rb`:
|
|
378
|
+
|
|
379
|
+
[source,ruby]
|
|
380
|
+
----
|
|
381
|
+
module Canon
|
|
382
|
+
module TreeDiff
|
|
383
|
+
class OperationConverter
|
|
384
|
+
private
|
|
385
|
+
|
|
386
|
+
def convert_insert(operation)
|
|
387
|
+
tree_node2 = operation[:node]
|
|
388
|
+
node2 = extract_source_node(tree_node2)
|
|
389
|
+
|
|
390
|
+
# Enrich with metadata for Layer 4
|
|
391
|
+
metadata = enrich_diff_metadata(nil, tree_node2)
|
|
392
|
+
|
|
393
|
+
diff_node = Canon::Diff::DiffNode.new(
|
|
394
|
+
node1: nil,
|
|
395
|
+
node2: node2,
|
|
396
|
+
dimension: :element_structure,
|
|
397
|
+
reason: build_insert_reason(operation),
|
|
398
|
+
**metadata # Spreads enriched metadata
|
|
399
|
+
)
|
|
400
|
+
diff_node.normative = determine_normative(:element_structure)
|
|
401
|
+
diff_node
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
def enrich_diff_metadata(tree_node1, tree_node2)
|
|
405
|
+
{
|
|
406
|
+
path: build_path_for_node(tree_node1 || tree_node2),
|
|
407
|
+
serialized_before: serialize_node(tree_node1),
|
|
408
|
+
serialized_after: serialize_node(tree_node2),
|
|
409
|
+
attributes_before: extract_attributes(tree_node1),
|
|
410
|
+
attributes_after: extract_attributes(tree_node2),
|
|
411
|
+
}
|
|
412
|
+
end
|
|
413
|
+
|
|
414
|
+
def build_path_for_node(tree_node)
|
|
415
|
+
return nil if tree_node.nil?
|
|
416
|
+
# Use fragment format for HTML, document for XML
|
|
417
|
+
format = @format == :xml ? :document : :fragment
|
|
418
|
+
Canon::Diff::PathBuilder.build(tree_node, format: format)
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
def serialize_node(tree_node)
|
|
422
|
+
return nil if tree_node.nil?
|
|
423
|
+
source = extract_source_node(tree_node)
|
|
424
|
+
Canon::Diff::NodeSerializer.serialize(source)
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def extract_attributes(tree_node)
|
|
428
|
+
return nil if tree_node.nil?
|
|
429
|
+
# TreeNode has attributes directly (normalized by adapter)
|
|
430
|
+
tree_node.respond_to?(:attributes) ? (tree_node.attributes || {}) : {}
|
|
431
|
+
end
|
|
432
|
+
end
|
|
433
|
+
end
|
|
434
|
+
end
|
|
435
|
+
----
|
|
436
|
+
|
|
437
|
+
== Layer 4 rendering
|
|
438
|
+
|
|
439
|
+
=== Using enriched metadata
|
|
440
|
+
|
|
441
|
+
In `lib/canon/diff_formatter/diff_detail_formatter.rb`:
|
|
442
|
+
|
|
443
|
+
[source,ruby]
|
|
444
|
+
----
|
|
445
|
+
module Canon
|
|
446
|
+
class DiffFormatter
|
|
447
|
+
module DiffDetailFormatter
|
|
448
|
+
private
|
|
449
|
+
|
|
450
|
+
def extract_location(diff)
|
|
451
|
+
# Use enriched path if available (with ordinal indices)
|
|
452
|
+
if diff.respond_to?(:path) && diff.path
|
|
453
|
+
return diff.path
|
|
454
|
+
end
|
|
455
|
+
|
|
456
|
+
# Fallback: extract from node (legacy path)
|
|
457
|
+
node = diff.respond_to?(:node1) ? (diff.node1 || diff.node2) : nil
|
|
458
|
+
if node.respond_to?(:name)
|
|
459
|
+
return extract_xpath(node)
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
# Final fallback
|
|
463
|
+
diff.respond_to?(:dimension) ? diff.dimension.to_s : "(unknown)"
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
def format_element_structure_details(diff, use_color)
|
|
467
|
+
# Use enriched serialized content if available
|
|
468
|
+
serialized_before = diff.respond_to?(:serialized_before) ? diff.serialized_before : nil
|
|
469
|
+
serialized_after = diff.respond_to?(:serialized_after) ? diff.serialized_after : nil
|
|
470
|
+
|
|
471
|
+
if node1.nil? && !node2.nil?
|
|
472
|
+
# INSERT - use serialized_after
|
|
473
|
+
content_preview = serialized_after || extract_content_preview(node2, 50)
|
|
474
|
+
detail1 = colorize("(not present)", :red, use_color)
|
|
475
|
+
detail2 = content_preview
|
|
476
|
+
changes = "Element inserted"
|
|
477
|
+
elsif !node1.nil? && node2.nil?
|
|
478
|
+
# DELETE - use serialized_before
|
|
479
|
+
content_preview = serialized_before || extract_content_preview(node1, 50)
|
|
480
|
+
detail1 = content_preview
|
|
481
|
+
detail2 = colorize("(not present)", :green, use_color)
|
|
482
|
+
changes = "Element deleted"
|
|
483
|
+
else
|
|
484
|
+
# STRUCTURAL CHANGE - use both
|
|
485
|
+
detail1 = serialized_before || extract_content_preview(node1, 50)
|
|
486
|
+
detail2 = serialized_after || extract_content_preview(node2, 50)
|
|
487
|
+
changes = "Element structure changed"
|
|
488
|
+
end
|
|
489
|
+
|
|
490
|
+
[detail1, detail2, changes]
|
|
491
|
+
end
|
|
492
|
+
|
|
493
|
+
def format_attribute_values_details(diff, use_color)
|
|
494
|
+
# Use enriched attributes if available
|
|
495
|
+
attrs1_before = diff.respond_to?(:attributes_before) ? diff.attributes_before : nil
|
|
496
|
+
attrs2_after = diff.respond_to?(:attributes_after) ? diff.attributes_after : nil
|
|
497
|
+
|
|
498
|
+
if attrs1_before && attrs2_after
|
|
499
|
+
# Use enriched attributes
|
|
500
|
+
all_keys = (attrs1_before.keys + attrs2_after.keys).uniq
|
|
501
|
+
differing_attrs = all_keys.reject { |key| attrs1_before[key] == attrs2_after[key] }
|
|
502
|
+
else
|
|
503
|
+
# Fallback to extracting from nodes
|
|
504
|
+
differing_attrs = find_all_differing_attributes(diff.node1, diff.node2)
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
# ... format using differing_attrs
|
|
508
|
+
end
|
|
509
|
+
end
|
|
510
|
+
end
|
|
511
|
+
end
|
|
512
|
+
----
|
|
513
|
+
|
|
514
|
+
=== Benefits
|
|
515
|
+
|
|
516
|
+
1. **Accurate before/after**: Shows actual node state at diff creation time
|
|
517
|
+
2. **Useful paths**: Ordinal indices make XPaths actionable for debugging
|
|
518
|
+
3. **Library flexibility**: New parsing libraries work without changing Layer 4
|
|
519
|
+
4. **Performance**: Metadata captured once, not re-computed
|
|
520
|
+
5. **Testability**: Enriched DiffNodes are self-contained
|
|
521
|
+
|
|
522
|
+
== Testing
|
|
523
|
+
|
|
524
|
+
=== PathBuilder tests
|
|
525
|
+
|
|
526
|
+
`spec/canon/diff/path_builder_spec.rb`:
|
|
527
|
+
|
|
528
|
+
[source,ruby]
|
|
529
|
+
----
|
|
530
|
+
RSpec.describe Canon::Diff::PathBuilder do
|
|
531
|
+
describe ".build" do
|
|
532
|
+
it "generates canonical path with ordinal indices" do
|
|
533
|
+
# TreeNodes from semantic diff
|
|
534
|
+
tree_node = build_tree_node_with_siblings
|
|
535
|
+
path = Canon::Diff::PathBuilder.build(tree_node)
|
|
536
|
+
expect(path).to eq("/#document-fragment/div[0]/p[1]/span[2]")
|
|
537
|
+
end
|
|
538
|
+
|
|
539
|
+
it "handles Nokogiri nodes" do
|
|
540
|
+
html = "<div><p></p><p></p></div>"
|
|
541
|
+
doc = Nokogiri::HTML4.fragment(html)
|
|
542
|
+
p_tag = doc.at_css("p:last")
|
|
543
|
+
path = Canon::Diff::PathBuilder.build(p_tag)
|
|
544
|
+
expect(path).to include("/p[1]")
|
|
545
|
+
end
|
|
546
|
+
end
|
|
547
|
+
end
|
|
548
|
+
----
|
|
549
|
+
|
|
550
|
+
=== NodeSerializer tests
|
|
551
|
+
|
|
552
|
+
`spec/canon/diff/node_serializer_spec.rb`:
|
|
553
|
+
|
|
554
|
+
[source,ruby]
|
|
555
|
+
----
|
|
556
|
+
RSpec.describe Canon::Diff::NodeSerializer do
|
|
557
|
+
describe ".serialize" do
|
|
558
|
+
it "serializes Canon::Xml::Node" do
|
|
559
|
+
node = Canon::Xml::DataModel.from_xml("<div>Text</div>")
|
|
560
|
+
serialized = Canon::Diff::NodeSerializer.serialize(node)
|
|
561
|
+
expect(serialized).to include("<div")
|
|
562
|
+
end
|
|
563
|
+
|
|
564
|
+
it "serializes Nokogiri nodes" do
|
|
565
|
+
node = Nokogiri::HTML4.fragment("<span>Text</span>").children.first
|
|
566
|
+
serialized = Canon::Diff::NodeSerializer.serialize(node)
|
|
567
|
+
expect(serialized).to include("<span")
|
|
568
|
+
end
|
|
569
|
+
end
|
|
570
|
+
|
|
571
|
+
describe ".extract_attributes" do
|
|
572
|
+
it "extracts normalized attributes" do
|
|
573
|
+
node = Nokogiri::HTML4.fragment("<span lang='en' id='test'>").children.first
|
|
574
|
+
attrs = Canon::Diff::NodeSerializer.extract_attributes(node)
|
|
575
|
+
expect(attrs).to eq({"lang" => "en", "id" => "test"})
|
|
576
|
+
end
|
|
577
|
+
end
|
|
578
|
+
end
|
|
579
|
+
----
|
|
580
|
+
|
|
581
|
+
== Migration guide
|
|
582
|
+
|
|
583
|
+
If you have code that interacts with DiffNodes:
|
|
584
|
+
|
|
585
|
+
=== Before (old API)
|
|
586
|
+
|
|
587
|
+
[source,ruby]
|
|
588
|
+
----
|
|
589
|
+
diff_node = differences.first
|
|
590
|
+
path = extract_xpath_from_node(diff_node.node1)
|
|
591
|
+
before_content = diff_node.node1.to_s
|
|
592
|
+
after_content = diff_node.node2.to_s
|
|
593
|
+
----
|
|
594
|
+
|
|
595
|
+
=== After (new API)
|
|
596
|
+
|
|
597
|
+
[source,ruby]
|
|
598
|
+
----
|
|
599
|
+
diff_node = differences.first
|
|
600
|
+
path = diff_node.path # Enriched with ordinal indices
|
|
601
|
+
before_content = diff_node.serialized_before # Captured at diff creation
|
|
602
|
+
after_content = diff_node.serialized_after
|
|
603
|
+
----
|
|
604
|
+
|
|
605
|
+
The old API still works for backwards compatibility, but enriched properties provide more accurate and useful data.
|
|
606
|
+
|
|
607
|
+
== See also
|
|
608
|
+
|
|
609
|
+
* link:../understanding/architecture.adoc[Architecture] - 4-layer architecture overview
|
|
610
|
+
* link:../understanding/algorithms/[Algorithms] - DOM and Semantic algorithm details
|
|
611
|
+
* link:../features/diff-formatting/[Diff Formatting] - Layer 4 rendering options
|