canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -0,0 +1,611 @@
1
+ ---
2
+ title: DiffNode Enrichment
3
+ parent: Internals
4
+ nav_order: 1
5
+ ---
6
+ = DiffNode Enrichment
7
+
8
+ == Purpose
9
+
10
+ This document explains how DiffNode objects carry complete information about differences through Canon's comparison pipeline, including location paths, serialized content, and normalized attributes.
11
+
12
+ == DiffNode structure
13
+
14
+ === Properties
15
+
16
+ DiffNode objects contain all information needed to understand and display a difference:
17
+
18
+ [source,ruby]
19
+ ----
20
+ class DiffNode
21
+ # Core properties
22
+ attr_reader :node1, :node2 # Raw node references
23
+ attr_accessor :dimension, :reason # What changed and why
24
+ attr_accessor :normative, :formatting # Classification
25
+
26
+ # Location and display information
27
+ attr_accessor :path # Canonical path with ordinal indices
28
+ attr_accessor :serialized_before # Serialized "before" content
29
+ attr_accessor :serialized_after # Serialized "after" content
30
+ attr_accessor :attributes_before # Normalized "before" attributes
31
+ attr_accessor :attributes_after # Normalized "after" attributes
32
+ end
33
+ ----
34
+
35
+ === Property categories
36
+
37
+ **Core properties** - Describe what changed:
38
+
39
+ * `node1, node2` - Raw node references from original documents
40
+ * `dimension` - Type of difference (`:text_content`, `:attribute_values`, `:element_structure`, etc.)
41
+ * `reason` - Human-readable explanation
42
+ * `normative` - `true` if the difference affects semantic equivalence, `false` if it is formatting-only
43
+ * `formatting` - Purely cosmetic whitespace difference
44
+
45
+ **Location and display properties** - Enable accurate rendering:
46
+
47
+ * `path` - Canonical path with ordinal indices
48
+ * `serialized_before/after` - Serialized content captured at comparison time
49
+ * `attributes_before/after` - Normalized attribute hashes
50
+
51
+ == Architecture
52
+
53
+ === Enrichment flow
54
+
55
+ [mermaid]
56
+ ----
57
+ graph TD
58
+ A[Raw Nodes] --> B{Algorithm Layer}
59
+ B -->|DOM| C[XmlComparator]
60
+ B -->|Semantic| D[OperationConverter]
61
+ C --> E[enrich_diff_metadata]
62
+ D --> E
63
+ E --> F[PathBuilder]
64
+ E --> G[NodeSerializer]
65
+ F --> H[DiffNode.path]
66
+ G --> I[DiffNode.serialized_before/after]
67
+ G --> J[DiffNode.attributes_before/after]
68
+ H --> K[Enriched DiffNode]
69
+ I --> K
70
+ J --> K
71
+ K --> L[Layer 4: Rendering]
72
+
73
+ style B fill:#fff4e1
74
+ style E fill:#e1f5ff
75
+ style K fill:#e1ffe1
76
+ ----
77
+
78
+ === Library-agnostic design
79
+
80
+ Canon works with nodes from multiple libraries and representations (Nokogiri, Moxml, and Canon's own `Canon::Xml::Node`) and must remain library-agnostic so that future libraries can be supported. The enrichment utilities handle this by:
81
+
82
+ 1. **Detecting node type** using `respond_to?` checks
83
+ 2. **Calling appropriate methods** for each library
84
+ 3. **Normalizing output** to library-agnostic format
85
+
86
+ This allows Layer 4 (the rendering layer) to work with enriched metadata without knowing which parsing library created the nodes.
87
+
88
+ == PathBuilder: Canonical paths with ordinal indices
89
+
90
+ === Purpose
91
+
92
+ Generate unambiguous XPath-like paths that uniquely identify nodes regardless of parsing library.
93
+
94
+ === Location
95
+
96
+ `lib/canon/diff/path_builder.rb`
97
+
98
+ === API
99
+
100
+ [source,ruby]
101
+ ----
102
+ # Build canonical path from node
103
+ path = Canon::Diff::PathBuilder.build(node, format: :document)
104
+ # => "/#document/html[0]/body[0]/p[1]/span[2]"
105
+
106
+ # Build human-readable path
107
+ human = Canon::Diff::PathBuilder.human_path(node)
108
+ # => "#document → html[0] → body[0] → p[1] → span[2]"
109
+ ----
110
+
111
+ === Implementation
112
+
113
+ ==== segment_for_node
114
+
115
+ Generates a single path segment with ordinal index:
116
+
117
+ [source,ruby]
118
+ ----
119
+ def self.segment_for_node(tree_node)
120
+ # Get label/name - handles TreeNodes and raw nodes
121
+ label = if tree_node.respond_to?(:label)
122
+ tree_node.label # TreeNode (semantic diff)
123
+ elsif tree_node.respond_to?(:name)
124
+ tree_node.name # Canon::Xml::Node or Nokogiri
125
+ else
126
+ "unknown"
127
+ end
128
+
129
+ # Get ordinal index among siblings with same label
130
+ index = ordinal_index(tree_node)
131
+
132
+ "#{label}[#{index}]"
133
+ end
134
+ ----
135
+
136
+ ==== ordinal_index
137
+
138
+ Calculates the zero-based position of a node among its siblings that share the same label (unlike XPath, which counts from 1):
139
+
140
+ [source,ruby]
141
+ ----
142
+ def self.ordinal_index(tree_node)
143
+ return 0 unless tree_node.respond_to?(:parent)
144
+ return 0 unless tree_node.parent
145
+ return 0 unless tree_node.parent.respond_to?(:children)
146
+
147
+ siblings = tree_node.parent.children
148
+ return 0 unless siblings
149
+
150
+ # Handle Nokogiri NodeSet
151
+ siblings = siblings.to_a unless siblings.is_a?(Array)
152
+
153
+ # Get my label for comparison
154
+ my_label = if tree_node.respond_to?(:label)
155
+ tree_node.label
156
+ elsif tree_node.respond_to?(:name)
157
+ tree_node.name
158
+ else
159
+ nil
160
+ end
161
+
162
+ return 0 unless my_label
163
+
164
+ # Find position among same-label siblings
165
+ same_label_siblings = siblings.select do |s|
166
+ sibling_label = if s.respond_to?(:label)
167
+ s.label
168
+ elsif s.respond_to?(:name)
169
+ s.name
170
+ else
171
+ nil
172
+ end
173
+ sibling_label == my_label
174
+ end
175
+
176
+ same_label_siblings.index(tree_node) || 0
177
+ end
178
+ ----
179
+
180
+ === Example
181
+
182
+ Given this HTML:
183
+
184
+ [source,html]
185
+ ----
186
+ <html>
187
+ <body>
188
+ <div>
189
+ <p>First paragraph</p>
190
+ <p>Second paragraph</p>
191
+ <span>A span</span>
192
+ <span>Another span</span>
193
+ </div>
194
+ </body>
195
+ </html>
196
+ ----
197
+
198
+ PathBuilder generates:
199
+
200
+ [source,text]
201
+ ----
202
+ /#document/html[0]/body[0]/div[0]/p[0] # First paragraph
203
+ /#document/html[0]/body[0]/div[0]/p[1] # Second paragraph
204
+ /#document/html[0]/body[0]/div[0]/span[0] # First span
205
+ /#document/html[0]/body[0]/div[0]/span[1] # Second span
206
+ ----
207
+
208
+ == NodeSerializer: Library-agnostic serialization
209
+
210
+ === Purpose
211
+
212
+ Serialize nodes and extract attributes in a library-agnostic way.
213
+
214
+ === Location
215
+
216
+ `lib/canon/diff/node_serializer.rb`
217
+
218
+ === API
219
+
220
+ [source,ruby]
221
+ ----
222
+ # Serialize any node
223
+ serialized = Canon::Diff::NodeSerializer.serialize(node)
224
+
225
+ # Extract attributes as hash
226
+ attrs = Canon::Diff::NodeSerializer.extract_attributes(node)
227
+ # => {"lang" => "EN-GB", "xml:lang" => "EN-GB", "id" => "example"}
228
+ ----
229
+
230
+ === Implementation
231
+
232
+ ==== serialize
233
+
234
+ Handles different node types:
235
+
236
+ [source,ruby]
237
+ ----
238
+ def self.serialize(node)
239
+ return "" if node.nil?
240
+
241
+ # Canon::Xml::Node - use DataModel serializer
242
+ if node.is_a?(Canon::Xml::Node)
243
+ return Canon::Xml::DataModel.serialize(node)
244
+ end
245
+
246
+ # Nokogiri HTML nodes
247
+ if node.respond_to?(:to_html)
248
+ return node.to_html
249
+ end
250
+
251
+ # Nokogiri/Moxml XML nodes
252
+ if node.respond_to?(:to_xml)
253
+ return node.to_xml
254
+ end
255
+
256
+ # Fallback
257
+ node.to_s
258
+ end
259
+ ----
260
+
261
+ ==== extract_attributes
262
+
263
+ Extracts normalized attribute hash:
264
+
265
+ [source,ruby]
266
+ ----
267
+ def self.extract_attributes(node)
268
+ return {} if node.nil?
269
+
270
+ # Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
271
+ if node.is_a?(Canon::Xml::Nodes::ElementNode)
272
+ attrs = {}
273
+ node.attribute_nodes.each do |attr|
274
+ attrs[attr.name] = attr.value
275
+ end
276
+ return attrs
277
+ end
278
+
279
+ # Nokogiri/Moxml (attributes is Hash-like)
280
+ if node.respond_to?(:attributes) && node.attributes.is_a?(Hash)
281
+ attrs = {}
282
+ node.attributes.each do |name, attr|
283
+ value = if attr.respond_to?(:value)
284
+ attr.value
285
+ else
286
+ attr.to_s
287
+ end
288
+ attrs[name] = value
289
+ end
290
+ return attrs
291
+ end
292
+
293
+ {}
294
+ end
295
+ ----
296
+
297
+ === Example
298
+
299
+ Given this element:
300
+
301
+ [source,html]
302
+ ----
303
+ <span lang="EN-GB" xml:lang="EN-GB" id="example">Text</span>
304
+ ----
305
+
306
+ NodeSerializer extracts:
307
+
308
+ [source,ruby]
309
+ ----
310
+ Canon::Diff::NodeSerializer.extract_attributes(node)
311
+ # => {"lang" => "EN-GB", "xml:lang" => "EN-GB", "id" => "example"}
312
+ ----
313
+
314
+ == Algorithm integration
315
+
316
+ === DOM algorithm enrichment
317
+
318
+ In `lib/canon/comparison/xml_comparator.rb`:
319
+
320
+ [source,ruby]
321
+ ----
322
+ module Canon
323
+ module Comparison
324
+ class XmlComparator
325
+ private
326
+
327
+ def add_difference(node1, node2, diff1, diff2, dimension, _opts,
328
+ differences)
329
+ # Build reason
330
+ reason = build_difference_reason(node1, node2, diff1, diff2, dimension)
331
+
332
+ # Enrich with metadata for Layer 4
333
+ metadata = enrich_diff_metadata(node1, node2)
334
+
335
+ # Create DiffNode with enriched metadata
336
+ diff_node = Canon::Diff::DiffNode.new(
337
+ node1: node1,
338
+ node2: node2,
339
+ dimension: dimension,
340
+ reason: reason,
341
+ **metadata # Spreads enriched metadata
342
+ )
343
+ differences << diff_node
344
+ end
345
+
346
+ def enrich_diff_metadata(node1, node2)
347
+ {
348
+ path: build_path_for_node(node1 || node2),
349
+ serialized_before: serialize_node(node1),
350
+ serialized_after: serialize_node(node2),
351
+ attributes_before: extract_attributes(node1),
352
+ attributes_after: extract_attributes(node2),
353
+ }
354
+ end
355
+
356
+ def build_path_for_node(node)
357
+ return nil if node.nil?
358
+ Canon::Diff::PathBuilder.build(node, format: :document)
359
+ end
360
+
361
+ def serialize_node(node)
362
+ return nil if node.nil?
363
+ Canon::Diff::NodeSerializer.serialize(node)
364
+ end
365
+
366
+ def extract_attributes(node)
367
+ return nil if node.nil?
368
+ Canon::Diff::NodeSerializer.extract_attributes(node)
369
+ end
370
+ end
371
+ end
372
+ end
373
+ ----
374
+
375
+ === Semantic algorithm enrichment
376
+
377
+ In `lib/canon/tree_diff/operation_converter.rb`:
378
+
379
+ [source,ruby]
380
+ ----
381
+ module Canon
382
+ module TreeDiff
383
+ class OperationConverter
384
+ private
385
+
386
+ def convert_insert(operation)
387
+ tree_node2 = operation[:node]
388
+ node2 = extract_source_node(tree_node2)
389
+
390
+ # Enrich with metadata for Layer 4
391
+ metadata = enrich_diff_metadata(nil, tree_node2)
392
+
393
+ diff_node = Canon::Diff::DiffNode.new(
394
+ node1: nil,
395
+ node2: node2,
396
+ dimension: :element_structure,
397
+ reason: build_insert_reason(operation),
398
+ **metadata # Spreads enriched metadata
399
+ )
400
+ diff_node.normative = determine_normative(:element_structure)
401
+ diff_node
402
+ end
403
+
404
+ def enrich_diff_metadata(tree_node1, tree_node2)
405
+ {
406
+ path: build_path_for_node(tree_node1 || tree_node2),
407
+ serialized_before: serialize_node(tree_node1),
408
+ serialized_after: serialize_node(tree_node2),
409
+ attributes_before: extract_attributes(tree_node1),
410
+ attributes_after: extract_attributes(tree_node2),
411
+ }
412
+ end
413
+
414
+ def build_path_for_node(tree_node)
415
+ return nil if tree_node.nil?
416
+ # Use fragment format for HTML, document for XML
417
+ format = @format == :xml ? :document : :fragment
418
+ Canon::Diff::PathBuilder.build(tree_node, format: format)
419
+ end
420
+
421
+ def serialize_node(tree_node)
422
+ return nil if tree_node.nil?
423
+ source = extract_source_node(tree_node)
424
+ Canon::Diff::NodeSerializer.serialize(source)
425
+ end
426
+
427
+ def extract_attributes(tree_node)
428
+ return nil if tree_node.nil?
429
+ # TreeNode has attributes directly (normalized by adapter)
430
+ tree_node.respond_to?(:attributes) ? (tree_node.attributes || {}) : {}
431
+ end
432
+ end
433
+ end
434
+ end
435
+ ----
436
+
437
+ == Layer 4 rendering
438
+
439
+ === Using enriched metadata
440
+
441
+ In `lib/canon/diff_formatter/diff_detail_formatter.rb`:
442
+
443
+ [source,ruby]
444
+ ----
445
+ module Canon
446
+ class DiffFormatter
447
+ module DiffDetailFormatter
448
+ private
449
+
450
+ def extract_location(diff)
451
+ # Use enriched path if available (with ordinal indices)
452
+ if diff.respond_to?(:path) && diff.path
453
+ return diff.path
454
+ end
455
+
456
+ # Fallback: extract from node (legacy path)
457
+ node = diff.respond_to?(:node1) ? (diff.node1 || diff.node2) : nil
458
+ if node.respond_to?(:name)
459
+ return extract_xpath(node)
460
+ end
461
+
462
+ # Final fallback
463
+ diff.respond_to?(:dimension) ? diff.dimension.to_s : "(unknown)"
464
+ end
465
+
466
+ def format_element_structure_details(diff, use_color)
467
+ # Use enriched serialized content if available
468
+ serialized_before = diff.respond_to?(:serialized_before) ? diff.serialized_before : nil
469
+ serialized_after = diff.respond_to?(:serialized_after) ? diff.serialized_after : nil
470
+
471
+ if node1.nil? && !node2.nil?
472
+ # INSERT - use serialized_after
473
+ content_preview = serialized_after || extract_content_preview(node2, 50)
474
+ detail1 = colorize("(not present)", :red, use_color)
475
+ detail2 = content_preview
476
+ changes = "Element inserted"
477
+ elsif !node1.nil? && node2.nil?
478
+ # DELETE - use serialized_before
479
+ content_preview = serialized_before || extract_content_preview(node1, 50)
480
+ detail1 = content_preview
481
+ detail2 = colorize("(not present)", :green, use_color)
482
+ changes = "Element deleted"
483
+ else
484
+ # STRUCTURAL CHANGE - use both
485
+ detail1 = serialized_before || extract_content_preview(node1, 50)
486
+ detail2 = serialized_after || extract_content_preview(node2, 50)
487
+ changes = "Element structure changed"
488
+ end
489
+
490
+ [detail1, detail2, changes]
491
+ end
492
+
493
+ def format_attribute_values_details(diff, use_color)
494
+ # Use enriched attributes if available
495
+ attrs1_before = diff.respond_to?(:attributes_before) ? diff.attributes_before : nil
496
+ attrs2_after = diff.respond_to?(:attributes_after) ? diff.attributes_after : nil
497
+
498
+ if attrs1_before && attrs2_after
499
+ # Use enriched attributes
500
+ all_keys = (attrs1_before.keys + attrs2_after.keys).uniq
501
+ differing_attrs = all_keys.reject { |key| attrs1_before[key] == attrs2_after[key] }
502
+ else
503
+ # Fallback to extracting from nodes
504
+ differing_attrs = find_all_differing_attributes(diff.node1, diff.node2)
505
+ end
506
+
507
+ # ... format using differing_attrs
508
+ end
509
+ end
510
+ end
511
+ end
512
+ ----
513
+
514
+ === Benefits
515
+
516
+ 1. **Accurate before/after**: Shows actual node state at diff creation time
517
+ 2. **Useful paths**: Ordinal indices make the XPath-like paths unambiguous and actionable for debugging
518
+ 3. **Library flexibility**: New parsing libraries work without changing Layer 4
519
+ 4. **Performance**: Metadata captured once, not re-computed
520
+ 5. **Testability**: Enriched DiffNodes are self-contained
521
+
522
+ == Testing
523
+
524
+ === PathBuilder tests
525
+
526
+ `spec/canon/diff/path_builder_spec.rb`:
527
+
528
+ [source,ruby]
529
+ ----
530
+ RSpec.describe Canon::Diff::PathBuilder do
531
+ describe ".build" do
532
+ it "generates canonical path with ordinal indices" do
533
+ # TreeNodes from semantic diff
534
+ tree_node = build_tree_node_with_siblings
535
+ path = Canon::Diff::PathBuilder.build(tree_node)
536
+ expect(path).to eq("/#document-fragment/div[0]/p[1]/span[2]")
537
+ end
538
+
539
+ it "handles Nokogiri nodes" do
540
+ html = "<div><p></p><p></p></div>"
541
+ doc = Nokogiri::HTML4.fragment(html)
542
+ p_tag = doc.at_css("p:last")
543
+ path = Canon::Diff::PathBuilder.build(p_tag)
544
+ expect(path).to include("/p[1]")
545
+ end
546
+ end
547
+ end
548
+ ----
549
+
550
+ === NodeSerializer tests
551
+
552
+ `spec/canon/diff/node_serializer_spec.rb`:
553
+
554
+ [source,ruby]
555
+ ----
556
+ RSpec.describe Canon::Diff::NodeSerializer do
557
+ describe ".serialize" do
558
+ it "serializes Canon::Xml::Node" do
559
+ node = Canon::Xml::DataModel.from_xml("<div>Text</div>")
560
+ serialized = Canon::Diff::NodeSerializer.serialize(node)
561
+ expect(serialized).to include("<div")
562
+ end
563
+
564
+ it "serializes Nokogiri nodes" do
565
+ node = Nokogiri::HTML4.fragment("<span>Text</span>").children.first
566
+ serialized = Canon::Diff::NodeSerializer.serialize(node)
567
+ expect(serialized).to include("<span")
568
+ end
569
+ end
570
+
571
+ describe ".extract_attributes" do
572
+ it "extracts normalized attributes" do
573
+ node = Nokogiri::HTML4.fragment("<span lang='en' id='test'>").children.first
574
+ attrs = Canon::Diff::NodeSerializer.extract_attributes(node)
575
+ expect(attrs).to eq({"lang" => "en", "id" => "test"})
576
+ end
577
+ end
578
+ end
579
+ ----
580
+
581
+ == Migration guide
582
+
583
+ If you have code that interacts with DiffNodes, read the enriched properties instead of deriving paths and content from the raw nodes:
584
+
585
+ === Before (old API)
586
+
587
+ [source,ruby]
588
+ ----
589
+ diff_node = differences.first
590
+ path = extract_xpath_from_node(diff_node.node1)
591
+ before_content = diff_node.node1.to_s
592
+ after_content = diff_node.node2.to_s
593
+ ----
594
+
595
+ === After (new API)
596
+
597
+ [source,ruby]
598
+ ----
599
+ diff_node = differences.first
600
+ path = diff_node.path # Enriched with ordinal indices
601
+ before_content = diff_node.serialized_before # Captured at diff creation
602
+ after_content = diff_node.serialized_after
603
+ ----
604
+
605
+ The old API still works for backwards compatibility, but enriched properties provide more accurate and useful data.
606
+
607
+ == See also
608
+
609
+ * link:../understanding/architecture.adoc[Architecture] - 4-layer architecture overview
610
+ * link:../understanding/algorithms/[Algorithms] - DOM and Semantic algorithm details
611
+ * link:../features/diff-formatting/[Diff Formatting] - Layer 4 rendering options