canon 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -67
  3. data/README.adoc +400 -7
  4. data/docs/Gemfile +9 -0
  5. data/docs/INDEX.adoc +99 -182
  6. data/docs/_config.yml +100 -0
  7. data/docs/advanced/diff-classification.adoc +547 -0
  8. data/docs/advanced/diff-pipeline.adoc +358 -0
  9. data/docs/advanced/index.adoc +214 -0
  10. data/docs/advanced/semantic-diff-report.adoc +390 -0
  11. data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
  12. data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
  13. data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
  14. data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
  15. data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
  16. data/docs/features/diff-formatting/display-filtering.adoc +472 -0
  17. data/docs/features/diff-formatting/index.adoc +140 -0
  18. data/docs/features/environment-configuration/index.adoc +327 -0
  19. data/docs/features/environment-configuration/override-system.adoc +436 -0
  20. data/docs/features/environment-configuration/size-limits.adoc +273 -0
  21. data/docs/features/index.adoc +173 -0
  22. data/docs/features/input-validation/index.adoc +521 -0
  23. data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
  24. data/docs/features/match-options/html-policies.adoc +312 -0
  25. data/docs/features/match-options/index.adoc +621 -0
  26. data/docs/getting-started/index.adoc +83 -0
  27. data/docs/getting-started/quick-start.adoc +76 -0
  28. data/docs/guides/choosing-configuration.adoc +689 -0
  29. data/docs/guides/index.adoc +181 -0
  30. data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
  31. data/docs/interfaces/index.adoc +101 -0
  32. data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
  33. data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
  34. data/docs/lychee.toml +65 -0
  35. data/docs/reference/cli-options.adoc +418 -0
  36. data/docs/reference/environment-variables.adoc +375 -0
  37. data/docs/reference/index.adoc +204 -0
  38. data/docs/reference/options-across-interfaces.adoc +417 -0
  39. data/docs/understanding/algorithms/dom-diff.adoc +389 -0
  40. data/docs/understanding/algorithms/index.adoc +314 -0
  41. data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
  42. data/docs/understanding/architecture.adoc +447 -0
  43. data/docs/understanding/comparison-pipeline.adoc +317 -0
  44. data/docs/understanding/formats/html.adoc +380 -0
  45. data/docs/understanding/formats/index.adoc +261 -0
  46. data/docs/understanding/formats/json.adoc +390 -0
  47. data/docs/understanding/formats/xml.adoc +366 -0
  48. data/docs/understanding/formats/yaml.adoc +504 -0
  49. data/docs/understanding/index.adoc +130 -0
  50. data/lib/canon/cli.rb +42 -1
  51. data/lib/canon/commands/diff_command.rb +108 -23
  52. data/lib/canon/comparison/compare_profile.rb +101 -0
  53. data/lib/canon/comparison/comparison_result.rb +41 -2
  54. data/lib/canon/comparison/html_comparator.rb +292 -71
  55. data/lib/canon/comparison/html_compare_profile.rb +117 -0
  56. data/lib/canon/comparison/match_options.rb +42 -4
  57. data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
  58. data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
  59. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
  60. data/lib/canon/comparison/xml_comparator.rb +695 -91
  61. data/lib/canon/comparison.rb +207 -2
  62. data/lib/canon/config/env_provider.rb +71 -0
  63. data/lib/canon/config/env_schema.rb +58 -0
  64. data/lib/canon/config/override_resolver.rb +55 -0
  65. data/lib/canon/config/type_converter.rb +59 -0
  66. data/lib/canon/config.rb +158 -29
  67. data/lib/canon/data_model.rb +29 -0
  68. data/lib/canon/diff/diff_classifier.rb +74 -14
  69. data/lib/canon/diff/diff_context_builder.rb +41 -0
  70. data/lib/canon/diff/diff_line.rb +18 -2
  71. data/lib/canon/diff/diff_node.rb +18 -3
  72. data/lib/canon/diff/diff_node_mapper.rb +71 -12
  73. data/lib/canon/diff/formatting_detector.rb +53 -0
  74. data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
  75. data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
  76. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
  77. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
  78. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
  79. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
  80. data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
  81. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
  82. data/lib/canon/diff_formatter/debug_output.rb +7 -1
  83. data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
  84. data/lib/canon/diff_formatter/legend.rb +42 -0
  85. data/lib/canon/diff_formatter.rb +78 -9
  86. data/lib/canon/errors.rb +56 -0
  87. data/lib/canon/formatters/html_formatter_base.rb +35 -1
  88. data/lib/canon/formatters/json_formatter.rb +3 -0
  89. data/lib/canon/formatters/yaml_formatter.rb +3 -0
  90. data/lib/canon/html/data_model.rb +229 -0
  91. data/lib/canon/html.rb +9 -0
  92. data/lib/canon/options/cli_generator.rb +70 -0
  93. data/lib/canon/options/registry.rb +234 -0
  94. data/lib/canon/rspec_matchers.rb +34 -13
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
  96. data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
  97. data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
  98. data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
  99. data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
  100. data/lib/canon/tree_diff/core/matching.rb +241 -0
  101. data/lib/canon/tree_diff/core/node_signature.rb +164 -0
  102. data/lib/canon/tree_diff/core/node_weight.rb +135 -0
  103. data/lib/canon/tree_diff/core/tree_node.rb +450 -0
  104. data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
  105. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
  106. data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
  107. data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
  108. data/lib/canon/tree_diff/operation_converter.rb +631 -0
  109. data/lib/canon/tree_diff/operations/operation.rb +92 -0
  110. data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
  111. data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
  112. data/lib/canon/tree_diff.rb +33 -0
  113. data/lib/canon/validators/json_validator.rb +3 -1
  114. data/lib/canon/validators/yaml_validator.rb +3 -1
  115. data/lib/canon/version.rb +1 -1
  116. data/lib/canon/xml/data_model.rb +22 -23
  117. data/lib/canon/xml/element_matcher.rb +128 -20
  118. data/lib/canon/xml/namespace_helper.rb +110 -0
  119. data/lib/canon.rb +3 -0
  120. metadata +81 -23
  121. data/_config.yml +0 -116
  122. data/docs/ADVANCED_TOPICS.adoc +0 -20
  123. data/docs/BASIC_USAGE.adoc +0 -16
  124. data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  125. data/docs/DIFF_ARCHITECTURE.adoc +0 -435
  126. data/docs/DIFF_FORMATTING.adoc +0 -540
  127. data/docs/FORMATS.adoc +0 -447
  128. data/docs/INPUT_VALIDATION.adoc +0 -477
  129. data/docs/MATCH_ARCHITECTURE.adoc +0 -463
  130. data/docs/MATCH_OPTIONS.adoc +0 -719
  131. data/docs/MODES.adoc +0 -432
  132. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  133. data/docs/OPTIONS.adoc +0 -1387
  134. data/docs/PREPROCESSING.adoc +0 -491
  135. data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
  136. data/docs/UNDERSTANDING_CANON.adoc +0 -17
@@ -0,0 +1,366 @@
1
+ ---
2
+ title: XML Format
3
+ parent: Format Support
4
+ grand_parent: Understanding
5
+ nav_order: 1
6
+ ---
7
+ = XML format
8
+ :toc:
9
+ :toclevels: 3
10
+
11
+ == Purpose
12
+
13
+ This page describes Canon's XML format support, including W3C Canonical XML implementation, namespace handling, and XML-specific features.
14
+
15
+ == Canonicalization
16
+
17
+ Canon implements the https://www.w3.org/TR/xml-c14n11/[W3C Canonical XML Version 1.1] specification.
18
+
19
+ **Key features:**
20
+
21
+ * Namespace declaration ordering (lexicographic by prefix)
22
+ * Attribute ordering (lexicographic by namespace URI, then local name)
23
+ * Character encoding normalization to UTF-8
24
+ * Special character encoding in text and attributes
25
+ * Removal of superfluous namespace declarations
26
+ * Support for xml:base, xml:lang, xml:space, and xml:id attributes
27
+ * Processing instruction and comment handling
28
+ * Document subset support with attribute inheritance
29
+
30
+ .XML canonicalization example
31
+ [example]
32
+ ====
33
+ [source,ruby]
34
+ ----
35
+ xml = <<~XML
36
+ <root xmlns:b="http://b.com" xmlns:a="http://a.com">
37
+ <item b:attr="2" a:attr="1">
38
+ Text content
39
+ </item>
40
+ </root>
41
+ XML
42
+
43
+ Canon.format(xml, :xml)
44
+ # => Namespace prefixes sorted, attributes sorted, whitespace normalized
45
+ ----
46
+ ====
47
+
48
+ == Format defaults
49
+
50
+ [cols="1,1"]
51
+ |===
52
+ |Dimension |Default Behavior
53
+
54
+ |`text_content`
55
+ |`:strict`
56
+
57
+ |`structural_whitespace`
58
+ |`:strict`
59
+
60
+ |`attribute_whitespace`
61
+ |`:strict`
62
+
63
+ |`attribute_order`
64
+ |`:ignore`
65
+
66
+ |`attribute_values`
67
+ |`:strict`
68
+
69
+ |`comments`
70
+ |`:strict`
71
+ |===
72
+
73
+ Default diff mode: `:by_object` (tree-based semantic diff)
74
+
75
+ NOTE: XML `attribute_order` defaults to `:ignore` because the XML specification
76
+ states that attribute order is not significant. Use the `strict` profile if you
77
+ need to enforce specific attribute ordering.
78
+
79
+ == Match profiles for XML
80
+
81
+ Canon provides predefined profiles optimized for XML documents. Each profile
82
+ configures preprocessing, match options, diff algorithm, and formatting.
83
+
84
+ === strict profile
85
+
86
+ **Purpose**: Character-perfect XML matching
87
+
88
+ **Configuration**:
89
+
90
+ [source,ruby]
91
+ ----
92
+ {
93
+ preprocessing: :none,
94
+ diff_algorithm: :dom, # DOM-based positional diff
95
+ diff_mode: :by_object, # Tree-based diff output
96
+ match: {
97
+ text_content: :strict,
98
+ structural_whitespace: :strict,
99
+ attribute_whitespace: :strict,
100
+ attribute_order: :strict,
101
+ attribute_values: :strict,
102
+ comments: :strict
103
+ }
104
+ }
105
+ ----
106
+
107
+ **Use when**: Testing exact serializer output, verifying XML formatting compliance, character-perfect matching required.
108
+
109
+ === rendered profile
110
+
111
+ **Purpose**: Browser-rendered equivalence
112
+
113
+ **Configuration**:
114
+
115
+ [source,ruby]
116
+ ----
117
+ {
118
+ preprocessing: :none,
119
+ diff_algorithm: :dom,
120
+ diff_mode: :by_line, # Line-based diff output
121
+ match: {
122
+ text_content: :normalize,
123
+ structural_whitespace: :normalize,
124
+ attribute_whitespace: :normalize,
125
+ attribute_order: :ignore,
126
+ attribute_values: :strict,
127
+ comments: :ignore
128
+ }
129
+ }
130
+ ----
131
+
132
+ **Use when**: Comparing how content would render (XHTML), ignoring formatting that doesn't affect display.
133
+
134
+ === spec_friendly profile
135
+
136
+ **Purpose**: Test-friendly comparison for RSpec
137
+
138
+ **Configuration**:
139
+
140
+ [source,ruby]
141
+ ----
142
+ {
143
+ preprocessing: :normalize, # Applies whitespace normalization
144
+ diff_algorithm: :dom,
145
+ diff_mode: :by_object,
146
+ match: {
147
+ text_content: :normalize,
148
+ structural_whitespace: :ignore,
149
+ attribute_whitespace: :normalize,
150
+ attribute_order: :ignore,
151
+ attribute_values: :strict,
152
+ comments: :ignore
153
+ }
154
+ }
155
+ ----
156
+
157
+ **Use when**: Writing RSpec tests, testing semantic correctness, ignoring pretty-printing differences. Most common for testing.
158
+
159
+ === content_only profile
160
+
161
+ **Purpose**: Maximum tolerance - only data matters
162
+
163
+ **Configuration**:
164
+
165
+ [source,ruby]
166
+ ----
167
+ {
168
+ preprocessing: :normalize,
169
+ diff_algorithm: :dom,
170
+ diff_mode: :by_object,
171
+ match: {
172
+ text_content: :normalize,
173
+ structural_whitespace: :ignore,
174
+ attribute_whitespace: :ignore,
175
+ attribute_order: :ignore,
176
+ attribute_values: :ignore,
177
+ comments: :ignore
178
+ }
179
+ }
180
+ ----
181
+
182
+ **Use when**: Only structural equivalence needed, maximum flexibility for formatting differences.
183
+
184
+ == XML-specific features
185
+
186
+ === Comment handling
187
+
188
+ XML comments are preserved in canonical form unless `--with-comments` is explicitly set.
189
+
190
+ .Comment handling example
191
+ [example]
192
+ ====
193
+ [source,ruby]
194
+ ----
195
+ xml_with_comments = <<~XML
196
+ <root>
197
+ <!-- Important note -->
198
+ <item>Value</item>
199
+ </root>
200
+ XML
201
+
202
+ # Comments preserved by default
203
+ Canon.format(xml_with_comments, :xml)
204
+
205
+ # Ignore comments in comparison
206
+ Canon::Comparison.equivalent?(xml1, xml2,
207
+ match: { comments: :ignore }
208
+ )
209
+ ----
210
+ ====
211
+
212
+ === Namespace normalization
213
+
214
+ Namespace declarations are sorted and duplicate declarations are removed.
215
+
216
+ .Namespace normalization example
217
+ [example]
218
+ ====
219
+ [source,xml]
220
+ ----
221
+ <!-- Before -->
222
+ <root xmlns:z="http://z.com" xmlns:a="http://a.com">
223
+ <item xmlns:z="http://z.com">Content</item>
224
+ </root>
225
+
226
+ <!-- After canonicalization -->
227
+ <root xmlns:a="http://a.com" xmlns:z="http://z.com">
228
+ <item>Content</item>
229
+ </root>
230
+ ----
231
+
232
+ Namespaces are sorted alphabetically by prefix, and redundant declarations are removed.
+ ====
233
+
234
+ === Namespace comparison semantics
235
+
236
+ Canon compares XML elements using their namespace URI and local name, following
237
+ the XML specification. This means elements are identified by the pair
238
+ `{namespace_uri, local_name}` rather than by their qualified name
239
+ (`prefix:local_name`).
240
+
241
+ **Key principles:**
242
+
243
+ * Elements with different prefixes but the same namespace URI are considered equivalent
244
+ * Namespace prefixes themselves have no semantic meaning
245
+ * Inherited namespaces are treated the same as explicitly declared namespaces
246
+ * The diff output shows namespace information when namespaces differ
247
+
248
+ .Namespace URI comparison
249
+ [example]
250
+ ====
251
+ [source,ruby]
252
+ ----
253
+ # These are semantically equivalent
254
+ xml1 = '<root xmlns:a="http://example.com"><a:item>value</a:item></root>'
255
+ xml2 = '<root xmlns:b="http://example.com"><b:item>value</b:item></root>'
256
+
257
+ Canon::Comparison.equivalent?(xml1, xml2)
258
+ # => true
259
+
260
+ # Same local name, different namespace URIs - NOT equivalent
261
+ xml3 = '<root xmlns:a="http://example.com"><a:item>value</a:item></root>'
262
+ xml4 = '<root xmlns:a="http://other.com"><a:item>value</a:item></root>'
263
+
264
+ Canon::Comparison.equivalent?(xml3, xml4)
265
+ # => false
266
+ ----
267
+ ====
268
+
269
+ .Inherited vs explicit namespaces
270
+ [example]
271
+ ====
272
+ [source,xml]
273
+ ----
274
+ <!-- Inherited namespace -->
275
+ <root xmlns="http://example.com">
276
+ <item>value</item> <!-- item is in http://example.com namespace -->
277
+ </root>
278
+
279
+ <!-- Explicit namespace -->
280
+ <root>
281
+ <item xmlns="http://example.com">value</item>
282
+ </root>
283
+
284
+ <!-- Both items have namespace_uri = "http://example.com" -->
285
+ <!-- Canon considers them equivalent in namespace comparison -->
286
+ ----
287
+ ====
288
+
289
+ **Diff output with namespaces:**
290
+
291
+ When elements differ in namespace, the diff output includes namespace annotations:
292
+
293
+ [source]
294
+ ----
295
+ - <item> [namespace: http://example.com] "value"
296
+ + <item> [namespace: http://other.com] "value"
297
+ ----
298
+
299
+ This makes it clear when namespace differences are causing comparison failures.
300
+
301
+ === xml: attributes
302
+
303
+ Special attributes like `xml:lang`, `xml:space`, `xml:id`, and `xml:base` are properly handled per specification.
304
+
305
+ .xml:space example
306
+ [example]
307
+ ====
308
+ [source,xml]
309
+ ----
310
+ <root xml:space="preserve">
311
+ <pre> Whitespace preserved </pre>
312
+ </root>
313
+ ----
314
+
315
+ When `xml:space="preserve"` is set, whitespace is preserved in descendants.
316
+ ====
317
+
318
+ == Usage examples
319
+
320
+ === Basic XML comparison
321
+
322
+ [source,ruby]
323
+ ----
324
+ xml1 = File.read("file1.xml")
325
+ xml2 = File.read("file2.xml")
326
+
327
+ Canon::Comparison.equivalent?(xml1, xml2)
328
+ ----
329
+
330
+ === Test-friendly XML comparison
331
+
332
+ [source,ruby]
333
+ ----
334
+ expect(actual_xml).to be_xml_equivalent_to(expected_xml)
335
+ .with_profile(:spec_friendly)
336
+ ----
337
+
338
+ === Using XML comparator directly
339
+
340
+ [source,ruby]
341
+ ----
342
+ Canon::Comparison::XmlComparator.equivalent?(xml1, xml2,
343
+ match: { attribute_order: :ignore }
344
+ )
345
+ ----
346
+
347
+ === CLI usage
348
+
349
+ [source,bash]
350
+ ----
351
+ # Basic comparison
352
+ canon diff file1.xml file2.xml --verbose
353
+
354
+ # With spec-friendly profile
355
+ canon diff expected.xml actual.xml \
356
+ --match-profile spec_friendly \
357
+ --verbose
358
+ ----
359
+
360
+ == See also
361
+
362
+ * link:../comparison-pipeline.adoc[Comparison Pipeline] - Understanding the 4 layers
363
+ * link:../../features/match-options/[Match Options] - All matching options
364
+ * link:../../guides/choosing-configuration.adoc[Choosing Configuration] - Decision guide
365
+ * link:index.adoc[Format Support] - Overview of all formats
366
+ * link:html.adoc[HTML Format] - HTML-specific features