canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,478 @@
1
+ ---
2
+ layout: default
3
+ title: Ruby API
4
+ nav_order: 10
5
+ parent: Basic Usage
6
+ grand_parent: Documentation Index
7
+ ---
8
+ = Canon Ruby API
9
+ :toc:
10
+ :toclevels: 3
11
+
12
+ == Scope
13
+
14
+ This document describes how to use Canon from Ruby code. It covers formatting,
15
+ parsing, comparison, and input validation APIs.
16
+
17
+ For command-line usage, see link:CLI[CLI documentation].
18
+
19
+ For RSpec testing, see link:RSPEC[RSpec documentation].
20
+
21
+ == General
22
+
23
+ Canon provides a unified Ruby API for working with XML, HTML, JSON, and YAML
24
+ formats. All methods follow consistent patterns across formats.
25
+
26
+ == Formatting
27
+
28
+ === Canonical formatting
29
+
30
+ The `Canon.format` method produces canonical output (compact, normalized).
31
+
32
+ Syntax:
33
+
34
+ [source,ruby]
35
+ ----
36
+ Canon.format(content, format)
37
+ Canon.format_{format}(content) # Format-specific shorthand
38
+ ----
39
+
40
+ Where:
41
+
42
+ `content`:: The input string
43
+ `format`:: The format type (`:xml`, `:html`, `:json`, or `:yaml`)
44
+
45
+ .Canonical formatting examples
46
+ [example]
47
+ ====
48
+ [source,ruby]
49
+ ----
50
+ require 'canon'
51
+
52
+ # XML - compact canonical form
53
+ xml = '<root><b>2</b><a>1</a></root>'
54
+ Canon.format(xml, :xml)
55
+ # => "<root><a>1</a><b>2</b></root>"
56
+
57
+ Canon.format_xml(xml) # Shorthand
58
+ # => "<root><a>1</a><b>2</b></root>"
59
+
60
+ # HTML - compact canonical form
61
+ html = '<div><p>Hello</p></div>'
62
+ Canon.format(html, :html)
63
+ Canon.format_html(html) # Shorthand
64
+
65
+ # JSON - canonical with sorted keys
66
+ json = '{"z":3,"a":1,"b":2}'
67
+ Canon.format(json, :json)
68
+ # => '{"a":1,"b":2,"z":3}'
69
+
70
+ # YAML - canonical with sorted keys
71
+ yaml = "z: 3\na: 1\nb: 2"
72
+ Canon.format(yaml, :yaml)
73
+ ----
74
+ ====
75
+
76
+ === Pretty-print formatting
77
+
78
+ For human-readable output with indentation, use format-specific pretty printer
79
+ classes.
80
+
81
+ Syntax:
82
+
83
+ [source,ruby]
84
+ ----
85
+ Canon::{Format}::PrettyPrinter.new(indent: n, indent_type: type).format(content)
86
+ ----
87
+
88
+ Where:
89
+
90
+ `{Format}`:: The format module (`Xml`, `Html`, `Json`)
91
+ `n`:: Indentation width per level: number of spaces (default: 2), or number of tabs when `type` is `'tab'` (typically 1)
92
+ `type`:: Indentation type: `'space'` (default) or `'tab'`
93
+ `content`:: The input string
94
+
95
+ .Pretty-print examples
96
+ [example]
97
+ ====
98
+ [source,ruby]
99
+ ----
100
+ require 'canon/pretty_printer/xml'
101
+ require 'canon/pretty_printer/html'
102
+ require 'canon/pretty_printer/json'
103
+
104
+ xml_input = '<root><b>2</b><a>1</a></root>'
105
+
106
+ # XML with 2-space indentation (default)
107
+ Canon::Xml::PrettyPrinter.new(indent: 2).format(xml_input)
108
+ # =>
109
+ # <?xml version="1.0" encoding="UTF-8"?>
110
+ # <root>
111
+ # <a>1</a>
112
+ # <b>2</b>
113
+ # </root>
114
+
115
+ # XML with 4-space indentation
116
+ Canon::Xml::PrettyPrinter.new(indent: 4).format(xml_input)
117
+
118
+ # XML with tab indentation
119
+ Canon::Xml::PrettyPrinter.new(
120
+ indent: 1,
121
+ indent_type: 'tab'
122
+ ).format(xml_input)
123
+
124
+ # HTML with 2-space indentation
125
+ html_input = '<div><p>Hello</p></div>'
126
+ Canon::Html::PrettyPrinter.new(indent: 2).format(html_input)
127
+
128
+ # JSON with 2-space indentation
129
+ json_input = '{"z":3,"a":{"b":1}}'
130
+ Canon::Json::PrettyPrinter.new(indent: 2).format(json_input)
131
+
132
+ # JSON with tab indentation
133
+ Canon::Json::PrettyPrinter.new(
134
+ indent: 1,
135
+ indent_type: 'tab'
136
+ ).format(json_input)
137
+ ----
138
+ ====
139
+
140
+ == Parsing
141
+
142
+ The `Canon.parse` method parses content into Nokogiri documents (XML, HTML)
143
+ or plain Ruby objects (JSON, YAML).
144
+
145
+ Syntax:
146
+
147
+ [source,ruby]
148
+ ----
149
+ Canon.parse(content, format)
150
+ Canon.parse_{format}(content) # Format-specific shorthand
151
+ ----
152
+
153
+ Where:
154
+
155
+ `content`:: The input string
156
+ `format`:: The format type (`:xml`, `:html`, `:json`, or `:yaml`)
157
+
158
+ .Parsing examples
159
+ [example]
160
+ ====
161
+ [source,ruby]
162
+ ----
163
+ # Parse XML → Nokogiri::XML::Document
164
+ xml_doc = Canon.parse(xml_input, :xml)
165
+ xml_doc = Canon.parse_xml(xml_input)
166
+
167
+ # Parse HTML → Nokogiri::HTML5::Document (or XML::Document for XHTML)
168
+ html_doc = Canon.parse(html_input, :html)
169
+ html_doc = Canon.parse_html(html_input)
170
+
171
+ # Parse JSON → Ruby Hash/Array
172
+ json_obj = Canon.parse(json_input, :json)
173
+ json_obj = Canon.parse_json(json_input)
174
+
175
+ # Parse YAML → Ruby Hash/Array (yaml_input is any YAML string, e.g. "z: 3\na: 1\nb: 2")
176
+ yaml_obj = Canon.parse(yaml_input, :yaml)
177
+ yaml_obj = Canon.parse_yaml(yaml_input)
178
+ ----
179
+ ====
180
+
181
+ == Comparison
182
+
183
+ === General
184
+
185
+ The `Canon::Comparison.equivalent?` method compares two documents semantically.
186
+
187
+ The comparison uses depth-first traversal of DOM trees (XML/HTML) or object
188
+ graphs (JSON/YAML), comparing nodes/values based on configurable match
189
+ dimensions.
190
+
191
+ See link:MATCH_OPTIONS[Match options] for details on match dimensions and
192
+ profiles.
193
+
194
+ === Basic comparison
195
+
196
+ Syntax:
197
+
198
+ [source,ruby]
199
+ ----
200
+ Canon::Comparison.equivalent?(obj1, obj2, options = {})
201
+ ----
202
+
203
+ Where:
204
+
205
+ `obj1`:: First document (String, Nokogiri document, or Ruby object)
206
+ `obj2`:: Second document (String, Nokogiri document, or Ruby object)
207
+ `options`:: Hash of comparison options (optional)
208
+
209
+ Returns:
210
+
211
+ * `true` if documents are semantically equivalent
212
+ * `false` if documents differ
213
+ * a `Hash` with `:differences` and `:preprocessed` keys (instead of a boolean) when `verbose: true` is passed
214
+
215
+ .Basic comparison examples
216
+ [example]
217
+ ====
218
+ [source,ruby]
219
+ ----
220
+ require 'canon/comparison'
221
+
222
+ # HTML comparison - ignores whitespace by default
223
+ html1 = '<div><p>Hello</p></div>'
224
+ html2 = '<div> <p> Hello </p> </div>'
225
+ Canon::Comparison.equivalent?(html1, html2)
226
+ # => true
227
+
228
+ # XML comparison - element order doesn't matter for children
229
+ xml1 = '<root><a>1</a><b>2</b></root>'
230
+ xml2 = '<root> <b>2</b> <a>1</a> </root>'
231
+ Canon::Comparison.equivalent?(xml1, xml2)
232
+ # => true
233
+
234
+ # JSON comparison
235
+ json1 = '{"a":1,"b":2}'
236
+ json2 = '{"b":2,"a":1}'
237
+ Canon::Comparison.equivalent?(json1, json2)
238
+ # => true
239
+
240
+ # With Nokogiri documents
241
+ doc1 = Nokogiri::HTML5(html1)
242
+ doc2 = Nokogiri::HTML5(html2)
243
+ Canon::Comparison.equivalent?(doc1, doc2)
244
+ # => true
245
+ ----
246
+ ====
247
+
248
+ === Comparison with match options
249
+
250
+ Match options control which aspects of documents are compared and how strictly.
251
+
252
+ Syntax:
253
+
254
+ [source,ruby]
255
+ ----
256
+ Canon::Comparison.equivalent?(obj1, obj2,
257
+ match: {
258
+ dimension1: behavior1,
259
+ dimension2: behavior2,
260
+ ...
261
+ }
262
+ )
263
+ ----
264
+
265
+ See link:MATCH_OPTIONS[Match options] for complete dimension reference.
266
+
267
+ .Match option examples
268
+ [example]
269
+ ====
270
+ [source,ruby]
271
+ ----
272
+ # Normalize whitespace in text content
273
+ Canon::Comparison.equivalent?(xml1, xml2,
274
+ match: {
275
+ text_content: :normalize,
276
+ structural_whitespace: :ignore
277
+ }
278
+ )
279
+
280
+ # Ignore comments
281
+ Canon::Comparison.equivalent?(xml1, xml2,
282
+ match: {
283
+ comments: :ignore
284
+ }
285
+ )
286
+
287
+ # Strict attribute order
288
+ Canon::Comparison.equivalent?(xml1, xml2,
289
+ match: {
290
+ attribute_order: :strict
291
+ }
292
+ )
293
+
294
+ # Multiple dimensions
295
+ Canon::Comparison.equivalent?(html1, html2,
296
+ match: {
297
+ text_content: :normalize,
298
+ structural_whitespace: :ignore,
299
+ attribute_order: :ignore,
300
+ comments: :ignore
301
+ }
302
+ )
303
+ ----
304
+ ====
305
+
306
+ === Using match profiles
307
+
308
+ Match profiles are predefined combinations of match dimension settings.
309
+
310
+ Syntax:
311
+
312
+ [source,ruby]
313
+ ----
314
+ Canon::Comparison.equivalent?(obj1, obj2,
315
+ match_profile: :profile_name
316
+ )
317
+ ----
318
+
319
+ Available profiles:
320
+
321
+ `:strict`:: Exact matching - all dimensions use `:strict` behavior
322
+ `:rendered`:: Mimics browser rendering - ignores formatting differences
323
+ `:spec_friendly`:: Test-friendly - ignores most formatting, focuses on content
324
+ `:content_only`:: Maximum tolerance - only semantic content matters
325
+
326
+ .Match profile examples
327
+ [example]
328
+ ====
329
+ [source,ruby]
330
+ ----
331
+ # Use spec_friendly profile (common for tests)
332
+ Canon::Comparison.equivalent?(xml1, xml2,
333
+ match_profile: :spec_friendly
334
+ )
335
+
336
+ # Use rendered profile (for HTML)
337
+ Canon::Comparison.equivalent?(html1, html2,
338
+ match_profile: :rendered
339
+ )
340
+
341
+ # Override a single dimension of the selected profile
342
+ Canon::Comparison.equivalent?(xml1, xml2,
343
+ match_profile: :spec_friendly,
344
+ match: {
345
+ comments: :strict # Override profile setting
346
+ }
347
+ )
348
+ ----
349
+ ====
350
+
351
+ === Verbose mode
352
+
353
+ When `verbose: true` is specified, the method returns detailed comparison
354
+ results instead of a boolean.
355
+
356
+ Syntax:
357
+
358
+ [source,ruby]
359
+ ----
360
+ result = Canon::Comparison.equivalent?(obj1, obj2, verbose: true)
361
+ ----
362
+
363
+ Returns:
364
+
365
+ A Hash with two keys:
366
+
367
+ `:differences`:: Array of difference objects (empty if equivalent)
368
+ `:preprocessed`:: Two-element array of preprocessed documents
369
+
370
+ .Verbose mode examples
371
+ [example]
372
+ ====
373
+ [source,ruby]
374
+ ----
375
+ # Get detailed diff information
376
+ result = Canon::Comparison.equivalent?(xml1, xml2, verbose: true)
377
+
378
+ if result[:differences].empty?
379
+ puts "Documents are equivalent"
380
+ else
381
+ puts "Found #{result[:differences].size} differences"
382
+ result[:differences].each do |diff|
383
+ puts "Difference: #{diff}"
384
+ end
385
+ end
386
+
387
+ # Access preprocessed content
388
+ preprocessed1, preprocessed2 = result[:preprocessed]
389
+
390
+ # Verbose with custom options
391
+ result = Canon::Comparison.equivalent?(xml1, xml2,
392
+ verbose: true,
393
+ match: {
394
+ text_content: :normalize,
395
+ comments: :ignore
396
+ }
397
+ )
398
+ ----
399
+ ====
400
+
401
+ === Format-specific comparators
402
+
403
+ You can use format-specific comparator classes directly.
404
+
405
+ Syntax:
406
+
407
+ [source,ruby]
408
+ ----
409
+ Canon::Comparison::XmlComparator.equivalent?(obj1, obj2, options)
410
+ Canon::Comparison::HtmlComparator.equivalent?(obj1, obj2, options)
411
+ Canon::Comparison::JsonComparator.equivalent?(obj1, obj2, options)
412
+ Canon::Comparison::YamlComparator.equivalent?(obj1, obj2, options)
413
+ ----
414
+
415
+ .Format-specific comparator examples
416
+ [example]
417
+ ====
418
+ [source,ruby]
419
+ ----
420
+ # XML comparison with strict attribute order
421
+ Canon::Comparison::XmlComparator.equivalent?(xml1, xml2,
422
+ match: {
423
+ attribute_order: :strict
424
+ }
425
+ )
426
+
427
+ # HTML comparison with rendered profile
428
+ Canon::Comparison::HtmlComparator.equivalent?(html1, html2,
429
+ match_profile: :rendered
430
+ )
431
+
432
+ # JSON comparison ignoring key order
433
+ Canon::Comparison::JsonComparator.equivalent?(json1, json2,
434
+ match: {
435
+ key_order: :ignore
436
+ }
437
+ )
438
+ ----
439
+ ====
440
+
441
+ == Validation
442
+
443
+ Canon validates input before processing and raises `Canon::ValidationError`
444
+ for malformed input.
445
+
446
+ .Validation error handling
447
+ [example]
448
+ ====
449
+ [source,ruby]
450
+ ----
451
+ require 'canon'
452
+
453
+ malformed_xml = '<root><unclosed>'
454
+
455
+ begin
456
+ Canon.format(malformed_xml, :xml)
457
+ rescue Canon::ValidationError => e
458
+ puts e.message
459
+ # => XML Validation Error: Premature end of data in tag unclosed line 1
460
+ # Line: 1
461
+ # Column: 18
462
+
463
+ puts "Format: #{e.format}" # => :xml
464
+ puts "Line: #{e.line}" # => 1
465
+ puts "Column: #{e.column}" # => 18
466
+ end
467
+ ----
468
+ ====
469
+
470
+ See link:INPUT_VALIDATION[Input validation] for details.
471
+
472
+ == See also
473
+
474
+ * link:CLI[Command-line interface]
475
+ * link:RSPEC[RSpec matchers]
476
+ * link:MATCH_OPTIONS[Match options reference]
477
+ * link:FORMATS[Format support details]
478
+ * link:INPUT_VALIDATION[Input validation]