canon 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +163 -67
- data/README.adoc +400 -7
- data/docs/Gemfile +9 -0
- data/docs/INDEX.adoc +99 -182
- data/docs/_config.yml +100 -0
- data/docs/advanced/diff-classification.adoc +547 -0
- data/docs/advanced/diff-pipeline.adoc +358 -0
- data/docs/advanced/index.adoc +214 -0
- data/docs/advanced/semantic-diff-report.adoc +390 -0
- data/docs/{VERBOSE.adoc → advanced/verbose-mode-architecture.adoc} +51 -53
- data/docs/features/diff-formatting/algorithm-specific-output.adoc +533 -0
- data/docs/{CHARACTER_VISUALIZATION.adoc → features/diff-formatting/character-visualization.adoc} +23 -62
- data/docs/features/diff-formatting/colors-and-symbols.adoc +606 -0
- data/docs/features/diff-formatting/context-and-grouping.adoc +490 -0
- data/docs/features/diff-formatting/display-filtering.adoc +472 -0
- data/docs/features/diff-formatting/index.adoc +140 -0
- data/docs/features/environment-configuration/index.adoc +327 -0
- data/docs/features/environment-configuration/override-system.adoc +436 -0
- data/docs/features/environment-configuration/size-limits.adoc +273 -0
- data/docs/features/index.adoc +173 -0
- data/docs/features/input-validation/index.adoc +521 -0
- data/docs/features/match-options/algorithm-specific-behavior.adoc +365 -0
- data/docs/features/match-options/html-policies.adoc +312 -0
- data/docs/features/match-options/index.adoc +621 -0
- data/docs/getting-started/index.adoc +83 -0
- data/docs/getting-started/quick-start.adoc +76 -0
- data/docs/guides/choosing-configuration.adoc +689 -0
- data/docs/guides/index.adoc +181 -0
- data/docs/{CLI.adoc → interfaces/cli/index.adoc} +18 -13
- data/docs/interfaces/index.adoc +101 -0
- data/docs/{RSPEC.adoc → interfaces/rspec/index.adoc} +242 -31
- data/docs/{RUBY_API.adoc → interfaces/ruby-api/index.adoc} +118 -16
- data/docs/lychee.toml +65 -0
- data/docs/reference/cli-options.adoc +418 -0
- data/docs/reference/environment-variables.adoc +375 -0
- data/docs/reference/index.adoc +204 -0
- data/docs/reference/options-across-interfaces.adoc +417 -0
- data/docs/understanding/algorithms/dom-diff.adoc +389 -0
- data/docs/understanding/algorithms/index.adoc +314 -0
- data/docs/understanding/algorithms/semantic-tree-diff.adoc +533 -0
- data/docs/understanding/architecture.adoc +447 -0
- data/docs/understanding/comparison-pipeline.adoc +317 -0
- data/docs/understanding/formats/html.adoc +380 -0
- data/docs/understanding/formats/index.adoc +261 -0
- data/docs/understanding/formats/json.adoc +390 -0
- data/docs/understanding/formats/xml.adoc +366 -0
- data/docs/understanding/formats/yaml.adoc +504 -0
- data/docs/understanding/index.adoc +130 -0
- data/lib/canon/cli.rb +42 -1
- data/lib/canon/commands/diff_command.rb +108 -23
- data/lib/canon/comparison/compare_profile.rb +101 -0
- data/lib/canon/comparison/comparison_result.rb +41 -2
- data/lib/canon/comparison/html_comparator.rb +292 -71
- data/lib/canon/comparison/html_compare_profile.rb +117 -0
- data/lib/canon/comparison/match_options.rb +42 -4
- data/lib/canon/comparison/strategies/base_match_strategy.rb +99 -0
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +74 -0
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +220 -0
- data/lib/canon/comparison/xml_comparator.rb +695 -91
- data/lib/canon/comparison.rb +207 -2
- data/lib/canon/config/env_provider.rb +71 -0
- data/lib/canon/config/env_schema.rb +58 -0
- data/lib/canon/config/override_resolver.rb +55 -0
- data/lib/canon/config/type_converter.rb +59 -0
- data/lib/canon/config.rb +158 -29
- data/lib/canon/data_model.rb +29 -0
- data/lib/canon/diff/diff_classifier.rb +74 -14
- data/lib/canon/diff/diff_context_builder.rb +41 -0
- data/lib/canon/diff/diff_line.rb +18 -2
- data/lib/canon/diff/diff_node.rb +18 -3
- data/lib/canon/diff/diff_node_mapper.rb +71 -12
- data/lib/canon/diff/formatting_detector.rb +53 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +60 -5
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +68 -16
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -42
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +116 -31
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -37
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +126 -19
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +30 -1
- data/lib/canon/diff_formatter/debug_output.rb +7 -1
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +674 -57
- data/lib/canon/diff_formatter/legend.rb +42 -0
- data/lib/canon/diff_formatter.rb +78 -9
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html_formatter_base.rb +35 -1
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/yaml_formatter.rb +3 -0
- data/lib/canon/html/data_model.rb +229 -0
- data/lib/canon/html.rb +9 -0
- data/lib/canon/options/cli_generator.rb +70 -0
- data/lib/canon/options/registry.rb +234 -0
- data/lib/canon/rspec_matchers.rb +34 -13
- data/lib/canon/tree_diff/adapters/html_adapter.rb +316 -0
- data/lib/canon/tree_diff/adapters/json_adapter.rb +204 -0
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +285 -0
- data/lib/canon/tree_diff/adapters/yaml_adapter.rb +213 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +84 -0
- data/lib/canon/tree_diff/core/matching.rb +241 -0
- data/lib/canon/tree_diff/core/node_signature.rb +164 -0
- data/lib/canon/tree_diff/core/node_weight.rb +135 -0
- data/lib/canon/tree_diff/core/tree_node.rb +450 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +258 -0
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +168 -0
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +242 -0
- data/lib/canon/tree_diff/matchers/universal_matcher.rb +220 -0
- data/lib/canon/tree_diff/operation_converter.rb +631 -0
- data/lib/canon/tree_diff/operations/operation.rb +92 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +626 -0
- data/lib/canon/tree_diff/tree_diff_integrator.rb +140 -0
- data/lib/canon/tree_diff.rb +33 -0
- data/lib/canon/validators/json_validator.rb +3 -1
- data/lib/canon/validators/yaml_validator.rb +3 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +22 -23
- data/lib/canon/xml/element_matcher.rb +128 -20
- data/lib/canon/xml/namespace_helper.rb +110 -0
- data/lib/canon.rb +3 -0
- metadata +81 -23
- data/_config.yml +0 -116
- data/docs/ADVANCED_TOPICS.adoc +0 -20
- data/docs/BASIC_USAGE.adoc +0 -16
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
- data/docs/DIFF_ARCHITECTURE.adoc +0 -435
- data/docs/DIFF_FORMATTING.adoc +0 -540
- data/docs/FORMATS.adoc +0 -447
- data/docs/INPUT_VALIDATION.adoc +0 -477
- data/docs/MATCH_ARCHITECTURE.adoc +0 -463
- data/docs/MATCH_OPTIONS.adoc +0 -719
- data/docs/MODES.adoc +0 -432
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
- data/docs/OPTIONS.adoc +0 -1387
- data/docs/PREPROCESSING.adoc +0 -491
- data/docs/SEMANTIC_DIFF_REPORT.adoc +0 -528
- data/docs/UNDERSTANDING_CANON.adoc +0 -17
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Features
|
|
4
|
+
nav_order: 5
|
|
5
|
+
has_children: true
|
|
6
|
+
---
|
|
7
|
+
= Features
|
|
8
|
+
|
|
9
|
+
Configure and customize Canon's behavior for your specific needs.
|
|
10
|
+
|
|
11
|
+
== Overview
|
|
12
|
+
|
|
13
|
+
Canon provides extensive configuration options to control how documents are processed, compared, and displayed. This section covers all configurable features organized by the 4-layer architecture.
|
|
14
|
+
|
|
15
|
+
== Configuration Layers
|
|
16
|
+
|
|
17
|
+
Canon's comparison system has 4 distinct configuration layers:
|
|
18
|
+
|
|
19
|
+
**Layer 1: link:preprocessing/[Preprocessing]**::
|
|
20
|
+
Transform documents before comparison to normalize formatting.
|
|
21
|
+
+
|
|
22
|
+
Options: `none`, `c14n`, `normalize`, `format`
|
|
23
|
+
|
|
24
|
+
**Layer 2: Algorithm Selection** (see link:../understanding/algorithms/[Algorithms])::
|
|
25
|
+
Choose the comparison strategy.
|
|
26
|
+
+
|
|
27
|
+
Options: `dom` (stable), `semantic` (experimental)
|
|
28
|
+
|
|
29
|
+
**Layer 3: link:match-options/[Match Options]**::
|
|
30
|
+
Configure what to compare and how strictly.
|
|
31
|
+
+
|
|
32
|
+
* Match dimensions (granular control)
|
|
33
|
+
* Match profiles (preset combinations)
|
|
34
|
+
|
|
35
|
+
**Layer 4: link:diff-formatting/[Diff Formatting]**::
|
|
36
|
+
Control how differences are displayed.
|
|
37
|
+
+
|
|
38
|
+
* Diff modes (`by_line`, `by_object`)
|
|
39
|
+
* Colors and symbols
|
|
40
|
+
* Context and grouping
|
|
41
|
+
* Character visualization
|
|
42
|
+
|
|
43
|
+
== Feature Categories
|
|
44
|
+
|
|
45
|
+
=== Canonicalization
|
|
46
|
+
|
|
47
|
+
link:canonicalization/[**Canonicalization**]::
|
|
48
|
+
Format-specific rules for converting documents to canonical form.
|
|
49
|
+
+
|
|
50
|
+
* link:canonicalization/xml-c14n[XML Canonical Form (C14N)]
|
|
51
|
+
* link:canonicalization/json-yaml-canonical[JSON/YAML Canonical Form]
|
|
52
|
+
* link:canonicalization/html-canonical[HTML Canonical Form]
|
|
53
|
+
|
|
54
|
+
=== Match Configuration
|
|
55
|
+
|
|
56
|
+
link:match-options/[**Match Options**]::
|
|
57
|
+
Fine-grained control over comparison behavior.
|
|
58
|
+
+
|
|
59
|
+
* link:match-options/dimensions[Match Dimensions] - Individual comparison aspects
|
|
60
|
+
* link:match-options/profiles[Match Profiles] - Preset combinations
|
|
61
|
+
* link:match-options/custom-matching[Custom Matching] - Advanced strategies
|
|
62
|
+
|
|
63
|
+
=== Processing
|
|
64
|
+
|
|
65
|
+
link:preprocessing/[**Preprocessing**]::
|
|
66
|
+
Transform documents before comparison.
|
|
67
|
+
+
|
|
68
|
+
* link:preprocessing/normalization[Normalization] - Whitespace and formatting
|
|
69
|
+
* link:preprocessing/formatting[Formatting] - Pretty-print before comparison
|
|
70
|
+
|
|
71
|
+
=== Output Customization
|
|
72
|
+
|
|
73
|
+
link:diff-formatting/[**Diff Formatting**]::
|
|
74
|
+
Customize how differences are displayed.
|
|
75
|
+
+
|
|
76
|
+
* link:diff-formatting/colors-and-symbols[Colors and Symbols] - Visual indicators
|
|
77
|
+
* link:diff-formatting/context-and-grouping[Context and Grouping] - Surrounding lines
|
|
78
|
+
* link:diff-formatting/character-visualization[Character Visualization] - Whitespace visibility
|
|
79
|
+
|
|
80
|
+
=== System Configuration
|
|
81
|
+
|
|
82
|
+
link:environment-configuration/[**Environment Configuration**]::
|
|
83
|
+
System-level settings and limits.
|
|
84
|
+
+
|
|
85
|
+
* link:environment-configuration/size-limits[Size Limits] - Prevent hangs on large files
|
|
86
|
+
* link:environment-configuration/override-system[Override System] - ENV variable configuration
|
|
87
|
+
|
|
88
|
+
link:input-validation/[**Input Validation**]::
|
|
89
|
+
Error handling and validation.
|
|
90
|
+
+
|
|
91
|
+
* Syntax validation
|
|
92
|
+
* Format detection
|
|
93
|
+
* Error messages
|
|
94
|
+
|
|
95
|
+
== Quick Configuration Examples
|
|
96
|
+
|
|
97
|
+
=== Test-Friendly Comparison
|
|
98
|
+
|
|
99
|
+
Ignore formatting differences for testing:
|
|
100
|
+
|
|
101
|
+
[source,ruby]
|
|
102
|
+
----
|
|
103
|
+
Canon::Comparison.equivalent?(expected, actual,
|
|
104
|
+
match_profile: :spec_friendly
|
|
105
|
+
)
|
|
106
|
+
----
|
|
107
|
+
|
|
108
|
+
=== Canonical Comparison
|
|
109
|
+
|
|
110
|
+
Compare after canonicalization:
|
|
111
|
+
|
|
112
|
+
[source,ruby]
|
|
113
|
+
----
|
|
114
|
+
Canon::Comparison.equivalent?(doc1, doc2,
|
|
115
|
+
preprocessing: :c14n,
|
|
116
|
+
match_profile: :strict
|
|
117
|
+
)
|
|
118
|
+
----
|
|
119
|
+
|
|
120
|
+
=== Custom Match Dimensions
|
|
121
|
+
|
|
122
|
+
Fine-grained control:
|
|
123
|
+
|
|
124
|
+
[source,ruby]
|
|
125
|
+
----
|
|
126
|
+
Canon::Comparison.equivalent?(doc1, doc2,
|
|
127
|
+
match: {
|
|
128
|
+
text_content: :normalize,
|
|
129
|
+
structural_whitespace: :ignore,
|
|
130
|
+
attribute_order: :ignore
|
|
131
|
+
}
|
|
132
|
+
)
|
|
133
|
+
----
|
|
134
|
+
|
|
135
|
+
=== Verbose Diff with Color
|
|
136
|
+
|
|
137
|
+
Detailed output:
|
|
138
|
+
|
|
139
|
+
[source,ruby]
|
|
140
|
+
----
|
|
141
|
+
result = Canon::Comparison.compare(doc1, doc2,
|
|
142
|
+
verbose: true,
|
|
143
|
+
use_color: true,
|
|
144
|
+
context_lines: 5
|
|
145
|
+
)
|
|
146
|
+
puts result.diff
|
|
147
|
+
----
|
|
148
|
+
|
|
149
|
+
== Common Use Cases
|
|
150
|
+
|
|
151
|
+
**Testing generated output**::
|
|
152
|
+
Use `match_profile: :spec_friendly` to ignore formatting differences.
|
|
153
|
+
|
|
154
|
+
**Validating canonicalization**::
|
|
155
|
+
Use `preprocessing: :c14n` with `match_profile: :strict` for exact comparison.
|
|
156
|
+
|
|
157
|
+
**Debugging whitespace issues**::
|
|
158
|
+
Enable link:diff-formatting/character-visualization[character visualization] to see invisible characters.
|
|
159
|
+
|
|
160
|
+
**Comparing large files**::
|
|
161
|
+
Configure link:environment-configuration/size-limits[size limits] to prevent hangs.
|
|
162
|
+
|
|
163
|
+
== Next Steps
|
|
164
|
+
|
|
165
|
+
* Review link:match-options/profiles[Match Profiles] for common scenarios
|
|
166
|
+
* Explore link:preprocessing/[Preprocessing] for normalization options
|
|
167
|
+
* Customize link:diff-formatting/[Diff Formatting] for better output
|
|
168
|
+
|
|
169
|
+
== See Also
|
|
170
|
+
|
|
171
|
+
* link:../understanding/[Understanding Canon] - How features work internally
|
|
172
|
+
* link:../interfaces/[Interfaces] - How to use these features
|
|
173
|
+
* link:../reference/[Reference] - Complete option listings
|
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Input Validation
|
|
4
|
+
parent: Features
|
|
5
|
+
nav_order: 6
|
|
6
|
+
has_children: false
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
:toc:
|
|
10
|
+
:toclevels: 3
|
|
11
|
+
|
|
12
|
+
== Purpose
|
|
13
|
+
|
|
14
|
+
Canon validates all input before performing comparisons to ensure you're working with well-formed data. Input validation catches syntax errors early and provides clear, actionable error messages with line and column information.
|
|
15
|
+
|
|
16
|
+
== When Validation Occurs
|
|
17
|
+
|
|
18
|
+
Canon validates input at the beginning of any comparison operation, before preprocessing or diff generation:
|
|
19
|
+
|
|
20
|
+
[source]
|
|
21
|
+
----
|
|
22
|
+
Input → Validation → Preprocessing → Comparison → Diff Output
|
|
23
|
+
↑
|
|
24
|
+
Fails here if syntax errors detected
|
|
25
|
+
----
|
|
26
|
+
|
|
27
|
+
This ensures that:
|
|
28
|
+
|
|
29
|
+
* Malformed input is caught early
|
|
30
|
+
* Error messages are clear and specific
|
|
31
|
+
* No time is wasted on invalid data
|
|
32
|
+
* Line numbers in errors match original input
|
|
33
|
+
|
|
34
|
+
== Supported Formats
|
|
35
|
+
|
|
36
|
+
Canon provides a validator for each supported format:
|
|
37
|
+
|
|
38
|
+
[cols="2,3"]
|
|
39
|
+
|===
|
|
40
|
+
|Format |Validator
|
|
41
|
+
|
|
42
|
+
|XML
|
|
43
|
+
|Nokogiri strict parser with detailed syntax error reporting
|
|
44
|
+
|
|
45
|
+
|HTML
|
|
46
|
+
|Nokogiri HTML parser (lenient for HTML quirks)
|
|
47
|
+
|
|
48
|
+
|JSON
|
|
49
|
+
|Ruby JSON parser with position tracking
|
|
50
|
+
|
|
51
|
+
|YAML
|
|
52
|
+
|Psych YAML parser with error context
|
|
53
|
+
|===
|
|
54
|
+
|
|
55
|
+
== Error Reporting
|
|
56
|
+
|
|
57
|
+
=== Error Information
|
|
58
|
+
|
|
59
|
+
When validation fails, Canon provides:
|
|
60
|
+
|
|
61
|
+
* **Error message**: Clear description of the problem
|
|
62
|
+
* **Line number**: Where the error occurred
|
|
63
|
+
* **Column number**: Specific position in the line (when available)
|
|
64
|
+
* **Context**: Surrounding content to help locate the issue
|
|
65
|
+
* **Format**: Which format validator detected the error
|
|
66
|
+
|
|
67
|
+
=== Error Example
|
|
68
|
+
|
|
69
|
+
.XML validation error
|
|
70
|
+
[example]
|
|
71
|
+
====
|
|
72
|
+
[source,ruby]
|
|
73
|
+
----
|
|
74
|
+
Canon.compare(invalid_xml, valid_xml, format: :xml)
|
|
75
|
+
|
|
76
|
+
# Raises:
|
|
77
|
+
# Canon::ValidationError: Opening and ending tag mismatch: item line 5 and root
|
|
78
|
+
# Format: xml
|
|
79
|
+
# Line: 5
|
|
80
|
+
# Column: 8
|
|
81
|
+
# Details: expected '</item>'
|
|
82
|
+
----
|
|
83
|
+
====
|
|
84
|
+
|
|
85
|
+
== Format-Specific Validation
|
|
86
|
+
|
|
87
|
+
=== XML Validation
|
|
88
|
+
|
|
89
|
+
XML validation uses Nokogiri's strict parsing mode.
|
|
90
|
+
|
|
91
|
+
**Detects**:
|
|
92
|
+
* Unclosed tags
|
|
93
|
+
* Mismatched opening/closing tags
|
|
94
|
+
* Invalid entity references
|
|
95
|
+
* Malformed attributes
|
|
96
|
+
* Invalid character data
|
|
97
|
+
* Namespace errors
|
|
98
|
+
|
|
99
|
+
.XML validation examples
|
|
100
|
+
[example]
|
|
101
|
+
====
|
|
102
|
+
**Unclosed tag**:
|
|
103
|
+
[source,xml]
|
|
104
|
+
----
|
|
105
|
+
<root>
|
|
106
|
+
<item>Value
|
|
107
|
+
</root>
|
|
108
|
+
----
|
|
109
|
+
|
|
110
|
+
Error: `Opening and ending tag mismatch: item line 2 and root`
|
|
111
|
+
|
|
112
|
+
**Invalid character**:
|
|
113
|
+
[source,xml]
|
|
114
|
+
----
|
|
115
|
+
<root>
|
|
116
|
+
<item>Value & Stuff</item>
|
|
117
|
+
</root>
|
|
118
|
+
----
|
|
119
|
+
|
|
120
|
+
Error: `xmlParseEntityRef: no name`
|
|
121
|
+
|
|
122
|
+
**Mismatched tags**:
|
|
123
|
+
[source,xml]
|
|
124
|
+
----
|
|
125
|
+
<root>
|
|
126
|
+
<item>Value</Item>
|
|
127
|
+
</root>
|
|
128
|
+
----
|
|
129
|
+
|
|
130
|
+
Error: `Opening and ending tag mismatch: item line 2 and Item`
|
|
131
|
+
====
|
|
132
|
+
|
|
133
|
+
=== HTML Validation
|
|
134
|
+
|
|
135
|
+
HTML validation is more lenient than XML validation, because real-world HTML often relies on parser quirks and browser-style error recovery.
|
|
136
|
+
|
|
137
|
+
**Detects**:
|
|
138
|
+
* Severely malformed structure
|
|
139
|
+
* Encoding issues
|
|
140
|
+
* Invalid nesting (in strict mode)
|
|
141
|
+
|
|
142
|
+
NOTE: HTML validation is intentionally lenient to handle real-world HTML that browsers accept.
|
|
143
|
+
|
|
144
|
+
=== JSON Validation
|
|
145
|
+
|
|
146
|
+
JSON validation uses Ruby's JSON parser.
|
|
147
|
+
|
|
148
|
+
**Detects**:
|
|
149
|
+
* Missing commas
|
|
150
|
+
* Trailing commas
|
|
151
|
+
* Unquoted keys
|
|
152
|
+
* Invalid escape sequences
|
|
153
|
+
* Unclosed strings, arrays, or objects
|
|
154
|
+
* Invalid Unicode sequences
|
|
155
|
+
|
|
156
|
+
.JSON validation examples
|
|
157
|
+
[example]
|
|
158
|
+
====
|
|
159
|
+
**Missing comma**:
|
|
160
|
+
[source,json]
|
|
161
|
+
----
|
|
162
|
+
{
|
|
163
|
+
"name": "John"
|
|
164
|
+
"age": 30
|
|
165
|
+
}
|
|
166
|
+
----
|
|
167
|
+
|
|
168
|
+
Error: `unexpected token at '"age": 30'`
|
|
169
|
+
|
|
170
|
+
**Trailing comma**:
|
|
171
|
+
[source,json]
|
|
172
|
+
----
|
|
173
|
+
{
|
|
174
|
+
"name": "John",
|
|
175
|
+
"age": 30,
|
|
176
|
+
}
|
|
177
|
+
----
|
|
178
|
+
|
|
179
|
+
Error: `unexpected token at '}'`
|
|
180
|
+
|
|
181
|
+
**Unquoted key**:
|
|
182
|
+
[source,json]
|
|
183
|
+
----
|
|
184
|
+
{
|
|
185
|
+
name: "John"
|
|
186
|
+
}
|
|
187
|
+
----
|
|
188
|
+
|
|
189
|
+
Error: `unexpected token`
|
|
190
|
+
====
|
|
191
|
+
|
|
192
|
+
=== YAML Validation
|
|
193
|
+
|
|
194
|
+
YAML validation uses the Psych parser.
|
|
195
|
+
|
|
196
|
+
**Detects**:
|
|
197
|
+
* Invalid indentation
|
|
198
|
+
* Incorrect list syntax
|
|
199
|
+
* Malformed block scalars
|
|
200
|
+
* Invalid anchors/aliases
|
|
201
|
+
* Encoding issues
|
|
202
|
+
|
|
203
|
+
.YAML validation examples
|
|
204
|
+
[example]
|
|
205
|
+
====
|
|
206
|
+
**Invalid indentation**:
|
|
207
|
+
[source,yaml]
|
|
208
|
+
----
|
|
209
|
+
root:
|
|
210
|
+
item: value
|
|
211
|
+
bad_indent: value
|
|
212
|
+
----
|
|
213
|
+
|
|
214
|
+
Error: `mapping values are not allowed in this context`
|
|
215
|
+
|
|
216
|
+
**Malformed list**:
|
|
217
|
+
[source,yaml]
|
|
218
|
+
----
|
|
219
|
+
list:
|
|
220
|
+
- item1
|
|
221
|
+
-item2
|
|
222
|
+
----
|
|
223
|
+
|
|
224
|
+
Error: `could not find expected ':'`
|
|
225
|
+
====
|
|
226
|
+
|
|
227
|
+
== ValidationError Details
|
|
228
|
+
|
|
229
|
+
The `Canon::ValidationError` exception provides structured error information:
|
|
230
|
+
|
|
231
|
+
[source,ruby]
|
|
232
|
+
----
|
|
233
|
+
begin
|
|
234
|
+
Canon.compare(invalid, valid, format: :xml)
|
|
235
|
+
rescue Canon::ValidationError => e
|
|
236
|
+
puts "Format: #{e.format}" # :xml
|
|
237
|
+
puts "Message: #{e.message}" # Error description
|
|
238
|
+
puts "Line: #{e.line}" # Line number
|
|
239
|
+
puts "Column: #{e.column}" # Column number
|
|
240
|
+
puts "Details: #{e.details}" # Additional context
|
|
241
|
+
end
|
|
242
|
+
----
|
|
243
|
+
|
|
244
|
+
== Error Context
|
|
245
|
+
|
|
246
|
+
Canon provides context to help locate errors:
|
|
247
|
+
|
|
248
|
+
=== Line and Column Numbers
|
|
249
|
+
|
|
250
|
+
When available, Canon reports exact line and column positions:
|
|
251
|
+
|
|
252
|
+
[source]
|
|
253
|
+
----
|
|
254
|
+
Line 15, Column 23
|
|
255
|
+
↓
|
|
256
|
+
<item id="abc" name=value">
|
|
257
|
+
↑
|
|
258
|
+
Missing opening quote
|
|
259
|
+
----
|
|
260
|
+
|
|
261
|
+
=== Surrounding Content
|
|
262
|
+
|
|
263
|
+
For some formats, Canon shows content around the error:
|
|
264
|
+
|
|
265
|
+
[source]
|
|
266
|
+
----
|
|
267
|
+
Near: { "name": "John" "age": 30 }
|
|
268
|
+
↑
|
|
269
|
+
Missing comma
|
|
270
|
+
----
|
|
271
|
+
|
|
272
|
+
== Handling Validation Errors
|
|
273
|
+
|
|
274
|
+
=== In Tests
|
|
275
|
+
|
|
276
|
+
When validation fails in tests, you get immediate feedback:
|
|
277
|
+
|
|
278
|
+
[source,ruby]
|
|
279
|
+
----
|
|
280
|
+
RSpec.describe "XML comparison" do
|
|
281
|
+
it "compares documents" do
|
|
282
|
+
expected = "<root><item>Value</root>" # Missing </item>
|
|
283
|
+
actual = "<root><item>Value</item></root>"
|
|
284
|
+
|
|
285
|
+
expect(actual).to match_xml(expected)
|
|
286
|
+
# Fails immediately with:
|
|
287
|
+
# Canon::ValidationError: Opening and ending tag mismatch
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
----
|
|
291
|
+
|
|
292
|
+
=== In CLI
|
|
293
|
+
|
|
294
|
+
The CLI prints the validation error and exits with status code 1:
|
|
295
|
+
|
|
296
|
+
[source,bash]
|
|
297
|
+
----
|
|
298
|
+
$ canon diff invalid.xml valid.xml
|
|
299
|
+
|
|
300
|
+
Error: Invalid XML
|
|
301
|
+
Format: xml
|
|
302
|
+
Line: 5
|
|
303
|
+
Column: 8
|
|
304
|
+
Opening and ending tag mismatch: item line 5 and root
|
|
305
|
+
|
|
306
|
+
$ echo $?
|
|
307
|
+
1
|
|
308
|
+
----
|
|
309
|
+
|
|
310
|
+
=== In Ruby API
|
|
311
|
+
|
|
312
|
+
Catch and handle validation errors explicitly:
|
|
313
|
+
|
|
314
|
+
[source,ruby]
|
|
315
|
+
----
|
|
316
|
+
begin
|
|
317
|
+
result = Canon.compare(input1, input2, format: :xml)
|
|
318
|
+
rescue Canon::ValidationError => e
|
|
319
|
+
logger.error("Invalid XML input: #{e.message}")
|
|
320
|
+
logger.error(" Line #{e.line}, Column #{e.column}")
|
|
321
|
+
# Handle error appropriately
|
|
322
|
+
end
|
|
323
|
+
----
|
|
324
|
+
|
|
325
|
+
== Validation Performance
|
|
326
|
+
|
|
327
|
+
=== Validation Speed
|
|
328
|
+
|
|
329
|
+
Validation is fast and adds minimal overhead:
|
|
330
|
+
|
|
331
|
+
* **XML**: ~1-2ms for typical documents
|
|
332
|
+
* **JSON**: <1ms for typical documents
|
|
333
|
+
* **YAML**: ~1-2ms for typical documents
|
|
334
|
+
|
|
335
|
+
=== Caching
|
|
336
|
+
|
|
337
|
+
Canon does not cache validation results because:
|
|
338
|
+
|
|
339
|
+
* Validation is fast
|
|
340
|
+
* Input may change between calls
|
|
341
|
+
* Memory overhead isn't worth the minimal time saved
|
|
342
|
+
|
|
343
|
+
== Pre-validation
|
|
344
|
+
|
|
345
|
+
You can validate input on its own, without running a full comparison.
|
|
346
|
+
|
|
347
|
+
=== Direct Validator Access
|
|
348
|
+
|
|
349
|
+
[source,ruby]
|
|
350
|
+
----
|
|
351
|
+
require 'canon/validators/xml_validator'
|
|
352
|
+
|
|
353
|
+
begin
|
|
354
|
+
Canon::Validators::XmlValidator.validate!(xml_string)
|
|
355
|
+
puts "Valid XML"
|
|
356
|
+
rescue Canon::ValidationError => e
|
|
357
|
+
puts "Invalid: #{e.message}"
|
|
358
|
+
end
|
|
359
|
+
----
|
|
360
|
+
|
|
361
|
+
=== Format Detection and Validation
|
|
362
|
+
|
|
363
|
+
[source,ruby]
|
|
364
|
+
----
|
|
365
|
+
# Canon detects format and validates automatically
|
|
366
|
+
Canon.compare(input1, input2) # Auto-detects and validates
|
|
367
|
+
----
|
|
368
|
+
|
|
369
|
+
== Common Validation Issues
|
|
370
|
+
|
|
371
|
+
=== XML Issues
|
|
372
|
+
|
|
373
|
+
[cols="2,3,3"]
|
|
374
|
+
|===
|
|
375
|
+
|Issue |Example |Solution
|
|
376
|
+
|
|
377
|
+
|Unescaped ampersands
|
|
378
|
+
|`<item>A & B</item>`
|
|
379
|
+
|Use `+&amp;+` or CDATA: `<![CDATA[A & B]]>`
|
|
380
|
+
|
|
381
|
+
|Mismatched tags
|
|
382
|
+
|`<item></Item>`
|
|
383
|
+
|Match case exactly: `<item></item>`
|
|
384
|
+
|
|
385
|
+
|Unclosed tags
|
|
386
|
+
|`<item>Value`
|
|
387
|
+
|Close all tags: `<item>Value</item>`
|
|
388
|
+
|
|
389
|
+
|Invalid namespace
|
|
390
|
+
|`<x:item>Value</x:item>`
|
|
391
|
+
|Define namespace: `<root xmlns:x="...">`
|
|
392
|
+
|===
|
|
393
|
+
|
|
394
|
+
=== JSON Issues
|
|
395
|
+
|
|
396
|
+
[cols="2,3,3"]
|
|
397
|
+
|===
|
|
398
|
+
|Issue |Example |Solution
|
|
399
|
+
|
|
400
|
+
|Trailing commas
|
|
401
|
+
|`{"a": 1,}`
|
|
402
|
+
|Remove trailing comma: `{"a": 1}`
|
|
403
|
+
|
|
404
|
+
|Unquoted keys
|
|
405
|
+
|`{name: "John"}`
|
|
406
|
+
|Quote keys: `{"name": "John"}`
|
|
407
|
+
|
|
408
|
+
|Single quotes
|
|
409
|
+
|`{'name': 'John'}`
|
|
410
|
+
|Use double quotes: `{"name": "John"}`
|
|
411
|
+
|
|
412
|
+
|Comments
|
|
413
|
+
|`{"a": 1 /* comment */}`
|
|
414
|
+
|Remove comments (not valid JSON)
|
|
415
|
+
|===
|
|
416
|
+
|
|
417
|
+
=== YAML Issues
|
|
418
|
+
|
|
419
|
+
[cols="2,3,3"]
|
|
420
|
+
|===
|
|
421
|
+
|Issue |Example |Solution
|
|
422
|
+
|
|
423
|
+
|Inconsistent indentation
|
|
424
|
+
|Mix of spaces and tabs
|
|
425
|
+
|Use spaces only, consistent depth
|
|
426
|
+
|
|
427
|
+
|Missing colon
|
|
428
|
+
|`key value`
|
|
429
|
+
|Add colon: `key: value`
|
|
430
|
+
|
|
431
|
+
|Invalid list
|
|
432
|
+
|`- item1 -item2`
|
|
433
|
+
|Newline: `- item1` + newline + `- item2`
|
|
434
|
+
|===
|
|
435
|
+
|
|
436
|
+
== Validation vs Comparison
|
|
437
|
+
|
|
438
|
+
Important distinction:
|
|
439
|
+
|
|
440
|
+
[cols="2,3"]
|
|
441
|
+
|===
|
|
442
|
+
|Validation |Comparison
|
|
443
|
+
|
|
444
|
+
|Checks syntax
|
|
445
|
+
|Checks semantic equivalence
|
|
446
|
+
|
|
447
|
+
|Ensures well-formed input
|
|
448
|
+
|Finds differences in content
|
|
449
|
+
|
|
450
|
+
|Fails on syntax errors
|
|
451
|
+
|Succeeds if semantically equivalent
|
|
452
|
+
|
|
453
|
+
|Fast (parse only)
|
|
454
|
+
|Slower (full comparison)
|
|
455
|
+
|
|
456
|
+
|Line/column errors
|
|
457
|
+
|Semantic difference reports
|
|
458
|
+
|===
|
|
459
|
+
|
|
460
|
+
== Configuration
|
|
461
|
+
|
|
462
|
+
Validation is always enabled and cannot be disabled. This is intentional to:
|
|
463
|
+
|
|
464
|
+
* Prevent confusing errors during comparison
|
|
465
|
+
* Ensure data quality
|
|
466
|
+
* Provide clear error messages
|
|
467
|
+
* Catch problems early
|
|
468
|
+
|
|
469
|
+
== Debugging Validation Failures
|
|
470
|
+
|
|
471
|
+
=== Identify the Problem Line
|
|
472
|
+
|
|
473
|
+
Use the line number from the error:
|
|
474
|
+
|
|
475
|
+
[source,bash]
|
|
476
|
+
----
|
|
477
|
+
# Show specific line in file
|
|
478
|
+
sed -n '15p' file.xml
|
|
479
|
+
|
|
480
|
+
# Show context around line 15
|
|
481
|
+
sed -n '12,18p' file.xml
|
|
482
|
+
----
|
|
483
|
+
|
|
484
|
+
=== Check for Hidden Characters
|
|
485
|
+
|
|
486
|
+
Validation errors sometimes result from invisible characters:
|
|
487
|
+
|
|
488
|
+
[source,bash]
|
|
489
|
+
----
|
|
490
|
+
# Show hidden characters
|
|
491
|
+
cat -A file.xml
|
|
492
|
+
|
|
493
|
+
# Check for BOM
|
|
494
|
+
file file.xml
|
|
495
|
+
|
|
496
|
+
# Check encoding
|
|
497
|
+
file -i file.xml
|
|
498
|
+
----
|
|
499
|
+
|
|
500
|
+
=== Validate Externally
|
|
501
|
+
|
|
502
|
+
Use format-specific validators to get different perspectives:
|
|
503
|
+
|
|
504
|
+
[source,bash]
|
|
505
|
+
----
|
|
506
|
+
# XML
|
|
507
|
+
xmllint --noout file.xml
|
|
508
|
+
|
|
509
|
+
# JSON
|
|
510
|
+
jq . file.json
|
|
511
|
+
|
|
512
|
+
# YAML
|
|
513
|
+
ruby -ryaml -e "YAML.load_file('file.yaml')"
|
|
514
|
+
----
|
|
515
|
+
|
|
516
|
+
== See Also
|
|
517
|
+
|
|
518
|
+
* link:../environment-configuration/index.html[Environment Configuration] - Size limits for validation
|
|
519
|
+
* link:../../interfaces/cli/index.html[CLI Interface] - Command-line error handling
|
|
520
|
+
* link:../../interfaces/ruby-api/index.html[Ruby API] - Programmatic error handling
|
|
521
|
+
* link:../../understanding/index.html[Understanding Canon] - How validation fits in the pipeline
|