canon 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +43 -31
- data/README.adoc +5 -0
- data/docs/advanced/semantic-diff-report.adoc +2 -0
- data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
- data/docs/features/diff-formatting/display-preprocessing.adoc +1 -1
- data/docs/features/diff-formatting/whitespace-adjacency.adoc +9 -0
- data/docs/interfaces/ruby-api/index.adoc +20 -0
- data/docs/understanding/formats/html.adoc +17 -0
- data/lib/canon/comparison/child_realignment.rb +140 -0
- data/lib/canon/comparison/html_comparator.rb +18 -46
- data/lib/canon/comparison/node_inspector.rb +35 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +28 -73
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +33 -0
- data/lib/canon/comparison/xml_comparator.rb +53 -9
- data/lib/canon/comparison.rb +2 -0
- data/lib/canon/diff/diff_classifier.rb +9 -1
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +28 -8
- data/lib/canon/pretty_printer/html.rb +34 -0
- data/lib/canon/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b5218e18de7c596c5875ee1cf906331269cd58475a1f00de5c20af398bb07f08
|
|
4
|
+
data.tar.gz: 0dedd6f9e8ca265d37c610a183ed0e695ba68a9d8c9f0766b890f3d8db7d1f66
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c24944e5600684e24f4b32cd16d90d68f64ca07671da7cd30a4cc7e13e818e98f86ab849ee28f7780f40ae3514df1ba3087cb32f55c7923d5303c65819aa8d59
|
|
7
|
+
data.tar.gz: 13a3c944492c29a916569b86829cefd8bf7baaf247eea105ee3f608d25789ef7c79ab0b0a95a3806e66b85348dd629169f8e19bef127e3ca79685a0e13d1bca9
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-
|
|
3
|
+
# on 2026-05-05 13:09:45 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,52 +11,58 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
14
|
+
# Offense count: 5
|
|
15
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
16
16
|
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
17
|
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
18
|
Layout/ArgumentAlignment:
|
|
19
19
|
Exclude:
|
|
20
|
-
- 'lib/canon/comparison/
|
|
21
|
-
- '
|
|
20
|
+
- 'lib/canon/comparison/child_realignment.rb'
|
|
21
|
+
- 'lib/canon/comparison/xml_comparator/child_comparison.rb'
|
|
22
|
+
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
22
23
|
|
|
23
|
-
# Offense count:
|
|
24
|
+
# Offense count: 5
|
|
24
25
|
# This cop supports safe autocorrection (--autocorrect).
|
|
25
26
|
# Configuration parameters: EnforcedStyleAlignWith.
|
|
26
27
|
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
27
28
|
Layout/BlockAlignment:
|
|
28
29
|
Exclude:
|
|
29
|
-
- '
|
|
30
|
+
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
31
|
+
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
30
32
|
|
|
31
|
-
# Offense count:
|
|
33
|
+
# Offense count: 5
|
|
32
34
|
# This cop supports safe autocorrection (--autocorrect).
|
|
33
35
|
Layout/BlockEndNewline:
|
|
34
36
|
Exclude:
|
|
35
|
-
- '
|
|
37
|
+
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
38
|
+
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
36
39
|
|
|
37
|
-
# Offense count:
|
|
40
|
+
# Offense count: 10
|
|
38
41
|
# This cop supports safe autocorrection (--autocorrect).
|
|
39
42
|
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
40
43
|
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
41
44
|
Layout/IndentationWidth:
|
|
42
45
|
Exclude:
|
|
43
|
-
- '
|
|
46
|
+
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
47
|
+
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
44
48
|
|
|
45
|
-
# Offense count:
|
|
49
|
+
# Offense count: 1386
|
|
46
50
|
# This cop supports safe autocorrection (--autocorrect).
|
|
47
51
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
48
52
|
# URISchemes: http, https
|
|
49
53
|
Layout/LineLength:
|
|
50
54
|
Enabled: false
|
|
51
55
|
|
|
52
|
-
# Offense count:
|
|
56
|
+
# Offense count: 6
|
|
53
57
|
# This cop supports safe autocorrection (--autocorrect).
|
|
54
58
|
# Configuration parameters: AllowInHeredoc.
|
|
55
59
|
Layout/TrailingWhitespace:
|
|
56
60
|
Exclude:
|
|
57
|
-
- 'lib/canon/comparison/
|
|
61
|
+
- 'lib/canon/comparison/child_realignment.rb'
|
|
62
|
+
- 'lib/canon/comparison/xml_comparator/child_comparison.rb'
|
|
63
|
+
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
58
64
|
|
|
59
|
-
# Offense count:
|
|
65
|
+
# Offense count: 63
|
|
60
66
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
61
67
|
Lint/DuplicateBranch:
|
|
62
68
|
Enabled: false
|
|
@@ -101,7 +107,7 @@ Lint/UselessConstantScoping:
|
|
|
101
107
|
Exclude:
|
|
102
108
|
- 'lib/canon/diff_formatter/theme.rb'
|
|
103
109
|
|
|
104
|
-
# Offense count:
|
|
110
|
+
# Offense count: 321
|
|
105
111
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
106
112
|
Metrics/AbcSize:
|
|
107
113
|
Enabled: false
|
|
@@ -117,12 +123,12 @@ Metrics/BlockLength:
|
|
|
117
123
|
Metrics/BlockNesting:
|
|
118
124
|
Max: 4
|
|
119
125
|
|
|
120
|
-
# Offense count:
|
|
126
|
+
# Offense count: 285
|
|
121
127
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
122
128
|
Metrics/CyclomaticComplexity:
|
|
123
129
|
Enabled: false
|
|
124
130
|
|
|
125
|
-
# Offense count:
|
|
131
|
+
# Offense count: 529
|
|
126
132
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
127
133
|
Metrics/MethodLength:
|
|
128
134
|
Max: 146
|
|
@@ -132,7 +138,7 @@ Metrics/MethodLength:
|
|
|
132
138
|
Metrics/ParameterLists:
|
|
133
139
|
Max: 10
|
|
134
140
|
|
|
135
|
-
# Offense count:
|
|
141
|
+
# Offense count: 221
|
|
136
142
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
137
143
|
Metrics/PerceivedComplexity:
|
|
138
144
|
Enabled: false
|
|
@@ -165,13 +171,13 @@ Performance/CollectionLiteralInLoop:
|
|
|
165
171
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
166
172
|
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
167
173
|
|
|
168
|
-
# Offense count:
|
|
174
|
+
# Offense count: 107
|
|
169
175
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
170
176
|
# Prefixes: when, with, without
|
|
171
177
|
RSpec/ContextWording:
|
|
172
178
|
Enabled: false
|
|
173
179
|
|
|
174
|
-
# Offense count:
|
|
180
|
+
# Offense count: 46
|
|
175
181
|
# Configuration parameters: IgnoredMetadata.
|
|
176
182
|
RSpec/DescribeClass:
|
|
177
183
|
Enabled: false
|
|
@@ -182,7 +188,7 @@ RSpec/DescribeMethod:
|
|
|
182
188
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
183
189
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
184
190
|
|
|
185
|
-
# Offense count:
|
|
191
|
+
# Offense count: 876
|
|
186
192
|
# Configuration parameters: CountAsOne.
|
|
187
193
|
RSpec/ExampleLength:
|
|
188
194
|
Max: 44
|
|
@@ -196,12 +202,6 @@ RSpec/ExpectActual:
|
|
|
196
202
|
- 'spec/canon/rspec_matchers_spec.rb'
|
|
197
203
|
- 'spec/canon/string_matcher_spec.rb'
|
|
198
204
|
|
|
199
|
-
# Offense count: 7
|
|
200
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
201
|
-
RSpec/IncludeExamples:
|
|
202
|
-
Exclude:
|
|
203
|
-
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
204
|
-
|
|
205
205
|
# Offense count: 177
|
|
206
206
|
# Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
|
|
207
207
|
RSpec/IndexedLet:
|
|
@@ -240,7 +240,7 @@ RSpec/MultipleDescribes:
|
|
|
240
240
|
Exclude:
|
|
241
241
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
242
242
|
|
|
243
|
-
# Offense count:
|
|
243
|
+
# Offense count: 735
|
|
244
244
|
RSpec/MultipleExpectations:
|
|
245
245
|
Max: 15
|
|
246
246
|
|
|
@@ -249,7 +249,7 @@ RSpec/MultipleExpectations:
|
|
|
249
249
|
RSpec/MultipleMemoizedHelpers:
|
|
250
250
|
Max: 16
|
|
251
251
|
|
|
252
|
-
# Offense count:
|
|
252
|
+
# Offense count: 29
|
|
253
253
|
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
|
|
254
254
|
# SupportedStyles: always, named_only
|
|
255
255
|
RSpec/NamedSubject:
|
|
@@ -258,7 +258,7 @@ RSpec/NamedSubject:
|
|
|
258
258
|
- 'spec/canon/pretty_printer/json_spec.rb'
|
|
259
259
|
- 'spec/canon/pretty_printer/xml_spec.rb'
|
|
260
260
|
|
|
261
|
-
# Offense count:
|
|
261
|
+
# Offense count: 54
|
|
262
262
|
# Configuration parameters: AllowedGroups.
|
|
263
263
|
RSpec/NestedGroups:
|
|
264
264
|
Max: 4
|
|
@@ -292,7 +292,7 @@ RSpec/SpecFilePathFormat:
|
|
|
292
292
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
293
293
|
- 'spec/xml_c14n_spec.rb'
|
|
294
294
|
|
|
295
|
-
# Offense count:
|
|
295
|
+
# Offense count: 100
|
|
296
296
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
297
297
|
RSpec/VerifiedDoubles:
|
|
298
298
|
Exclude:
|
|
@@ -304,6 +304,18 @@ RSpec/VerifiedDoubles:
|
|
|
304
304
|
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
305
305
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
306
306
|
|
|
307
|
+
# Offense count: 8
|
|
308
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
309
|
+
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
310
|
+
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
311
|
+
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
312
|
+
# FunctionalMethods: let, let!, subject, watch
|
|
313
|
+
# AllowedMethods: lambda, proc, it
|
|
314
|
+
Style/BlockDelimiters:
|
|
315
|
+
Exclude:
|
|
316
|
+
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
317
|
+
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
318
|
+
|
|
307
319
|
# Offense count: 1
|
|
308
320
|
# This cop supports safe autocorrection (--autocorrect).
|
|
309
321
|
# Configuration parameters: EnforcedStyle, AllowComments.
|
data/README.adoc
CHANGED
|
@@ -618,6 +618,11 @@ See link:docs/MODES[Diff modes] for details.
|
|
|
618
618
|
reported as a dedicated `:whitespace_adjacency` dimension with direction
|
|
619
619
|
wording (`before`/`after`/`adjacent to`) instead of cascading into
|
|
620
620
|
misleading `:text_content` mismatches
|
|
621
|
+
* **Asymmetric comment reporting**: A `<!-- ... -->` node present on only
|
|
622
|
+
one side is reported as a dedicated `:comments` dimension diff anchored
|
|
623
|
+
at the comment node, instead of shifting children alignment and
|
|
624
|
+
surfacing a misleading `:element_structure` "Element removed" diff
|
|
625
|
+
against an unrelated trailing sibling
|
|
621
626
|
* **Non-ASCII detection**: Warnings for unexpected Unicode characters
|
|
622
627
|
* **Customizable**: Character maps, context lines, grouping options
|
|
623
628
|
|
|
@@ -212,6 +212,8 @@ Reason: Text: "¬······:¬······"
|
|
|
212
212
|
|
|
213
213
|
This fallback is implemented in `Canon::DiffFormatter::DiffDetailFormatterHelpers::DimensionFormatter.format_text_content_details` and only triggers when `TextUtils.ambiguous_text_pair?` returns `true` _and_ at least one side has a parent element to render.
|
|
214
214
|
|
|
215
|
+
The same fallback also applies to the `whitespace_adjacency` dimension (see <<whitespace-adjacency,Whitespace adjacency>>): when the alignment partner of a stray whitespace node extracts to an empty / whitespace-only string, the Reason line reads `Whitespace inside <PARENT>` (rather than `Whitespace before ""`), and the Expected/Actual block surfaces each side's parent element compactly. See `format_whitespace_adjacency_details` and `Canon::Comparison::XmlComparator#build_whitespace_adjacency_reason`.
|
|
216
|
+
|
|
215
217
|
==== One-sided text diffs (added or removed text nodes)
|
|
216
218
|
|
|
217
219
|
When a `text_content` difference carries a text node on one side and `nil` on the other (issue #125) -- the shape that fragment-length mismatches and child-comparison emit when a text-node child is missing -- the renderer mirrors `element_structure`: the missing side reads `(not present)`, and the present side reads the text-node content (whitespace-visualised) plus a brief parent open-tag hint for context. The full ancestor subtree is *not* dumped; only the immediate parent's opening tag is shown, so a missing whitespace text node cannot make the diff look like the entire ancestor differs.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Comment asymmetry in diff reports
|
|
3
|
+
parent: Diff Formatting
|
|
4
|
+
nav_order: 9
|
|
5
|
+
---
|
|
6
|
+
= Comment asymmetry in diff reports
|
|
7
|
+
:toc:
|
|
8
|
+
:toclevels: 2
|
|
9
|
+
|
|
10
|
+
== Purpose
|
|
11
|
+
|
|
12
|
+
Canon's diff reports anchor `<!-- ... -->` comment nodes that have no
|
|
13
|
+
counterpart on the other side to a dedicated `:comments` dimension
|
|
14
|
+
instead of letting the resulting children-array length mismatch cascade
|
|
15
|
+
into a misleading `:element_structure` "Element removed" diff against
|
|
16
|
+
the trailing named sibling.
|
|
17
|
+
|
|
18
|
+
This is a *report-only* shape change — equivalence verdicts are
|
|
19
|
+
unchanged. Whether asymmetric comments cause a non-equivalent verdict
|
|
20
|
+
or not depends on the `comments` match option (`:strict` /
|
|
21
|
+
`:ignore` / `:exact`), as before.
|
|
22
|
+
|
|
23
|
+
== The problem
|
|
24
|
+
|
|
25
|
+
Consider an HTML fragment compared with `verbose: true`:
|
|
26
|
+
|
|
27
|
+
[source,html]
|
|
28
|
+
----
|
|
29
|
+
<!-- expected -->
|
|
30
|
+
<body>
|
|
31
|
+
<div>first</div>
|
|
32
|
+
<div>second</div>
|
|
33
|
+
<!-- a comment that exists only on side A -->
|
|
34
|
+
<div style="mso-element:footnote-list"></div>
|
|
35
|
+
</body>
|
|
36
|
+
|
|
37
|
+
<!-- actual -->
|
|
38
|
+
<body>
|
|
39
|
+
<div>first</div>
|
|
40
|
+
<div>second</div>
|
|
41
|
+
<div style="mso-element:footnote-list"></div>
|
|
42
|
+
</body>
|
|
43
|
+
----
|
|
44
|
+
|
|
45
|
+
The `<div style="mso-element:footnote-list">` is byte-identical between
|
|
46
|
+
the two sides; the only real difference is the comment on the expected
|
|
47
|
+
side. Pre-#144, the diff report contained:
|
|
48
|
+
|
|
49
|
+
[source]
|
|
50
|
+
----
|
|
51
|
+
DIFFERENCE #1 — element_structure: Element removed:
|
|
52
|
+
<div style="mso-element:footnote-list"/>
|
|
53
|
+
----
|
|
54
|
+
|
|
55
|
+
That is the wrong dimension, anchored at the wrong node. The element is
|
|
56
|
+
present on both sides — what is missing is the comment.
|
|
57
|
+
|
|
58
|
+
The cascade comes from positional alignment in
|
|
59
|
+
`Canon::Comparison::HtmlComparator#compare_fragment_children` (and the
|
|
60
|
+
analogous walker in `XmlComparatorHelpers::ChildComparison`): in
|
|
61
|
+
verbose mode, comments are intentionally kept by `filter_children` so
|
|
62
|
+
informative differences can be recorded, but the resulting unequal
|
|
63
|
+
children-array lengths fell through to a name-based mismatch heuristic
|
|
64
|
+
that filtered out generic `#`-prefixed names (`#text`, `#comment`),
|
|
65
|
+
leaving the trailing named element to take the blame.
|
|
66
|
+
|
|
67
|
+
== The contract
|
|
68
|
+
|
|
69
|
+
When the children alignment encounters a comment node on one side
|
|
70
|
+
paired against a non-comment node on the other (or sitting past the
|
|
71
|
+
trailing edge of the shorter side), Canon:
|
|
72
|
+
|
|
73
|
+
1. Treats the comment as a *single-side gap* in the alignment.
|
|
74
|
+
2. Emits one `:comments` diff entry anchored at the comment node
|
|
75
|
+
itself (not at a mis-paired neighbouring element).
|
|
76
|
+
3. Advances only the cursor that carries the comment, so the next
|
|
77
|
+
iteration aligns content against content.
|
|
78
|
+
|
|
79
|
+
The Reason line names the side that carries the comment and surfaces
|
|
80
|
+
its text:
|
|
81
|
+
|
|
82
|
+
[source]
|
|
83
|
+
----
|
|
84
|
+
DIFFERENCE #1 — comments: Comment present on EXPECTED only:
|
|
85
|
+
<!-- a comment that exists only on side A -->
|
|
86
|
+
----
|
|
87
|
+
|
|
88
|
+
== Combined with whitespace asymmetry
|
|
89
|
+
|
|
90
|
+
The same realignment walk handles asymmetric whitespace-only text
|
|
91
|
+
nodes (link:whitespace-adjacency.adoc[issue #137]) and asymmetric
|
|
92
|
+
comment nodes together. When a children mismatch is fully explained by
|
|
93
|
+
a combination of asymmetric whitespace and asymmetric comments, the
|
|
94
|
+
walker emits one diff per asymmetric node with the appropriate
|
|
95
|
+
dimension (`:whitespace_adjacency` for whitespace, `:comments` for
|
|
96
|
+
comments) — no `:element_structure` diff is produced.
|
|
97
|
+
|
|
98
|
+
When a real structural mismatch coexists with an asymmetric comment,
|
|
99
|
+
both kinds of diff are emitted — the structural one under
|
|
100
|
+
`:element_structure`, the comment one under `:comments`.
|
|
101
|
+
|
|
102
|
+
== Working with :comments diffs programmatically
|
|
103
|
+
|
|
104
|
+
[source,ruby]
|
|
105
|
+
----
|
|
106
|
+
result = Canon::Comparison.equivalent?(html1, html2,
|
|
107
|
+
format: :html5, verbose: true)
|
|
108
|
+
|
|
109
|
+
comment_diffs = result.differences.select { |d| d.dimension == :comments }
|
|
110
|
+
|
|
111
|
+
# Whether these affect equivalence depends on the comments match option.
|
|
112
|
+
# Under the default :ignore profile they are informative; under :strict
|
|
113
|
+
# they are normative.
|
|
114
|
+
----
|
|
115
|
+
|
|
116
|
+
== What this contract does NOT do
|
|
117
|
+
|
|
118
|
+
* **Does not silence asymmetric comments.** They are always reported
|
|
119
|
+
in verbose output; the change is the dimension label and the anchor
|
|
120
|
+
node.
|
|
121
|
+
* **Does not affect symmetric comments.** When both sides carry
|
|
122
|
+
parallel comment nodes, those compare normally — content-vs-content
|
|
123
|
+
comparison applies.
|
|
124
|
+
* **Does not change equivalence outcomes.** A comparison whose
|
|
125
|
+
equivalence verdict was driven by asymmetric comments retains the
|
|
126
|
+
same verdict; only the report shape changes.
|
|
127
|
+
|
|
128
|
+
== Where it runs
|
|
129
|
+
|
|
130
|
+
The noise-aware realignment is a single shared implementation:
|
|
131
|
+
|
|
132
|
+
* `Canon::Comparison::ChildRealignment` — the two-cursor walk that
|
|
133
|
+
detects noise nodes via `NodeInspector.noise_dimension_for`,
|
|
134
|
+
emits per-orphan diffs with the appropriate dimension
|
|
135
|
+
(`:whitespace_adjacency`, `:comments`), and advances only the
|
|
136
|
+
noise-side cursor so content nodes stay aligned.
|
|
137
|
+
|
|
138
|
+
Both comparison paths delegate to `ChildRealignment.walk`:
|
|
139
|
+
|
|
140
|
+
* `Canon::Comparison::HtmlComparator#compare_fragment_children` — the
|
|
141
|
+
HTML fragment path (passes `emit_structural_orphans: true` because it
|
|
142
|
+
has no separate length-mismatch step).
|
|
143
|
+
* `Canon::Comparison::XmlComparatorHelpers::ChildComparison` — the XML
|
|
144
|
+
comparator path (passes `emit_structural_orphans: false`; structural
|
|
145
|
+
orphans are handled by the pre-walk length-mismatch step via
|
|
146
|
+
`asymmetric_noise_explains_length_diff?`).
|
|
147
|
+
|
|
148
|
+
== Related
|
|
149
|
+
|
|
150
|
+
* link:whitespace-adjacency.adoc[Whitespace adjacency] — sibling
|
|
151
|
+
contract for asymmetric whitespace-only text nodes.
|
|
152
|
+
* link:../../advanced/diff-classification.adoc[Diff classification] —
|
|
153
|
+
Normative vs informative differences.
|
|
154
|
+
|
|
155
|
+
== History
|
|
156
|
+
|
|
157
|
+
The false-positive cascade was reported in
|
|
158
|
+
https://github.com/lutaml/canon/issues/144[issue #144]. The fix
|
|
159
|
+
mirrors the structure of the `:whitespace_adjacency` work in
|
|
160
|
+
https://github.com/lutaml/canon/issues/137[issue #137].
|
|
@@ -430,7 +430,7 @@ pretty-printer. This is a known future work item.
|
|
|
430
430
|
|✓ Full
|
|
431
431
|
|✓ (via XML serializer)
|
|
432
432
|
|✓ Full
|
|
433
|
-
|`:pretty_print` uses `Canon::PrettyPrinter::Html
|
|
433
|
+
|`:pretty_print` uses `Canon::PrettyPrinter::Html` in fixture-ready mode (`FORMAT|AS_XHTML|NO_DECLARATION`); `:normalize_pretty_print` falls back to `XmlNormalized` pending a dedicated `HtmlNormalized`; `:c14n` uses Nokogiri HTML5 serialization. In fixture-ready mode, stray structural whitespace (whitespace-only text nodes between block-level siblings) is stripped before formatting so that libxml's `FORMAT` flag produces correct indentation. Whitespace inside `<pre>`, `<script>`, `<style>`, and `<textarea>` is preserved.
|
|
434
434
|
|
|
435
435
|
|JSON
|
|
436
436
|
|Planned
|
|
@@ -103,6 +103,15 @@ edge of a parent.
|
|
|
103
103
|
`adjacent to`:: Degenerate fallback for a whitespace node with no
|
|
104
104
|
non-whitespace siblings at all. Rarely emitted.
|
|
105
105
|
|
|
106
|
+
When the alignment partner extracts to an empty / whitespace-only
|
|
107
|
+
string (e.g. an element with no text descendants), the direction
|
|
108
|
+
phrasing degenerates to `Whitespace before ""` which carries no
|
|
109
|
+
information. In that case Canon falls back to naming the parent
|
|
110
|
+
element instead — `Whitespace inside <PARENT>` — and the
|
|
111
|
+
Expected/Actual detail block renders each side's parent element
|
|
112
|
+
compactly per the contract from
|
|
113
|
+
link:../../advanced/semantic-diff-report.adoc#parent-context-fallback-for-ambiguous-text-diffs[issue #112].
|
|
114
|
+
|
|
106
115
|
NOTE: An earlier wording (`Whitespace surrounding "X"`) classified the
|
|
107
116
|
*whitespace node's position among its own siblings* rather than its
|
|
108
117
|
direction relative to the partner. That label was misleading when the
|
|
@@ -116,6 +116,9 @@ Where:
|
|
|
116
116
|
`{Format}`:: The format module (`Xml`, `Html`, `Json`)
|
|
117
117
|
`n`:: Number of spaces (default: 2) or tabs (use 1 for tabs)
|
|
118
118
|
`type`:: Indentation type: `'space'` (default) or `'tab'`
|
|
119
|
+
`fixture_ready`:: (HTML only) When `true`, emit indented XHTML-shaped
|
|
120
|
+
output that strips structural whitespace before formatting. Designed for
|
|
121
|
+
copy-paste into RSpec heredoc fixtures. Default: `false`.
|
|
119
122
|
`content`:: The input string
|
|
120
123
|
|
|
121
124
|
.Pretty-print examples
|
|
@@ -151,6 +154,23 @@ Canon::Xml::PrettyPrinter.new(
|
|
|
151
154
|
html_input = '<div><p>Hello</p></div>'
|
|
152
155
|
Canon::Html::PrettyPrinter.new(indent: 2).format(html_input)
|
|
153
156
|
|
|
157
|
+
# HTML fixture-ready mode: produces indented XHTML-shaped output
|
|
158
|
+
# suitable for pasting into RSpec heredoc fixtures. Strips stray
|
|
159
|
+
# structural whitespace (inter-element text nodes) so libxml's FORMAT
|
|
160
|
+
# flag can indent block-level siblings that would otherwise be treated
|
|
161
|
+
# as mixed content. Whitespace inside <pre>, <script>, <style>, and
|
|
162
|
+
# <textarea> is preserved.
|
|
163
|
+
Canon::Html::PrettyPrinter.new(indent: 2, fixture_ready: true)
|
|
164
|
+
.format('<html><body><div>a</div> <div>b</div></body></html>')
|
|
165
|
+
# =>
|
|
166
|
+
# <html xmlns="http://www.w3.org/1999/xhtml">
|
|
167
|
+
# <head>...</head>
|
|
168
|
+
# <body>
|
|
169
|
+
# <div>a</div>
|
|
170
|
+
# <div>b</div>
|
|
171
|
+
# </body>
|
|
172
|
+
# </html>
|
|
173
|
+
|
|
154
174
|
# JSON with 2-space indentation
|
|
155
175
|
json_input = '{"z":3,"a":{"b":1}}'
|
|
156
176
|
Canon::Json::PrettyPrinter.new(indent: 2).format(json_input)
|
|
@@ -235,6 +235,23 @@ HTML whitespace is collapsed per CSS rendering rules. Empty text nodes between e
|
|
|
235
235
|
Multiple spaces within text content are collapsed to single spaces when `text_content: :normalize` is used.
|
|
236
236
|
====
|
|
237
237
|
|
|
238
|
+
==== Fixture-ready pretty-print and structural whitespace
|
|
239
|
+
|
|
240
|
+
When using `Canon::PrettyPrinter::Html` with `fixture_ready: true` (the mode
|
|
241
|
+
used by the diff pipeline's *PRETTY-PRINTED INPUTS* section), Canon strips
|
|
242
|
+
stray structural whitespace before formatting. Real-world HTML5 input from
|
|
243
|
+
upstream pipelines often carries whitespace-only text nodes between block-level
|
|
244
|
+
siblings (`<body>` → `<div>`, `<br>`, `<div>`, ...). libxml's `FORMAT` flag
|
|
245
|
+
treats any element with a text child (even a whitespace-only one) as mixed content and
|
|
246
|
+
refuses to indent its children — producing a single-line blob instead of a
|
|
247
|
+
readable tree.
|
|
248
|
+
|
|
249
|
+
The fixture-ready mode removes whitespace-only text nodes from parents that
|
|
250
|
+
are purely structural (no real text content) and are not whitespace-preserving
|
|
251
|
+
elements (`<pre>`, `<script>`, `<style>`, `<textarea>`). Mixed-content runs
|
|
252
|
+
like `<p>foo <em>bar</em> baz</p>` are left untouched so that significant
|
|
253
|
+
inline whitespace is preserved.
|
|
254
|
+
|
|
238
255
|
=== Attribute order
|
|
239
256
|
|
|
240
257
|
HTML attributes are inherently unordered per the HTML specification, so default is `:ignore`.
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "node_inspector"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
# Shared two-cursor walk over child arrays with noise-aware realignment.
|
|
8
|
+
#
|
|
9
|
+
# When positional pairing would match a noise node (whitespace-only
|
|
10
|
+
# text or comment) against a content node, the walker treats the
|
|
11
|
+
# noise node as a single-side gap: emits a diff for it and advances
|
|
12
|
+
# only that cursor, so the next iteration aligns content against
|
|
13
|
+
# content.
|
|
14
|
+
#
|
|
15
|
+
# Noise classification is delegated to +NodeInspector.noise_dimension_for+,
|
|
16
|
+
# making the walk open for extension — new noise types only require
|
|
17
|
+
# adding a branch there.
|
|
18
|
+
#
|
|
19
|
+
# The walk is parameterised by a diff emitter (a callable that
|
|
20
|
+
# receives node1, node2, diff1, diff2, dimension) so both the HTML
|
|
21
|
+
# comparator (DiffNodeBuilder.build) and the XML comparator
|
|
22
|
+
# (comparator.add_difference) reuse the same cursor logic.
|
|
23
|
+
module ChildRealignment
|
|
24
|
+
class << self
|
|
25
|
+
# Walk two child arrays, emitting diffs for noise nodes and
|
|
26
|
+
# yielding matched content pairs.
|
|
27
|
+
#
|
|
28
|
+
# @param children1 [Array] Left-side children
|
|
29
|
+
# @param children2 [Array] Right-side children
|
|
30
|
+
# @param emitter [#call] Callable receiving
|
|
31
|
+
# (node1, node2, diff1, diff2, dimension)
|
|
32
|
+
# @param emit_structural_orphans [Boolean] When true, trailing-edge
|
|
33
|
+
# non-noise orphans are emitted as +:element_structure+ diffs.
|
|
34
|
+
# HTML fragment path sets this to true (it has no separate
|
|
35
|
+
# length-mismatch step); XML path sets it to false (structural
|
|
36
|
+
# orphans are already recorded by +use_positional_comparison+).
|
|
37
|
+
# @yield [child1, child2] Compare two matched content nodes.
|
|
38
|
+
# Must return a Comparison result constant.
|
|
39
|
+
# @return [Symbol] Most recent non-equivalent result encountered (EQUIVALENT if none)
|
|
40
|
+
def walk(children1, children2, emitter,
|
|
41
|
+
emit_structural_orphans: false)
|
|
42
|
+
worst = Comparison::EQUIVALENT
|
|
43
|
+
i = 0
|
|
44
|
+
j = 0
|
|
45
|
+
|
|
46
|
+
while i < children1.length || j < children2.length
|
|
47
|
+
child1 = children1[i]
|
|
48
|
+
child2 = children2[j]
|
|
49
|
+
|
|
50
|
+
if child1.nil?
|
|
51
|
+
result = emit_orphan(child2, :right, emitter,
|
|
52
|
+
emit_structural_orphans)
|
|
53
|
+
worst = result if result && result != Comparison::EQUIVALENT
|
|
54
|
+
j += 1
|
|
55
|
+
next
|
|
56
|
+
elsif child2.nil?
|
|
57
|
+
result = emit_orphan(child1, :left, emitter,
|
|
58
|
+
emit_structural_orphans)
|
|
59
|
+
worst = result if result && result != Comparison::EQUIVALENT
|
|
60
|
+
i += 1
|
|
61
|
+
next
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
dim1 = NodeInspector.noise_dimension_for(child1)
|
|
65
|
+
dim2 = NodeInspector.noise_dimension_for(child2)
|
|
66
|
+
|
|
67
|
+
if dim1 && !dim2
|
|
68
|
+
result = emit_inline_noise(child1, child2, dim1, :left, emitter)
|
|
69
|
+
worst = result unless result == Comparison::EQUIVALENT
|
|
70
|
+
i += 1
|
|
71
|
+
next
|
|
72
|
+
elsif dim2 && !dim1
|
|
73
|
+
result = emit_inline_noise(child1, child2, dim2, :right, emitter)
|
|
74
|
+
worst = result unless result == Comparison::EQUIVALENT
|
|
75
|
+
j += 1
|
|
76
|
+
next
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
if block_given?
|
|
80
|
+
child_result = yield(child1, child2)
|
|
81
|
+
worst = child_result unless child_result == Comparison::EQUIVALENT
|
|
82
|
+
end
|
|
83
|
+
i += 1
|
|
84
|
+
j += 1
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
worst
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
# Emit a diff for an inline noise node that sits opposite a
|
|
93
|
+
# content node. Whitespace passes both nodes for context;
|
|
94
|
+
# comments pass only the comment node.
|
|
95
|
+
def emit_inline_noise(node_left, node_right, dimension, noise_side,
|
|
96
|
+
emitter)
|
|
97
|
+
if dimension == :whitespace_adjacency
|
|
98
|
+
emitter.call(node_left, node_right,
|
|
99
|
+
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
100
|
+
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
101
|
+
dimension)
|
|
102
|
+
Comparison::UNEQUAL_TEXT_CONTENTS
|
|
103
|
+
else
|
|
104
|
+
n1 = noise_side == :left ? node_left : nil
|
|
105
|
+
n2 = noise_side == :right ? node_right : nil
|
|
106
|
+
emitter.call(n1, n2,
|
|
107
|
+
Comparison::MISSING_NODE,
|
|
108
|
+
Comparison::MISSING_NODE,
|
|
109
|
+
dimension)
|
|
110
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Emit a diff for a trailing-edge orphan (one side exhausted).
|
|
115
|
+
# Noise orphans are always emitted; structural orphans only when
|
|
116
|
+
# +emit_structural+ is true.
|
|
117
|
+
def emit_orphan(orphan, side, emitter, emit_structural)
|
|
118
|
+
dim = NodeInspector.noise_dimension_for(orphan)
|
|
119
|
+
if dim
|
|
120
|
+
n1 = side == :left ? orphan : nil
|
|
121
|
+
n2 = side == :right ? orphan : nil
|
|
122
|
+
emitter.call(n1, n2,
|
|
123
|
+
Comparison::MISSING_NODE,
|
|
124
|
+
Comparison::MISSING_NODE,
|
|
125
|
+
dim)
|
|
126
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
127
|
+
elsif emit_structural
|
|
128
|
+
n1 = side == :left ? orphan : nil
|
|
129
|
+
n2 = side == :right ? orphan : nil
|
|
130
|
+
emitter.call(n1, n2,
|
|
131
|
+
Comparison::MISSING_NODE,
|
|
132
|
+
Comparison::MISSING_NODE,
|
|
133
|
+
:element_structure)
|
|
134
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -188,32 +188,9 @@ module Canon
|
|
|
188
188
|
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
189
189
|
end
|
|
190
190
|
|
|
191
|
-
#
|
|
192
|
-
#
|
|
193
|
-
#
|
|
194
|
-
def record_fragment_length_mismatch(_node1, _node2, children1,
|
|
195
|
-
children2, differences)
|
|
196
|
-
longer, shorter, side = if children1.length > children2.length
|
|
197
|
-
[children1, children2, :removed]
|
|
198
|
-
else
|
|
199
|
-
[children2, children1, :added]
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
longer[shorter.length...].each do |orphan|
|
|
203
|
-
n1 = side == :removed ? orphan : nil
|
|
204
|
-
n2 = side == :removed ? nil : orphan
|
|
205
|
-
differences <<
|
|
206
|
-
Canon::Comparison::DiffNodeBuilder.build(
|
|
207
|
-
node1: n1,
|
|
208
|
-
node2: n2,
|
|
209
|
-
diff1: Comparison::MISSING_NODE,
|
|
210
|
-
diff2: Comparison::MISSING_NODE,
|
|
211
|
-
dimension: :element_structure,
|
|
212
|
-
)
|
|
213
|
-
end
|
|
214
|
-
end
|
|
215
|
-
|
|
216
|
-
# Compare children of document fragments
|
|
191
|
+
# Compare children of document fragments using the shared
|
|
192
|
+
# +ChildRealignment+ walk. Structural orphans are emitted here
|
|
193
|
+
# (the HTML fragment path has no separate length-mismatch step).
|
|
217
194
|
#
|
|
218
195
|
# @param node1 [Nokogiri::DocumentFragment] First fragment
|
|
219
196
|
# @param node2 [Nokogiri::DocumentFragment] Second fragment
|
|
@@ -230,29 +207,24 @@ module Canon
|
|
|
230
207
|
children1 = XmlNodeComparison.filter_children(all_children1, opts)
|
|
231
208
|
children2 = XmlNodeComparison.filter_children(all_children2, opts)
|
|
232
209
|
|
|
233
|
-
if children1.
|
|
234
|
-
# Record the length mismatch as a DiffNode so verbose mode
|
|
235
|
-
# surfaces it. Without this, equivalent? wraps an empty
|
|
236
|
-
# differences array and incorrectly reports the inputs as
|
|
237
|
-
# equivalent.
|
|
238
|
-
record_fragment_length_mismatch(node1, node2,
|
|
239
|
-
children1, children2,
|
|
240
|
-
differences)
|
|
241
|
-
return Comparison::UNEQUAL_ELEMENTS
|
|
242
|
-
elsif children1.empty?
|
|
243
|
-
return Comparison::EQUIVALENT
|
|
244
|
-
end
|
|
210
|
+
return Comparison::EQUIVALENT if children1.empty? && children2.empty?
|
|
245
211
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
differences)
|
|
252
|
-
return child_result if child_result != Comparison::EQUIVALENT
|
|
212
|
+
emitter = html_diff_emitter(differences)
|
|
213
|
+
ChildRealignment.walk(children1, children2, emitter,
|
|
214
|
+
emit_structural_orphans: true) do |c1, c2|
|
|
215
|
+
XmlNodeComparison.compare_nodes(c1, c2, opts, child_opts,
|
|
216
|
+
diff_children, differences)
|
|
253
217
|
end
|
|
218
|
+
end
|
|
254
219
|
|
|
255
|
-
|
|
220
|
+
# Build a diff emitter for the HTML comparator path that
|
|
221
|
+
# creates DiffNode objects via DiffNodeBuilder.
|
|
222
|
+
def html_diff_emitter(differences)
|
|
223
|
+
proc do |n1, n2, d1, d2, dim|
|
|
224
|
+
differences << Canon::Comparison::DiffNodeBuilder.build(
|
|
225
|
+
node1: n1, node2: n2, diff1: d1, diff2: d2, dimension: dim,
|
|
226
|
+
)
|
|
227
|
+
end
|
|
256
228
|
end
|
|
257
229
|
|
|
258
230
|
# Perform semantic tree diff using SemanticTreeMatchStrategy
|
|
@@ -83,6 +83,32 @@ module Canon
|
|
|
83
83
|
end
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# Classify +node+ as a noise node and return the diff dimension
|
|
87
|
+
# it should be reported under, or +nil+ if it is structural content.
|
|
88
|
+
#
|
|
89
|
+
# Noise nodes (whitespace-only text, comments) are realigned past
|
|
90
|
+
# during child comparison so that content nodes line up correctly
|
|
91
|
+
# across sides.
|
|
92
|
+
#
|
|
93
|
+
# @param node [Object] DOM node to classify
|
|
94
|
+
# @return [Symbol, nil] +:whitespace_adjacency+, +:comments+, or +nil+
|
|
95
|
+
def self.noise_dimension_for(node)
|
|
96
|
+
if whitespace_only_text?(node)
|
|
97
|
+
:whitespace_adjacency
|
|
98
|
+
elsif comment_node?(node)
|
|
99
|
+
:comments
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# True when +node+ is a noise node (whitespace-only text or comment).
|
|
104
|
+
# Convenience wrapper around +noise_dimension_for+.
|
|
105
|
+
#
|
|
106
|
+
# @param node [Object] DOM node to check
|
|
107
|
+
# @return [Boolean]
|
|
108
|
+
def self.noise_node?(node)
|
|
109
|
+
!noise_dimension_for(node).nil?
|
|
110
|
+
end
|
|
111
|
+
|
|
86
112
|
# Extract parse-time errors carried on a node or its owning document.
|
|
87
113
|
# Returns an Array of Strings.
|
|
88
114
|
def self.parse_errors(node)
|
|
@@ -98,6 +124,15 @@ module Canon
|
|
|
98
124
|
[]
|
|
99
125
|
end
|
|
100
126
|
end
|
|
127
|
+
|
|
128
|
+
# Return the parent node of +node+, or nil when +node+ is not a
|
|
129
|
+
# recognised DOM backend type or has no parent.
|
|
130
|
+
def self.parent_of(node)
|
|
131
|
+
case node
|
|
132
|
+
when Canon::Xml::Node, Nokogiri::XML::Node
|
|
133
|
+
node.parent
|
|
134
|
+
end
|
|
135
|
+
end
|
|
101
136
|
end
|
|
102
137
|
end
|
|
103
138
|
end
|
|
@@ -98,7 +98,7 @@ module Canon
|
|
|
98
98
|
end
|
|
99
99
|
|
|
100
100
|
# If no matches and children exist, they're all different
|
|
101
|
-
if matches.empty? && (!children1.empty? ||
|
|
101
|
+
if matches.empty? && (!children1.empty? || children2.empty?)
|
|
102
102
|
comparator.add_difference(parent_node, parent_node,
|
|
103
103
|
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
104
104
|
:text_content, opts, differences)
|
|
@@ -156,13 +156,12 @@ module Canon
|
|
|
156
156
|
end
|
|
157
157
|
|
|
158
158
|
# Use simple positional comparison for children, with
|
|
159
|
-
#
|
|
160
|
-
#
|
|
161
|
-
#
|
|
162
|
-
#
|
|
163
|
-
#
|
|
164
|
-
#
|
|
165
|
-
# content against content. See lutaml/canon#137.
|
|
159
|
+
# noise-aware re-alignment via ChildRealignment. When the
|
|
160
|
+
# children arrays differ in length, a pre-walk step records
|
|
161
|
+
# structural orphans (or suppresses them when the length
|
|
162
|
+
# difference is fully explained by noise nodes). The shared
|
|
163
|
+
# walk then handles noise realignment and content comparison.
|
|
164
|
+
# See lutaml/canon#137 (whitespace) and #144 (comments).
|
|
166
165
|
def use_positional_comparison(
|
|
167
166
|
children1, children2, parent_node, comparator,
|
|
168
167
|
opts, child_opts, diff_children, differences
|
|
@@ -173,11 +172,11 @@ module Canon
|
|
|
173
172
|
unless children1.length == children2.length
|
|
174
173
|
has_mismatch = true
|
|
175
174
|
|
|
176
|
-
|
|
175
|
+
noise_asymmetric = asymmetric_noise_explains_length_diff?(
|
|
177
176
|
children1, children2
|
|
178
177
|
)
|
|
179
178
|
|
|
180
|
-
if
|
|
179
|
+
if noise_asymmetric
|
|
181
180
|
dimension = nil
|
|
182
181
|
mismatched_children = []
|
|
183
182
|
else
|
|
@@ -191,7 +190,7 @@ module Canon
|
|
|
191
190
|
end
|
|
192
191
|
|
|
193
192
|
if mismatched_children.empty?
|
|
194
|
-
unless
|
|
193
|
+
unless noise_asymmetric
|
|
195
194
|
comparator.add_difference(parent_node, parent_node,
|
|
196
195
|
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
197
196
|
dimension, opts, differences)
|
|
@@ -215,75 +214,31 @@ module Canon
|
|
|
215
214
|
end
|
|
216
215
|
|
|
217
216
|
result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
217
|
+
|
|
218
|
+
emitter = xml_diff_emitter(comparator, opts, differences)
|
|
219
|
+
walk_result = ChildRealignment.walk(children1, children2,
|
|
220
|
+
emitter) do |c1, c2|
|
|
221
|
+
comparator.compare_nodes(c1, c2, child_opts, child_opts,
|
|
222
|
+
diff_children, differences)
|
|
223
|
+
end
|
|
222
224
|
result = walk_result unless walk_result == Comparison::EQUIVALENT
|
|
223
225
|
result
|
|
224
226
|
end
|
|
225
227
|
|
|
226
|
-
#
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
child_opts, diff_children, opts, differences
|
|
232
|
-
)
|
|
233
|
-
result = Comparison::EQUIVALENT
|
|
234
|
-
i = 0
|
|
235
|
-
j = 0
|
|
236
|
-
|
|
237
|
-
while i < children1.length || j < children2.length
|
|
238
|
-
c1 = children1[i]
|
|
239
|
-
c2 = children2[j]
|
|
240
|
-
|
|
241
|
-
if c1.nil?
|
|
242
|
-
j += 1
|
|
243
|
-
next
|
|
244
|
-
elsif c2.nil?
|
|
245
|
-
i += 1
|
|
246
|
-
next
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
ws1 = NodeInspector.whitespace_only_text?(c1)
|
|
250
|
-
ws2 = NodeInspector.whitespace_only_text?(c2)
|
|
251
|
-
|
|
252
|
-
if ws1 && !ws2
|
|
253
|
-
comparator.add_difference(c1, c2,
|
|
254
|
-
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
255
|
-
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
256
|
-
:whitespace_adjacency, opts, differences)
|
|
257
|
-
result = Comparison::UNEQUAL_TEXT_CONTENTS
|
|
258
|
-
i += 1
|
|
259
|
-
next
|
|
260
|
-
elsif ws2 && !ws1
|
|
261
|
-
comparator.add_difference(c1, c2,
|
|
262
|
-
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
263
|
-
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
264
|
-
:whitespace_adjacency, opts, differences)
|
|
265
|
-
result = Comparison::UNEQUAL_TEXT_CONTENTS
|
|
266
|
-
j += 1
|
|
267
|
-
next
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
child_result = comparator.compare_nodes(c1, c2,
|
|
271
|
-
child_opts, child_opts,
|
|
272
|
-
diff_children, differences)
|
|
273
|
-
result = child_result unless child_result == Comparison::EQUIVALENT
|
|
274
|
-
i += 1
|
|
275
|
-
j += 1
|
|
228
|
+
# Build a diff emitter for the XML comparator path that
|
|
229
|
+
# delegates to comparator.add_difference.
|
|
230
|
+
def xml_diff_emitter(comparator, opts, differences)
|
|
231
|
+
proc do |n1, n2, d1, d2, dim|
|
|
232
|
+
comparator.add_difference(n1, n2, d1, d2, dim, opts, differences)
|
|
276
233
|
end
|
|
277
|
-
|
|
278
|
-
result
|
|
279
234
|
end
|
|
280
235
|
|
|
281
|
-
# True when the length difference
|
|
282
|
-
#
|
|
283
|
-
def
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
236
|
+
# True when the length difference is fully explained by
|
|
237
|
+
# asymmetric noise nodes (whitespace-only text and/or comments).
|
|
238
|
+
def asymmetric_noise_explains_length_diff?(children1, children2)
|
|
239
|
+
signal1 = children1.reject { |c| NodeInspector.noise_node?(c) }
|
|
240
|
+
signal2 = children2.reject { |c| NodeInspector.noise_node?(c) }
|
|
241
|
+
signal1.length == signal2.length
|
|
287
242
|
end
|
|
288
243
|
|
|
289
244
|
# Determine dimension for length mismatch
|
|
@@ -86,6 +86,14 @@ module Canon
|
|
|
86
86
|
return "Attribute order changed: [#{attrs1.join(', ')}] → [#{attrs2.join(', ')}]"
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
# For asymmetric comment nodes (#144), name the side that carries
|
|
90
|
+
# the comment and surface the comment text rather than reusing
|
|
91
|
+
# the generic "element structure mismatch" wording.
|
|
92
|
+
if dimension == :comments
|
|
93
|
+
comment_reason = build_comment_difference_reason(node1, node2)
|
|
94
|
+
return comment_reason if comment_reason
|
|
95
|
+
end
|
|
96
|
+
|
|
89
97
|
# Default reason
|
|
90
98
|
if diff1 == Canon::Comparison::MISSING_NODE && diff2 == Canon::Comparison::MISSING_NODE
|
|
91
99
|
"element structure mismatch (children differ)"
|
|
@@ -217,6 +225,31 @@ module Canon
|
|
|
217
225
|
"'#{truncate(text1)}' vs '#{truncate(text2)}'"
|
|
218
226
|
end
|
|
219
227
|
|
|
228
|
+
# Build a Reason line for a +:comments+ diff. Returns +nil+ when
|
|
229
|
+
# neither side carries a comment (caller falls back to default).
|
|
230
|
+
def self.build_comment_difference_reason(node1, node2)
|
|
231
|
+
cm1 = node1 && Canon::Comparison::NodeInspector.comment_node?(node1)
|
|
232
|
+
cm2 = node2 && Canon::Comparison::NodeInspector.comment_node?(node2)
|
|
233
|
+
|
|
234
|
+
return nil unless cm1 || cm2
|
|
235
|
+
|
|
236
|
+
if cm1 && !cm2
|
|
237
|
+
"Comment present on EXPECTED only: " \
|
|
238
|
+
"<!--#{truncate(comment_text(node1))}-->"
|
|
239
|
+
elsif cm2 && !cm1
|
|
240
|
+
"Comment present on ACTUAL only: " \
|
|
241
|
+
"<!--#{truncate(comment_text(node2))}-->"
|
|
242
|
+
else
|
|
243
|
+
t1 = truncate(comment_text(node1))
|
|
244
|
+
t2 = truncate(comment_text(node2))
|
|
245
|
+
"Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
|
|
246
|
+
end
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
def self.comment_text(node)
|
|
250
|
+
Canon::Comparison::NodeInspector.text_content(node).to_s
|
|
251
|
+
end
|
|
252
|
+
|
|
220
253
|
# Truncate text for display in reason messages
|
|
221
254
|
#
|
|
222
255
|
# @param text [String] Text to truncate
|
|
@@ -703,6 +703,10 @@ differences)
|
|
|
703
703
|
return build_whitespace_adjacency_reason(node1, node2)
|
|
704
704
|
end
|
|
705
705
|
|
|
706
|
+
if dimension == :comments
|
|
707
|
+
return build_comments_reason(node1, node2)
|
|
708
|
+
end
|
|
709
|
+
|
|
706
710
|
# For attribute values differences, show the actual values
|
|
707
711
|
if dimension == :attribute_values
|
|
708
712
|
attrs1 = extract_attributes(node1)
|
|
@@ -873,12 +877,31 @@ differences)
|
|
|
873
877
|
return build_text_diff_reason(text1, text2)
|
|
874
878
|
end
|
|
875
879
|
|
|
876
|
-
direction = whitespace_partner_direction(ws_node)
|
|
877
880
|
ws_vis = visualize_whitespace(ws_text)
|
|
878
|
-
content_vis = content_text ? visualize_whitespace(truncate_text(content_text)) : "(none)"
|
|
879
881
|
|
|
880
|
-
|
|
881
|
-
|
|
882
|
+
if content_text.nil? || content_text.strip.empty?
|
|
883
|
+
# Partner content extracts to "" / whitespace-only — naming it
|
|
884
|
+
# in the Reason ("Whitespace before \"\"") gives the reader
|
|
885
|
+
# nothing. Fall back to the parent element name so the
|
|
886
|
+
# diff carries structural context (issue #112's contract,
|
|
887
|
+
# extended from :text_content to :whitespace_adjacency).
|
|
888
|
+
parent_label = whitespace_adjacency_parent_label(ws_node)
|
|
889
|
+
"Whitespace inside #{parent_label}: " \
|
|
890
|
+
"present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
|
|
891
|
+
else
|
|
892
|
+
direction = whitespace_partner_direction(ws_node)
|
|
893
|
+
content_vis = visualize_whitespace(truncate_text(content_text))
|
|
894
|
+
"Whitespace #{direction} \"#{content_vis}\": " \
|
|
895
|
+
"present on #{present_side} (\"#{ws_vis}\"), absent on #{absent_side}"
|
|
896
|
+
end
|
|
897
|
+
end
|
|
898
|
+
|
|
899
|
+
def whitespace_adjacency_parent_label(ws_node)
|
|
900
|
+
parent = NodeInspector.parent_of(ws_node)
|
|
901
|
+
return "(unknown parent)" unless parent
|
|
902
|
+
|
|
903
|
+
name = parent.name
|
|
904
|
+
name && !name.empty? ? "<#{name}>" : "(unknown parent)"
|
|
882
905
|
end
|
|
883
906
|
|
|
884
907
|
# Direction of the partner content relative to the whitespace node,
|
|
@@ -889,11 +912,8 @@ differences)
|
|
|
889
912
|
# "adjacent to" as a degenerate fallback when neither neighbour
|
|
890
913
|
# exists.
|
|
891
914
|
def whitespace_partner_direction(ws_node)
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
parent = ws_node.parent
|
|
896
|
-
return "adjacent to" if parent.nil?
|
|
915
|
+
parent = NodeInspector.parent_of(ws_node)
|
|
916
|
+
return "adjacent to" unless parent
|
|
897
917
|
|
|
898
918
|
siblings = parent.children
|
|
899
919
|
idx = siblings.index(ws_node)
|
|
@@ -918,6 +938,30 @@ differences)
|
|
|
918
938
|
false
|
|
919
939
|
end
|
|
920
940
|
|
|
941
|
+
# Build a Reason line for a +:comments+ diff (#144).
|
|
942
|
+
# Names the side that carries the comment and surfaces the
|
|
943
|
+
# comment text.
|
|
944
|
+
def build_comments_reason(node1, node2)
|
|
945
|
+
cm1 = node1 && NodeInspector.comment_node?(node1)
|
|
946
|
+
cm2 = node2 && NodeInspector.comment_node?(node2)
|
|
947
|
+
|
|
948
|
+
if cm1 && !cm2
|
|
949
|
+
"Comment present on EXPECTED only: <!--#{truncate_text(comment_text(node1))}-->"
|
|
950
|
+
elsif cm2 && !cm1
|
|
951
|
+
"Comment present on ACTUAL only: <!--#{truncate_text(comment_text(node2))}-->"
|
|
952
|
+
elsif cm1 && cm2
|
|
953
|
+
t1 = truncate_text(comment_text(node1))
|
|
954
|
+
t2 = truncate_text(comment_text(node2))
|
|
955
|
+
"Comment text differs: <!--#{t1}--> vs <!--#{t2}-->"
|
|
956
|
+
else
|
|
957
|
+
"element structure mismatch (children differ)"
|
|
958
|
+
end
|
|
959
|
+
end
|
|
960
|
+
|
|
961
|
+
def comment_text(node)
|
|
962
|
+
NodeInspector.text_content(node).to_s
|
|
963
|
+
end
|
|
964
|
+
|
|
921
965
|
# Check if text is only whitespace
|
|
922
966
|
#
|
|
923
967
|
# @param text [String] Text to check
|
data/lib/canon/comparison.rb
CHANGED
|
@@ -74,7 +74,7 @@ module Canon
|
|
|
74
74
|
end
|
|
75
75
|
|
|
76
76
|
# :whitespace_adjacency is a report-only re-label of an
|
|
77
|
-
# asymmetric whitespace mismatch emitted by
|
|
77
|
+
# asymmetric whitespace mismatch emitted by ChildRealignment's
|
|
78
78
|
# two-cursor walk. Equivalence behaviour is unchanged — the
|
|
79
79
|
# underlying mismatch is normative regardless of match options.
|
|
80
80
|
if diff_node.dimension == :whitespace_adjacency
|
|
@@ -83,6 +83,14 @@ module Canon
|
|
|
83
83
|
return diff_node
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# :comments diffs from asymmetric comment nodes intentionally
|
|
87
|
+
# fall through to profile.normative_dimension? below. Unlike
|
|
88
|
+
# :whitespace_adjacency (always normative), the classification
|
|
89
|
+
# of comment diffs respects the :comments match option:
|
|
90
|
+
# :strict → normative, :ignore → informative. This is by
|
|
91
|
+
# design — callers can control whether asymmetric comments
|
|
92
|
+
# affect equivalence via the match profile.
|
|
93
|
+
|
|
86
94
|
# THIRD: Determine if this dimension is normative based on CompareProfile
|
|
87
95
|
# This respects the policy settings (strict/normalize/ignore)
|
|
88
96
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
@@ -525,14 +525,34 @@ expand_difference: false)
|
|
|
525
525
|
text1 = NodeUtils.get_node_text(node1).to_s
|
|
526
526
|
text2 = NodeUtils.get_node_text(node2).to_s
|
|
527
527
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
528
|
+
if TextUtils.ambiguous_text_pair?(text1, text2) &&
|
|
529
|
+
(NodeUtils.parent_of(node1) || NodeUtils.parent_of(node2))
|
|
530
|
+
# Both sides extract to empty / whitespace-only strings —
|
|
531
|
+
# `""` / `""` tells the reader nothing. Fall back to a
|
|
532
|
+
# brief parent open-tag hint per #112's contract, but
|
|
533
|
+
# without dumping the full ancestor subtree (#125).
|
|
534
|
+
hint1 = NodeUtils.serialize_open_tag(NodeUtils.parent_of(node1))
|
|
535
|
+
hint2 = NodeUtils.serialize_open_tag(NodeUtils.parent_of(node2))
|
|
536
|
+
ws1 = TextUtils.visualize_whitespace(text1)
|
|
537
|
+
ws2 = TextUtils.visualize_whitespace(text2)
|
|
538
|
+
detail1 = ColorHelper.colorize(
|
|
539
|
+
"\"#{ws1}\" in #{hint1}",
|
|
540
|
+
:red, use_color
|
|
541
|
+
)
|
|
542
|
+
detail2 = ColorHelper.colorize(
|
|
543
|
+
"\"#{ws2}\" in #{hint2}",
|
|
544
|
+
:green, use_color
|
|
545
|
+
)
|
|
546
|
+
else
|
|
547
|
+
detail1 = ColorHelper.colorize(
|
|
548
|
+
"\"#{TextUtils.visualize_whitespace(text1)}\"",
|
|
549
|
+
:red, use_color
|
|
550
|
+
)
|
|
551
|
+
detail2 = ColorHelper.colorize(
|
|
552
|
+
"\"#{TextUtils.visualize_whitespace(text2)}\"",
|
|
553
|
+
:green, use_color
|
|
554
|
+
)
|
|
555
|
+
end
|
|
536
556
|
|
|
537
557
|
reason = if diff.is_a?(Canon::Diff::DiffNode)
|
|
538
558
|
diff.reason
|
|
@@ -29,6 +29,8 @@ module Canon
|
|
|
29
29
|
#
|
|
30
30
|
# See lutaml/canon#133, lutaml/canon#135.
|
|
31
31
|
class Html
|
|
32
|
+
WHITESPACE_PRESERVING_ELEMENTS = %w[pre textarea script style].freeze
|
|
33
|
+
|
|
32
34
|
def initialize(indent: 2, indent_type: "space", fixture_ready: false)
|
|
33
35
|
@indent = indent.to_i
|
|
34
36
|
@indent_type = indent_type
|
|
@@ -83,6 +85,7 @@ module Canon
|
|
|
83
85
|
# suppresses the +<?xml ...?>+ prefix.
|
|
84
86
|
def format_fixture_ready(html_string)
|
|
85
87
|
doc = Nokogiri::HTML5(html_string)
|
|
88
|
+
strip_structural_whitespace!(doc)
|
|
86
89
|
io = StringIO.new
|
|
87
90
|
if @indent_type == "tab"
|
|
88
91
|
doc.write_to(io, save_with: fixture_ready_save_options,
|
|
@@ -94,6 +97,37 @@ module Canon
|
|
|
94
97
|
io.string
|
|
95
98
|
end
|
|
96
99
|
|
|
100
|
+
# libxml's +FORMAT+ save flag does not insert indentation around
|
|
101
|
+
# the children of any element it sees as mixed content (any
|
|
102
|
+
# non-whitespace-only text node child). +Nokogiri::HTML5+ does
|
|
103
|
+
# not accept the +noblanks+ option that the XML parser uses to
|
|
104
|
+
# strip these inter-sibling text nodes pre-serialisation, so we
|
|
105
|
+
# do it manually here: drop whitespace-only text nodes whose
|
|
106
|
+
# parent is structural (no real text content) and not a
|
|
107
|
+
# whitespace-preserving element. Mixed-content runs like
|
|
108
|
+
# +<p>foo <em>bar</em> baz</p>+ are left alone.
|
|
109
|
+
def strip_structural_whitespace!(doc)
|
|
110
|
+
to_remove = []
|
|
111
|
+
doc.traverse do |node|
|
|
112
|
+
next unless node.text?
|
|
113
|
+
next unless node.content.strip.empty?
|
|
114
|
+
|
|
115
|
+
parent = node.parent
|
|
116
|
+
next if parent.nil?
|
|
117
|
+
next if WHITESPACE_PRESERVING_ELEMENTS.include?(parent.name)
|
|
118
|
+
next if parent_has_real_text?(parent)
|
|
119
|
+
|
|
120
|
+
to_remove << node
|
|
121
|
+
end
|
|
122
|
+
to_remove.each(&:remove)
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def parent_has_real_text?(parent)
|
|
126
|
+
parent.children.any? do |c|
|
|
127
|
+
c.text? && !c.content.strip.empty?
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
97
131
|
def fixture_ready_save_options
|
|
98
132
|
Nokogiri::XML::Node::SaveOptions::FORMAT |
|
|
99
133
|
Nokogiri::XML::Node::SaveOptions::AS_XHTML |
|
data/lib/canon/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: canon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diff-lcs
|
|
@@ -167,6 +167,7 @@ files:
|
|
|
167
167
|
- docs/features/diff-formatting/algorithm-specific-output.adoc
|
|
168
168
|
- docs/features/diff-formatting/character-visualization.adoc
|
|
169
169
|
- docs/features/diff-formatting/colors-and-symbols.adoc
|
|
170
|
+
- docs/features/diff-formatting/comment-asymmetry.adoc
|
|
170
171
|
- docs/features/diff-formatting/context-and-grouping.adoc
|
|
171
172
|
- docs/features/diff-formatting/display-filtering.adoc
|
|
172
173
|
- docs/features/diff-formatting/display-preprocessing.adoc
|
|
@@ -221,6 +222,7 @@ files:
|
|
|
221
222
|
- lib/canon/commands/format_command.rb
|
|
222
223
|
- lib/canon/comparison.rb
|
|
223
224
|
- lib/canon/comparison/base_comparator.rb
|
|
225
|
+
- lib/canon/comparison/child_realignment.rb
|
|
224
226
|
- lib/canon/comparison/compare_profile.rb
|
|
225
227
|
- lib/canon/comparison/comparison_result.rb
|
|
226
228
|
- lib/canon/comparison/dimensions.rb
|