canon 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +16 -61
- data/README.adoc +5 -0
- data/Rakefile +17 -0
- data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/child_realignment.rb +140 -0
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +36 -75
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +150 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +32 -77
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +43 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +89 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +25 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +16 -42
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +7 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a13457a67f3e2ab91e00cec19684c502605ab807bdd87eb1120e77d190a99c2e
|
|
4
|
+
data.tar.gz: 35c0c873340e12c63048adf2222fda2f8c2ae3972337dcc212b26d391191ac35
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8db915564eebd4ca4dfadd65358f721aa70bca318c22dc1c02eff5e3527cf646ea19722b760072851f358b3fabefd12fc5f6dfc216bce146423c7091f3bf7eac
|
|
7
|
+
data.tar.gz: f92e7491d781c8762483335558ede985a1653bcfb88613858115aa87e50bb326f95b0b76b845c54154e657fb9f25b3d1f348bf8e9baa926ea1c6bfbbd77d6ca6
|
data/.rspec-opal
ADDED
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-
|
|
3
|
+
# on 2026-05-24 10:34:05 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,51 +11,13 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
15
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
16
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
-
Layout/ArgumentAlignment:
|
|
19
|
-
Exclude:
|
|
20
|
-
- 'lib/canon/comparison/xml_comparator.rb'
|
|
21
|
-
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
22
|
-
|
|
23
|
-
# Offense count: 1
|
|
24
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
25
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
26
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
27
|
-
Layout/BlockAlignment:
|
|
28
|
-
Exclude:
|
|
29
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
30
|
-
|
|
31
|
-
# Offense count: 1
|
|
32
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
33
|
-
Layout/BlockEndNewline:
|
|
34
|
-
Exclude:
|
|
35
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
36
|
-
|
|
37
|
-
# Offense count: 2
|
|
38
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
39
|
-
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
40
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
41
|
-
Layout/IndentationWidth:
|
|
42
|
-
Exclude:
|
|
43
|
-
- 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
|
|
44
|
-
|
|
45
|
-
# Offense count: 1347
|
|
14
|
+
# Offense count: 1358
|
|
46
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
47
16
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
48
17
|
# URISchemes: http, https
|
|
49
18
|
Layout/LineLength:
|
|
50
19
|
Enabled: false
|
|
51
20
|
|
|
52
|
-
# Offense count: 2
|
|
53
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
54
|
-
# Configuration parameters: AllowInHeredoc.
|
|
55
|
-
Layout/TrailingWhitespace:
|
|
56
|
-
Exclude:
|
|
57
|
-
- 'lib/canon/comparison/xml_comparator.rb'
|
|
58
|
-
|
|
59
21
|
# Offense count: 58
|
|
60
22
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
61
23
|
Lint/DuplicateBranch:
|
|
@@ -69,13 +31,12 @@ Lint/EmptyConditionalBody:
|
|
|
69
31
|
- 'spec/canon/comparison/html_comparator_spec.rb'
|
|
70
32
|
- 'spec/canon/comparison_spec.rb'
|
|
71
33
|
|
|
72
|
-
# Offense count:
|
|
34
|
+
# Offense count: 5
|
|
73
35
|
# Configuration parameters: MaximumRangeSize.
|
|
74
36
|
Lint/MissingCopEnableDirective:
|
|
75
37
|
Exclude:
|
|
76
38
|
- 'lib/canon/commands/format_command.rb'
|
|
77
39
|
- 'lib/canon/xml/attribute_handler.rb'
|
|
78
|
-
- 'lib/canon/xml/data_model.rb'
|
|
79
40
|
- 'lib/canon/xml/namespace_handler.rb'
|
|
80
41
|
- 'lib/canon/xml/processor.rb'
|
|
81
42
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
@@ -101,7 +62,7 @@ Lint/UselessConstantScoping:
|
|
|
101
62
|
Exclude:
|
|
102
63
|
- 'lib/canon/diff_formatter/theme.rb'
|
|
103
64
|
|
|
104
|
-
# Offense count:
|
|
65
|
+
# Offense count: 313
|
|
105
66
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
106
67
|
Metrics/AbcSize:
|
|
107
68
|
Enabled: false
|
|
@@ -117,12 +78,12 @@ Metrics/BlockLength:
|
|
|
117
78
|
Metrics/BlockNesting:
|
|
118
79
|
Max: 4
|
|
119
80
|
|
|
120
|
-
# Offense count:
|
|
81
|
+
# Offense count: 276
|
|
121
82
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
122
83
|
Metrics/CyclomaticComplexity:
|
|
123
84
|
Enabled: false
|
|
124
85
|
|
|
125
|
-
# Offense count:
|
|
86
|
+
# Offense count: 523
|
|
126
87
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
127
88
|
Metrics/MethodLength:
|
|
128
89
|
Max: 146
|
|
@@ -132,7 +93,7 @@ Metrics/MethodLength:
|
|
|
132
93
|
Metrics/ParameterLists:
|
|
133
94
|
Max: 10
|
|
134
95
|
|
|
135
|
-
# Offense count:
|
|
96
|
+
# Offense count: 214
|
|
136
97
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
137
98
|
Metrics/PerceivedComplexity:
|
|
138
99
|
Enabled: false
|
|
@@ -165,13 +126,13 @@ Performance/CollectionLiteralInLoop:
|
|
|
165
126
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
166
127
|
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
167
128
|
|
|
168
|
-
# Offense count:
|
|
129
|
+
# Offense count: 107
|
|
169
130
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
170
131
|
# Prefixes: when, with, without
|
|
171
132
|
RSpec/ContextWording:
|
|
172
133
|
Enabled: false
|
|
173
134
|
|
|
174
|
-
# Offense count:
|
|
135
|
+
# Offense count: 47
|
|
175
136
|
# Configuration parameters: IgnoredMetadata.
|
|
176
137
|
RSpec/DescribeClass:
|
|
177
138
|
Enabled: false
|
|
@@ -182,7 +143,7 @@ RSpec/DescribeMethod:
|
|
|
182
143
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
183
144
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
184
145
|
|
|
185
|
-
# Offense count:
|
|
146
|
+
# Offense count: 874
|
|
186
147
|
# Configuration parameters: CountAsOne.
|
|
187
148
|
RSpec/ExampleLength:
|
|
188
149
|
Max: 44
|
|
@@ -196,12 +157,6 @@ RSpec/ExpectActual:
|
|
|
196
157
|
- 'spec/canon/rspec_matchers_spec.rb'
|
|
197
158
|
- 'spec/canon/string_matcher_spec.rb'
|
|
198
159
|
|
|
199
|
-
# Offense count: 7
|
|
200
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
201
|
-
RSpec/IncludeExamples:
|
|
202
|
-
Exclude:
|
|
203
|
-
- 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
|
|
204
|
-
|
|
205
160
|
# Offense count: 177
|
|
206
161
|
# Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
|
|
207
162
|
RSpec/IndexedLet:
|
|
@@ -240,7 +195,7 @@ RSpec/MultipleDescribes:
|
|
|
240
195
|
Exclude:
|
|
241
196
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
242
197
|
|
|
243
|
-
# Offense count:
|
|
198
|
+
# Offense count: 736
|
|
244
199
|
RSpec/MultipleExpectations:
|
|
245
200
|
Max: 15
|
|
246
201
|
|
|
@@ -249,7 +204,7 @@ RSpec/MultipleExpectations:
|
|
|
249
204
|
RSpec/MultipleMemoizedHelpers:
|
|
250
205
|
Max: 16
|
|
251
206
|
|
|
252
|
-
# Offense count:
|
|
207
|
+
# Offense count: 29
|
|
253
208
|
# Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
|
|
254
209
|
# SupportedStyles: always, named_only
|
|
255
210
|
RSpec/NamedSubject:
|
|
@@ -258,17 +213,18 @@ RSpec/NamedSubject:
|
|
|
258
213
|
- 'spec/canon/pretty_printer/json_spec.rb'
|
|
259
214
|
- 'spec/canon/pretty_printer/xml_spec.rb'
|
|
260
215
|
|
|
261
|
-
# Offense count:
|
|
216
|
+
# Offense count: 54
|
|
262
217
|
# Configuration parameters: AllowedGroups.
|
|
263
218
|
RSpec/NestedGroups:
|
|
264
219
|
Max: 4
|
|
265
220
|
|
|
266
|
-
# Offense count:
|
|
221
|
+
# Offense count: 11
|
|
267
222
|
# Configuration parameters: AllowedPatterns.
|
|
268
223
|
# AllowedPatterns: ^expect_, ^assert_
|
|
269
224
|
RSpec/NoExpectationExample:
|
|
270
225
|
Exclude:
|
|
271
226
|
- 'spec/canon/context_grouping_spec.rb'
|
|
227
|
+
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
272
228
|
- 'spec/canon/informative_diffs_debug_spec.rb'
|
|
273
229
|
- 'spec/canon/isodoc_blockquotes_spec.rb'
|
|
274
230
|
- 'spec/canon/match_scenarios_spec.rb'
|
|
@@ -292,14 +248,13 @@ RSpec/SpecFilePathFormat:
|
|
|
292
248
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
293
249
|
- 'spec/xml_c14n_spec.rb'
|
|
294
250
|
|
|
295
|
-
# Offense count:
|
|
251
|
+
# Offense count: 72
|
|
296
252
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
297
253
|
RSpec/VerifiedDoubles:
|
|
298
254
|
Exclude:
|
|
299
255
|
- 'spec/canon/comparison/diff_node_builder_spec.rb'
|
|
300
256
|
- 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
|
|
301
257
|
- 'spec/canon/diff/diff_classifier_spec.rb'
|
|
302
|
-
- 'spec/canon/diff/path_builder_spec.rb'
|
|
303
258
|
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
304
259
|
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
305
260
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
data/README.adoc
CHANGED
|
@@ -618,6 +618,11 @@ See link:docs/MODES[Diff modes] for details.
|
|
|
618
618
|
reported as a dedicated `:whitespace_adjacency` dimension with direction
|
|
619
619
|
wording (`before`/`after`/`adjacent to`) instead of cascading into
|
|
620
620
|
misleading `:text_content` mismatches
|
|
621
|
+
* **Asymmetric comment reporting**: A `<!-- ... -->` node present on only
|
|
622
|
+
one side is reported as a dedicated `:comments` dimension diff anchored
|
|
623
|
+
at the comment node, instead of shifting children alignment and
|
|
624
|
+
surfacing a misleading `:element_structure` "Element removed" diff
|
|
625
|
+
against an unrelated trailing sibling
|
|
621
626
|
* **Non-ASCII detection**: Warnings for unexpected Unicode characters
|
|
622
627
|
* **Customizable**: Character maps, context lines, grouping options
|
|
623
628
|
|
data/Rakefile
CHANGED
|
@@ -5,10 +5,27 @@ require "rspec/core/rake_task"
|
|
|
5
5
|
|
|
6
6
|
RSpec::Core::RakeTask.new(:spec)
|
|
7
7
|
|
|
8
|
+
begin
|
|
9
|
+
require "opal/rspec/rake_task"
|
|
10
|
+
rescue LoadError
|
|
11
|
+
# Opal not available or incompatible with current Ruby version
|
|
12
|
+
end
|
|
13
|
+
|
|
8
14
|
require "rubocop/rake_task"
|
|
9
15
|
|
|
10
16
|
RuboCop::RakeTask.new
|
|
11
17
|
|
|
12
18
|
Dir.glob("lib/tasks/**/*.rake").each { |r| load r }
|
|
13
19
|
|
|
20
|
+
namespace :spec do
|
|
21
|
+
if defined?(Opal::RSpec::RakeTask)
|
|
22
|
+
desc "Run Opal (JavaScript) tests"
|
|
23
|
+
Opal::RSpec::RakeTask.new(:opal) do |server, runner|
|
|
24
|
+
server.append_path "lib"
|
|
25
|
+
runner.default_path = "spec"
|
|
26
|
+
runner.pattern = "spec/canon/opal_xml_smoke_spec.rb"
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
14
31
|
task default: %i[spec rubocop]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Comment asymmetry in diff reports
|
|
3
|
+
parent: Diff Formatting
|
|
4
|
+
nav_order: 9
|
|
5
|
+
---
|
|
6
|
+
= Comment asymmetry in diff reports
|
|
7
|
+
:toc:
|
|
8
|
+
:toclevels: 2
|
|
9
|
+
|
|
10
|
+
== Purpose
|
|
11
|
+
|
|
12
|
+
Canon's diff reports anchor `<!-- ... -->` comment nodes that have no
|
|
13
|
+
counterpart on the other side to a dedicated `:comments` dimension
|
|
14
|
+
instead of letting the resulting children-array length mismatch cascade
|
|
15
|
+
into a misleading `:element_structure` "Element removed" diff against
|
|
16
|
+
the trailing named sibling.
|
|
17
|
+
|
|
18
|
+
This is a *report-only* shape change — equivalence verdicts are
|
|
19
|
+
unchanged. Whether asymmetric comments cause a non-equivalent verdict
|
|
20
|
+
or not depends on the `comments` match option (`:strict` /
|
|
21
|
+
`:ignore` / `:exact`), as before.
|
|
22
|
+
|
|
23
|
+
== The problem
|
|
24
|
+
|
|
25
|
+
Consider an HTML fragment compared with `verbose: true`:
|
|
26
|
+
|
|
27
|
+
[source,html]
|
|
28
|
+
----
|
|
29
|
+
<!-- expected -->
|
|
30
|
+
<body>
|
|
31
|
+
<div>first</div>
|
|
32
|
+
<div>second</div>
|
|
33
|
+
<!-- a comment that exists only on side A -->
|
|
34
|
+
<div style="mso-element:footnote-list"></div>
|
|
35
|
+
</body>
|
|
36
|
+
|
|
37
|
+
<!-- actual -->
|
|
38
|
+
<body>
|
|
39
|
+
<div>first</div>
|
|
40
|
+
<div>second</div>
|
|
41
|
+
<div style="mso-element:footnote-list"></div>
|
|
42
|
+
</body>
|
|
43
|
+
----
|
|
44
|
+
|
|
45
|
+
The `<div style="mso-element:footnote-list">` is byte-identical between
|
|
46
|
+
the two sides; the only real difference is the comment on the expected
|
|
47
|
+
side. Pre-#144, the diff report contained:
|
|
48
|
+
|
|
49
|
+
[source]
|
|
50
|
+
----
|
|
51
|
+
DIFFERENCE #1 — element_structure: Element removed:
|
|
52
|
+
<div style="mso-element:footnote-list"/>
|
|
53
|
+
----
|
|
54
|
+
|
|
55
|
+
That is the wrong dimension, anchored at the wrong node. The element is
|
|
56
|
+
present on both sides — what is missing is the comment.
|
|
57
|
+
|
|
58
|
+
The cascade comes from positional alignment in
|
|
59
|
+
`Canon::Comparison::HtmlComparator#compare_fragment_children` (and the
|
|
60
|
+
analogous walker in `XmlComparatorHelpers::ChildComparison`): in
|
|
61
|
+
verbose mode, comments are intentionally kept by `filter_children` so
|
|
62
|
+
informative differences can be recorded, but the resulting unequal
|
|
63
|
+
children-array lengths fell through to a name-based mismatch heuristic
|
|
64
|
+
that filtered out generic `#`-prefixed names (`#text`, `#comment`),
|
|
65
|
+
leaving the trailing named element to take the blame.
|
|
66
|
+
|
|
67
|
+
== The contract
|
|
68
|
+
|
|
69
|
+
When the children alignment encounters a comment node on one side
|
|
70
|
+
paired against a non-comment node on the other (or sitting past the
|
|
71
|
+
trailing edge of the shorter side), Canon:
|
|
72
|
+
|
|
73
|
+
1. Treats the comment as a *single-side gap* in the alignment.
|
|
74
|
+
2. Emits one `:comments` diff entry anchored at the comment node
|
|
75
|
+
itself (not at a mis-paired neighbouring element).
|
|
76
|
+
3. Advances only the cursor that carries the comment, so the next
|
|
77
|
+
iteration aligns content against content.
|
|
78
|
+
|
|
79
|
+
The Reason line names the side that carries the comment and surfaces
|
|
80
|
+
its text:
|
|
81
|
+
|
|
82
|
+
[source]
|
|
83
|
+
----
|
|
84
|
+
DIFFERENCE #1 — comments: Comment present on EXPECTED only:
|
|
85
|
+
<!-- a comment that exists only on side A -->
|
|
86
|
+
----
|
|
87
|
+
|
|
88
|
+
== Combined with whitespace asymmetry
|
|
89
|
+
|
|
90
|
+
The same realignment walk handles asymmetric whitespace-only text
|
|
91
|
+
nodes (link:whitespace-adjacency.adoc[issue #137]) and asymmetric
|
|
92
|
+
comment nodes together. When a children mismatch is fully explained by
|
|
93
|
+
a combination of asymmetric whitespace and asymmetric comments, the
|
|
94
|
+
walker emits one diff per asymmetric node with the appropriate
|
|
95
|
+
dimension (`:whitespace_adjacency` for whitespace, `:comments` for
|
|
96
|
+
comments) — no `:element_structure` diff is produced.
|
|
97
|
+
|
|
98
|
+
When a real structural mismatch coexists with an asymmetric comment,
|
|
99
|
+
both kinds of diff are emitted — the structural one under
|
|
100
|
+
`:element_structure`, the comment one under `:comments`.
|
|
101
|
+
|
|
102
|
+
== Working with :comments diffs programmatically
|
|
103
|
+
|
|
104
|
+
[source,ruby]
|
|
105
|
+
----
|
|
106
|
+
result = Canon::Comparison.equivalent?(html1, html2,
|
|
107
|
+
format: :html5, verbose: true)
|
|
108
|
+
|
|
109
|
+
comment_diffs = result.differences.select { |d| d.dimension == :comments }
|
|
110
|
+
|
|
111
|
+
# Whether these affect equivalence depends on the comments match option.
|
|
112
|
+
# Under the default :ignore profile they are informative; under :strict
|
|
113
|
+
# they are normative.
|
|
114
|
+
----
|
|
115
|
+
|
|
116
|
+
== What this contract does NOT do
|
|
117
|
+
|
|
118
|
+
* **Does not silence asymmetric comments.** They are always reported
|
|
119
|
+
in verbose output; the change is the dimension label and the anchor
|
|
120
|
+
node.
|
|
121
|
+
* **Does not affect symmetric comments.** When both sides carry
|
|
122
|
+
parallel comment nodes, those compare normally — content-vs-content
|
|
123
|
+
comparison applies.
|
|
124
|
+
* **Does not change equivalence outcomes.** A comparison whose
|
|
125
|
+
equivalence verdict was driven by asymmetric comments retains the
|
|
126
|
+
same verdict; only the report shape changes.
|
|
127
|
+
|
|
128
|
+
== Where it runs
|
|
129
|
+
|
|
130
|
+
The noise-aware realignment is a single shared implementation:
|
|
131
|
+
|
|
132
|
+
* `Canon::Comparison::ChildRealignment` — the two-cursor walk that
|
|
133
|
+
detects noise nodes via `NodeInspector.noise_dimension_for`,
|
|
134
|
+
emits per-orphan diffs with the appropriate dimension
|
|
135
|
+
(`:whitespace_adjacency`, `:comments`), and advances only the
|
|
136
|
+
noise-side cursor so content nodes stay aligned.
|
|
137
|
+
|
|
138
|
+
Both comparison paths delegate to `ChildRealignment.walk`:
|
|
139
|
+
|
|
140
|
+
* `Canon::Comparison::HtmlComparator#compare_fragment_children` — the
|
|
141
|
+
HTML fragment path (passes `emit_structural_orphans: true` because it
|
|
142
|
+
has no separate length-mismatch step).
|
|
143
|
+
* `Canon::Comparison::XmlComparatorHelpers::ChildComparison` — the XML
|
|
144
|
+
comparator path (passes `emit_structural_orphans: false`; structural
|
|
145
|
+
orphans are handled by the pre-walk length-mismatch step via
|
|
146
|
+
`asymmetric_noise_explains_length_diff?`).
|
|
147
|
+
|
|
148
|
+
== Related
|
|
149
|
+
|
|
150
|
+
* link:whitespace-adjacency.adoc[Whitespace adjacency] — sibling
|
|
151
|
+
contract for asymmetric whitespace-only text nodes.
|
|
152
|
+
* link:../../advanced/diff-classification.adoc[Diff classification] —
|
|
153
|
+
Normative vs informative differences.
|
|
154
|
+
|
|
155
|
+
== History
|
|
156
|
+
|
|
157
|
+
The false-positive cascade was reported in
|
|
158
|
+
https://github.com/lutaml/canon/issues/144[issue #144]. The fix
|
|
159
|
+
mirrors the structure of the `:whitespace_adjacency` work in
|
|
160
|
+
https://github.com/lutaml/canon/issues/137[issue #137].
|
data/lib/canon/cli.rb
CHANGED
data/lib/canon/color_detector.rb
CHANGED
|
@@ -67,11 +67,9 @@ module Canon
|
|
|
67
67
|
# @param io [IO] Output stream
|
|
68
68
|
# @return [Boolean] true if the stream is a TTY
|
|
69
69
|
def tty?(io)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
# Ruby 2.5+ uses tty?, older uses isatty
|
|
74
|
-
io.tty? || io.isatty
|
|
70
|
+
io.tty?
|
|
71
|
+
rescue NoMethodError
|
|
72
|
+
false
|
|
75
73
|
rescue ArgumentError, IOError
|
|
76
74
|
# Stream might be closed or invalid
|
|
77
75
|
false
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "node_inspector"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Comparison
|
|
7
|
+
# Shared two-cursor walk over child arrays with noise-aware realignment.
|
|
8
|
+
#
|
|
9
|
+
# When positional pairing would match a noise node (whitespace-only
|
|
10
|
+
# text or comment) against a content node, the walker treats the
|
|
11
|
+
# noise node as a single-side gap: emits a diff for it and advances
|
|
12
|
+
# only that cursor, so the next iteration aligns content against
|
|
13
|
+
# content.
|
|
14
|
+
#
|
|
15
|
+
# Noise classification is delegated to +NodeInspector.noise_dimension_for+,
|
|
16
|
+
# making the walk open for extension — new noise types only require
|
|
17
|
+
# adding a branch there.
|
|
18
|
+
#
|
|
19
|
+
# The walk is parameterised by a diff emitter (a callable that
|
|
20
|
+
# receives node1, node2, diff1, diff2, dimension) so both the HTML
|
|
21
|
+
# comparator (DiffNodeBuilder.build) and the XML comparator
|
|
22
|
+
# (comparator.add_difference) reuse the same cursor logic.
|
|
23
|
+
module ChildRealignment
|
|
24
|
+
class << self
|
|
25
|
+
# Walk two child arrays, emitting diffs for noise nodes and
|
|
26
|
+
# yielding matched content pairs.
|
|
27
|
+
#
|
|
28
|
+
# @param children1 [Array] Left-side children
|
|
29
|
+
# @param children2 [Array] Right-side children
|
|
30
|
+
# @param emitter [#call] Callable receiving
|
|
31
|
+
# (node1, node2, diff1, diff2, dimension)
|
|
32
|
+
# @param emit_structural_orphans [Boolean] When true, trailing-edge
|
|
33
|
+
# non-noise orphans are emitted as +:element_structure+ diffs.
|
|
34
|
+
# HTML fragment path sets this to true (it has no separate
|
|
35
|
+
# length-mismatch step); XML path sets it to false (structural
|
|
36
|
+
# orphans are already recorded by +use_positional_comparison+).
|
|
37
|
+
# @yield [child1, child2] Compare two matched content nodes.
|
|
38
|
+
# Must return a Comparison result constant.
|
|
39
|
+
# @return [Symbol] Worst comparison result encountered
|
|
40
|
+
def walk(children1, children2, emitter,
|
|
41
|
+
emit_structural_orphans: false)
|
|
42
|
+
worst = Comparison::EQUIVALENT
|
|
43
|
+
i = 0
|
|
44
|
+
j = 0
|
|
45
|
+
|
|
46
|
+
while i < children1.length || j < children2.length
|
|
47
|
+
child1 = children1[i]
|
|
48
|
+
child2 = children2[j]
|
|
49
|
+
|
|
50
|
+
if child1.nil?
|
|
51
|
+
result = emit_orphan(child2, :right, emitter,
|
|
52
|
+
emit_structural_orphans)
|
|
53
|
+
worst = result if result && result != Comparison::EQUIVALENT
|
|
54
|
+
j += 1
|
|
55
|
+
next
|
|
56
|
+
elsif child2.nil?
|
|
57
|
+
result = emit_orphan(child1, :left, emitter,
|
|
58
|
+
emit_structural_orphans)
|
|
59
|
+
worst = result if result && result != Comparison::EQUIVALENT
|
|
60
|
+
i += 1
|
|
61
|
+
next
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
dim1 = NodeInspector.noise_dimension_for(child1)
|
|
65
|
+
dim2 = NodeInspector.noise_dimension_for(child2)
|
|
66
|
+
|
|
67
|
+
if dim1 && !dim2
|
|
68
|
+
result = emit_inline_noise(child1, child2, dim1, :left, emitter)
|
|
69
|
+
worst = result unless result == Comparison::EQUIVALENT
|
|
70
|
+
i += 1
|
|
71
|
+
next
|
|
72
|
+
elsif dim2 && !dim1
|
|
73
|
+
result = emit_inline_noise(child1, child2, dim2, :right, emitter)
|
|
74
|
+
worst = result unless result == Comparison::EQUIVALENT
|
|
75
|
+
j += 1
|
|
76
|
+
next
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
if block_given?
|
|
80
|
+
child_result = yield(child1, child2)
|
|
81
|
+
worst = child_result unless child_result == Comparison::EQUIVALENT
|
|
82
|
+
end
|
|
83
|
+
i += 1
|
|
84
|
+
j += 1
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
worst
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
private
|
|
91
|
+
|
|
92
|
+
# Emit a diff for an inline noise node that sits opposite a
|
|
93
|
+
# content node. Whitespace passes both nodes for context;
|
|
94
|
+
# comments pass only the comment node.
|
|
95
|
+
def emit_inline_noise(node_left, node_right, dimension, noise_side,
|
|
96
|
+
emitter)
|
|
97
|
+
if dimension == :whitespace_adjacency
|
|
98
|
+
emitter.call(node_left, node_right,
|
|
99
|
+
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
100
|
+
Comparison::UNEQUAL_TEXT_CONTENTS,
|
|
101
|
+
dimension)
|
|
102
|
+
Comparison::UNEQUAL_TEXT_CONTENTS
|
|
103
|
+
else
|
|
104
|
+
n1 = noise_side == :left ? node_left : nil
|
|
105
|
+
n2 = noise_side == :right ? node_right : nil
|
|
106
|
+
emitter.call(n1, n2,
|
|
107
|
+
Comparison::MISSING_NODE,
|
|
108
|
+
Comparison::MISSING_NODE,
|
|
109
|
+
dimension)
|
|
110
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Emit a diff for a trailing-edge orphan (one side exhausted).
|
|
115
|
+
# Noise orphans are always emitted; structural orphans only when
|
|
116
|
+
# +emit_structural+ is true.
|
|
117
|
+
def emit_orphan(orphan, side, emitter, emit_structural)
|
|
118
|
+
dim = NodeInspector.noise_dimension_for(orphan)
|
|
119
|
+
if dim
|
|
120
|
+
n1 = side == :left ? orphan : nil
|
|
121
|
+
n2 = side == :right ? orphan : nil
|
|
122
|
+
emitter.call(n1, n2,
|
|
123
|
+
Comparison::MISSING_NODE,
|
|
124
|
+
Comparison::MISSING_NODE,
|
|
125
|
+
dim)
|
|
126
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
127
|
+
elsif emit_structural
|
|
128
|
+
n1 = side == :left ? orphan : nil
|
|
129
|
+
n2 = side == :right ? orphan : nil
|
|
130
|
+
emitter.call(n1, n2,
|
|
131
|
+
Comparison::MISSING_NODE,
|
|
132
|
+
Comparison::MISSING_NODE,
|
|
133
|
+
:element_structure)
|
|
134
|
+
Comparison::UNEQUAL_ELEMENTS
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -82,14 +82,11 @@ module Canon
|
|
|
82
82
|
%i[text_content structural_whitespace].include?(dimension)
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
-
private
|
|
86
|
-
|
|
87
85
|
# Get the behavior setting for a dimension
|
|
88
86
|
# @param dimension [Symbol] The match dimension
|
|
89
87
|
# @return [Symbol] The behavior (:strict, :normalize, :ignore)
|
|
90
88
|
def behavior_for(dimension)
|
|
91
|
-
|
|
92
|
-
if match_options.respond_to?(:behavior_for)
|
|
89
|
+
if match_options.is_a?(ResolvedMatchOptions)
|
|
93
90
|
match_options.behavior_for(dimension)
|
|
94
91
|
elsif match_options.is_a?(Hash)
|
|
95
92
|
match_options[dimension] || :strict
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|
|
@@ -27,14 +27,10 @@ module Canon
|
|
|
27
27
|
def extract_data(node)
|
|
28
28
|
return {} unless node
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
if node.is_a?(Moxml::Node)
|
|
32
|
-
extract_from_moxml(node)
|
|
33
|
-
# Handle Nokogiri nodes
|
|
34
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
30
|
+
if Canon::XmlBackend.nokogiri?
|
|
35
31
|
extract_from_nokogiri(node)
|
|
36
32
|
else
|
|
37
|
-
|
|
33
|
+
extract_from_moxml(node)
|
|
38
34
|
end
|
|
39
35
|
end
|
|
40
36
|
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|