canon 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +16 -61
  4. data/README.adoc +5 -0
  5. data/Rakefile +17 -0
  6. data/docs/features/diff-formatting/comment-asymmetry.adoc +160 -0
  7. data/lib/canon/cli.rb +1 -1
  8. data/lib/canon/color_detector.rb +3 -5
  9. data/lib/canon/comparison/child_realignment.rb +140 -0
  10. data/lib/canon/comparison/compare_profile.rb +1 -4
  11. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  15. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  16. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  17. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  18. data/lib/canon/comparison/format_detector.rb +29 -20
  19. data/lib/canon/comparison/html_comparator.rb +36 -75
  20. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  21. data/lib/canon/comparison/html_parser.rb +1 -1
  22. data/lib/canon/comparison/json_comparator.rb +8 -0
  23. data/lib/canon/comparison/node_inspector.rb +150 -58
  24. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  25. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  26. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  27. data/lib/canon/comparison/xml_comparator/child_comparison.rb +32 -77
  28. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +43 -8
  29. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  30. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  31. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  32. data/lib/canon/comparison/xml_comparator.rb +89 -83
  33. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  34. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  35. data/lib/canon/comparison.rb +25 -23
  36. data/lib/canon/config/profile_loader.rb +13 -13
  37. data/lib/canon/config.rb +29 -5
  38. data/lib/canon/diff/diff_classifier.rb +16 -42
  39. data/lib/canon/diff/diff_line.rb +1 -1
  40. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  41. data/lib/canon/diff/node_serializer.rb +23 -30
  42. data/lib/canon/diff/path_builder.rb +24 -37
  43. data/lib/canon/diff/source_locator.rb +0 -3
  44. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  45. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  46. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  49. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  50. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  52. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  53. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  54. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  55. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  56. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  57. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  58. data/lib/canon/diff_formatter/legend.rb +2 -2
  59. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  60. data/lib/canon/diff_formatter/theme.rb +4 -4
  61. data/lib/canon/diff_formatter.rb +2 -2
  62. data/lib/canon/formatters/html_formatter.rb +1 -1
  63. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  64. data/lib/canon/formatters/xml_formatter.rb +7 -32
  65. data/lib/canon/html/data_model.rb +1 -1
  66. data/lib/canon/pretty_printer/html.rb +1 -1
  67. data/lib/canon/pretty_printer/xml.rb +16 -7
  68. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  69. data/lib/canon/rspec_matchers.rb +2 -2
  70. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  71. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  72. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  73. data/lib/canon/validators/html_validator.rb +1 -1
  74. data/lib/canon/validators/xml_validator.rb +1 -1
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +131 -137
  77. data/lib/canon/xml/namespace_helper.rb +5 -0
  78. data/lib/canon/xml/node.rb +2 -1
  79. data/lib/canon/xml/nodes/root_node.rb +4 -0
  80. data/lib/canon/xml/nodes/text_node.rb +6 -1
  81. data/lib/canon/xml/sax_builder.rb +4 -6
  82. data/lib/canon/xml_backend.rb +49 -0
  83. data/lib/canon/xml_parsing.rb +271 -0
  84. data/lib/canon.rb +3 -1
  85. data/lib/tasks/benchmark_runner.rb +1 -1
  86. data/lib/tasks/performance_helpers.rb +1 -1
  87. metadata +7 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1800400419926b8607eb146490d0bc4a0ecf5e4bfaf2b3007a87e99d440661f3
4
- data.tar.gz: 0fc8298171e94fec5e9c4b650001fcc31e79ca774e1d7ad0f19fe91308199b18
3
+ metadata.gz: a13457a67f3e2ab91e00cec19684c502605ab807bdd87eb1120e77d190a99c2e
4
+ data.tar.gz: 35c0c873340e12c63048adf2222fda2f8c2ae3972337dcc212b26d391191ac35
5
5
  SHA512:
6
- metadata.gz: 5c553b671df23a70814bedb7521836b01ce7d6e0ab1af99f00aa05ca6f4ef875d8e8af2e4e63c208443d8ddbab92ed7a05c3c8c6828da2608c42bf6a38f0b7c4
7
- data.tar.gz: e183e77684bc3fe7c072caa904cf709a3c20e2f6c38099a42f099e3367d2b0c2336f5407a6def0147f7405056a26a639f4b9dd5075deb6b51a60bcb7921c7c44
6
+ metadata.gz: 8db915564eebd4ca4dfadd65358f721aa70bca318c22dc1c02eff5e3527cf646ea19722b760072851f358b3fabefd12fc5f6dfc216bce146423c7091f3bf7eac
7
+ data.tar.gz: f92e7491d781c8762483335558ede985a1653bcfb88613858115aa87e50bb326f95b0b76b845c54154e657fb9f25b3d1f348bf8e9baa926ea1c6bfbbd77d6ca6
data/.rspec-opal ADDED
@@ -0,0 +1,7 @@
1
+ --default-path=spec
2
+ --pattern='spec/canon/opal_xml_smoke_spec.rb'
3
+ -I lib
4
+ --opal-opt=-g,canon
5
+ -I spec
6
+ --require=spec_helper
7
+ --require=support/opal
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-04-27 09:48:55 UTC using RuboCop version 1.86.0.
3
+ # on 2026-05-24 10:34:05 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,51 +11,13 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 30
15
- # This cop supports safe autocorrection (--autocorrect).
16
- # Configuration parameters: EnforcedStyle, IndentationWidth.
17
- # SupportedStyles: with_first_argument, with_fixed_indentation
18
- Layout/ArgumentAlignment:
19
- Exclude:
20
- - 'lib/canon/comparison/xml_comparator.rb'
21
- - 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
22
-
23
- # Offense count: 1
24
- # This cop supports safe autocorrection (--autocorrect).
25
- # Configuration parameters: EnforcedStyleAlignWith.
26
- # SupportedStylesAlignWith: either, start_of_block, start_of_line
27
- Layout/BlockAlignment:
28
- Exclude:
29
- - 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
30
-
31
- # Offense count: 1
32
- # This cop supports safe autocorrection (--autocorrect).
33
- Layout/BlockEndNewline:
34
- Exclude:
35
- - 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
36
-
37
- # Offense count: 2
38
- # This cop supports safe autocorrection (--autocorrect).
39
- # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
40
- # SupportedStylesAlignWith: start_of_line, relative_to_receiver
41
- Layout/IndentationWidth:
42
- Exclude:
43
- - 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
44
-
45
- # Offense count: 1347
14
+ # Offense count: 1358
46
15
  # This cop supports safe autocorrection (--autocorrect).
47
16
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
48
17
  # URISchemes: http, https
49
18
  Layout/LineLength:
50
19
  Enabled: false
51
20
 
52
- # Offense count: 2
53
- # This cop supports safe autocorrection (--autocorrect).
54
- # Configuration parameters: AllowInHeredoc.
55
- Layout/TrailingWhitespace:
56
- Exclude:
57
- - 'lib/canon/comparison/xml_comparator.rb'
58
-
59
21
  # Offense count: 58
60
22
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
61
23
  Lint/DuplicateBranch:
@@ -69,13 +31,12 @@ Lint/EmptyConditionalBody:
69
31
  - 'spec/canon/comparison/html_comparator_spec.rb'
70
32
  - 'spec/canon/comparison_spec.rb'
71
33
 
72
- # Offense count: 6
34
+ # Offense count: 5
73
35
  # Configuration parameters: MaximumRangeSize.
74
36
  Lint/MissingCopEnableDirective:
75
37
  Exclude:
76
38
  - 'lib/canon/commands/format_command.rb'
77
39
  - 'lib/canon/xml/attribute_handler.rb'
78
- - 'lib/canon/xml/data_model.rb'
79
40
  - 'lib/canon/xml/namespace_handler.rb'
80
41
  - 'lib/canon/xml/processor.rb'
81
42
  - 'lib/canon/xml/xml_base_handler.rb'
@@ -101,7 +62,7 @@ Lint/UselessConstantScoping:
101
62
  Exclude:
102
63
  - 'lib/canon/diff_formatter/theme.rb'
103
64
 
104
- # Offense count: 322
65
+ # Offense count: 313
105
66
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
106
67
  Metrics/AbcSize:
107
68
  Enabled: false
@@ -117,12 +78,12 @@ Metrics/BlockLength:
117
78
  Metrics/BlockNesting:
118
79
  Max: 4
119
80
 
120
- # Offense count: 281
81
+ # Offense count: 276
121
82
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
122
83
  Metrics/CyclomaticComplexity:
123
84
  Enabled: false
124
85
 
125
- # Offense count: 517
86
+ # Offense count: 523
126
87
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
127
88
  Metrics/MethodLength:
128
89
  Max: 146
@@ -132,7 +93,7 @@ Metrics/MethodLength:
132
93
  Metrics/ParameterLists:
133
94
  Max: 10
134
95
 
135
- # Offense count: 225
96
+ # Offense count: 214
136
97
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
137
98
  Metrics/PerceivedComplexity:
138
99
  Enabled: false
@@ -165,13 +126,13 @@ Performance/CollectionLiteralInLoop:
165
126
  - 'lib/canon/xml/xml_base_handler.rb'
166
127
  - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
167
128
 
168
- # Offense count: 85
129
+ # Offense count: 107
169
130
  # Configuration parameters: Prefixes, AllowedPatterns.
170
131
  # Prefixes: when, with, without
171
132
  RSpec/ContextWording:
172
133
  Enabled: false
173
134
 
174
- # Offense count: 43
135
+ # Offense count: 47
175
136
  # Configuration parameters: IgnoredMetadata.
176
137
  RSpec/DescribeClass:
177
138
  Enabled: false
@@ -182,7 +143,7 @@ RSpec/DescribeMethod:
182
143
  - 'spec/canon/comparison/multiple_differences_spec.rb'
183
144
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
184
145
 
185
- # Offense count: 847
146
+ # Offense count: 874
186
147
  # Configuration parameters: CountAsOne.
187
148
  RSpec/ExampleLength:
188
149
  Max: 44
@@ -196,12 +157,6 @@ RSpec/ExpectActual:
196
157
  - 'spec/canon/rspec_matchers_spec.rb'
197
158
  - 'spec/canon/string_matcher_spec.rb'
198
159
 
199
- # Offense count: 7
200
- # This cop supports unsafe autocorrection (--autocorrect-all).
201
- RSpec/IncludeExamples:
202
- Exclude:
203
- - 'spec/canon/comparison/html4_html5_whitespace_parity_spec.rb'
204
-
205
160
  # Offense count: 177
206
161
  # Configuration parameters: Max, AllowedIdentifiers, AllowedPatterns.
207
162
  RSpec/IndexedLet:
@@ -240,7 +195,7 @@ RSpec/MultipleDescribes:
240
195
  Exclude:
241
196
  - 'spec/canon/comparison/match_options_spec.rb'
242
197
 
243
- # Offense count: 694
198
+ # Offense count: 736
244
199
  RSpec/MultipleExpectations:
245
200
  Max: 15
246
201
 
@@ -249,7 +204,7 @@ RSpec/MultipleExpectations:
249
204
  RSpec/MultipleMemoizedHelpers:
250
205
  Max: 16
251
206
 
252
- # Offense count: 17
207
+ # Offense count: 29
253
208
  # Configuration parameters: EnforcedStyle, IgnoreSharedExamples.
254
209
  # SupportedStyles: always, named_only
255
210
  RSpec/NamedSubject:
@@ -258,17 +213,18 @@ RSpec/NamedSubject:
258
213
  - 'spec/canon/pretty_printer/json_spec.rb'
259
214
  - 'spec/canon/pretty_printer/xml_spec.rb'
260
215
 
261
- # Offense count: 53
216
+ # Offense count: 54
262
217
  # Configuration parameters: AllowedGroups.
263
218
  RSpec/NestedGroups:
264
219
  Max: 4
265
220
 
266
- # Offense count: 10
221
+ # Offense count: 11
267
222
  # Configuration parameters: AllowedPatterns.
268
223
  # AllowedPatterns: ^expect_, ^assert_
269
224
  RSpec/NoExpectationExample:
270
225
  Exclude:
271
226
  - 'spec/canon/context_grouping_spec.rb'
227
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
272
228
  - 'spec/canon/informative_diffs_debug_spec.rb'
273
229
  - 'spec/canon/isodoc_blockquotes_spec.rb'
274
230
  - 'spec/canon/match_scenarios_spec.rb'
@@ -292,14 +248,13 @@ RSpec/SpecFilePathFormat:
292
248
  - 'spec/canon/yaml/formatter_spec.rb'
293
249
  - 'spec/xml_c14n_spec.rb'
294
250
 
295
- # Offense count: 134
251
+ # Offense count: 72
296
252
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
297
253
  RSpec/VerifiedDoubles:
298
254
  Exclude:
299
255
  - 'spec/canon/comparison/diff_node_builder_spec.rb'
300
256
  - 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
301
257
  - 'spec/canon/diff/diff_classifier_spec.rb'
302
- - 'spec/canon/diff/path_builder_spec.rb'
303
258
  - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
304
259
  - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
305
260
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
data/README.adoc CHANGED
@@ -618,6 +618,11 @@ See link:docs/MODES[Diff modes] for details.
618
618
  reported as a dedicated `:whitespace_adjacency` dimension with direction
619
619
  wording (`before`/`after`/`adjacent to`) instead of cascading into
620
620
  misleading `:text_content` mismatches
621
+ * **Asymmetric comment reporting**: A `<!-- ... -->` node present on only
622
+ one side is reported as a dedicated `:comments` dimension diff anchored
623
+ at the comment node, instead of shifting children alignment and
624
+ surfacing a misleading `:element_structure` "Element removed" diff
625
+ against an unrelated trailing sibling
621
626
  * **Non-ASCII detection**: Warnings for unexpected Unicode characters
622
627
  * **Customizable**: Character maps, context lines, grouping options
623
628
 
data/Rakefile CHANGED
@@ -5,10 +5,27 @@ require "rspec/core/rake_task"
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
+ begin
9
+ require "opal/rspec/rake_task"
10
+ rescue LoadError
11
+ # Opal not available or incompatible with current Ruby version
12
+ end
13
+
8
14
  require "rubocop/rake_task"
9
15
 
10
16
  RuboCop::RakeTask.new
11
17
 
12
18
  Dir.glob("lib/tasks/**/*.rake").each { |r| load r }
13
19
 
20
+ namespace :spec do
21
+ if defined?(Opal::RSpec::RakeTask)
22
+ desc "Run Opal (JavaScript) tests"
23
+ Opal::RSpec::RakeTask.new(:opal) do |server, runner|
24
+ server.append_path "lib"
25
+ runner.default_path = "spec"
26
+ runner.pattern = "spec/canon/opal_xml_smoke_spec.rb"
27
+ end
28
+ end
29
+ end
30
+
14
31
  task default: %i[spec rubocop]
data/docs/features/diff-formatting/comment-asymmetry.adoc ADDED
@@ -0,0 +1,160 @@
1
+ ---
2
+ title: Comment asymmetry in diff reports
3
+ parent: Diff Formatting
4
+ nav_order: 9
5
+ ---
6
+ = Comment asymmetry in diff reports
7
+ :toc:
8
+ :toclevels: 2
9
+
10
+ == Purpose
11
+
12
+ Canon's diff reports anchor `<!-- ... -->` comment nodes that have no
13
+ counterpart on the other side to a dedicated `:comments` dimension
14
+ instead of letting the resulting children-array length mismatch cascade
15
+ into a misleading `:element_structure` "Element removed" diff against
16
+ the trailing named sibling.
17
+
18
+ This is a *report-only* shape change — equivalence verdicts are
19
+ unchanged. Whether asymmetric comments cause a non-equivalent verdict
20
+ or not depends on the `comments` match option (`:strict` /
21
+ `:ignore` / `:exact`), as before.
22
+
23
+ == The problem
24
+
25
+ Consider an HTML fragment compared with `verbose: true`:
26
+
27
+ [source,html]
28
+ ----
29
+ <!-- expected -->
30
+ <body>
31
+ <div>first</div>
32
+ <div>second</div>
33
+ <!-- a comment that exists only on side A -->
34
+ <div style="mso-element:footnote-list"></div>
35
+ </body>
36
+
37
+ <!-- actual -->
38
+ <body>
39
+ <div>first</div>
40
+ <div>second</div>
41
+ <div style="mso-element:footnote-list"></div>
42
+ </body>
43
+ ----
44
+
45
+ The `<div style="mso-element:footnote-list">` is byte-identical between
46
+ the two sides; the only real difference is the comment on the expected
47
+ side. Pre-#144, the diff report contained:
48
+
49
+ [source]
50
+ ----
51
+ DIFFERENCE #1 — element_structure: Element removed:
52
+ <div style="mso-element:footnote-list"/>
53
+ ----
54
+
55
+ That is the wrong dimension, anchored at the wrong node. The element is
56
+ present on both sides — what is missing is the comment.
57
+
58
+ The cascade comes from positional alignment in
59
+ `Canon::Comparison::HtmlComparator#compare_fragment_children` (and the
60
+ analogous walker in `XmlComparatorHelpers::ChildComparison`): in
61
+ verbose mode, comments are intentionally kept by `filter_children` so
62
+ informative differences can be recorded, but the resulting unequal
63
+ children-array lengths fell through to a name-based mismatch heuristic
64
+ that filtered out generic `#`-prefixed names (`#text`, `#comment`),
65
+ leaving the trailing named element to take the blame.
66
+
67
+ == The contract
68
+
69
+ When the children alignment encounters a comment node on one side
70
+ paired against a non-comment node on the other (or sitting past the
71
+ trailing edge of the shorter side), Canon:
72
+
73
+ 1. Treats the comment as a *single-side gap* in the alignment.
74
+ 2. Emits one `:comments` diff entry anchored at the comment node
75
+ itself (not at a mis-paired neighbouring element).
76
+ 3. Advances only the cursor that carries the comment, so the next
77
+ iteration aligns content against content.
78
+
79
+ The Reason line names the side that carries the comment and surfaces
80
+ its text:
81
+
82
+ [source]
83
+ ----
84
+ DIFFERENCE #1 — comments: Comment present on EXPECTED only:
85
+ <!-- a comment that exists only on side A -->
86
+ ----
87
+
88
+ == Combined with whitespace asymmetry
89
+
90
+ The same realignment walk handles asymmetric whitespace-only text
91
+ nodes (link:whitespace-adjacency.adoc[issue #137]) and asymmetric
92
+ comment nodes together. When a children mismatch is fully explained by
93
+ a combination of asymmetric whitespace and asymmetric comments, the
94
+ walker emits one diff per asymmetric node with the appropriate
95
+ dimension (`:whitespace_adjacency` for whitespace, `:comments` for
96
+ comments) — no `:element_structure` diff is produced.
97
+
98
+ When a real structural mismatch coexists with an asymmetric comment,
99
+ both kinds of diff are emitted — the structural one under
100
+ `:element_structure`, the comment one under `:comments`.
101
+
102
+ == Working with :comments diffs programmatically
103
+
104
+ [source,ruby]
105
+ ----
106
+ result = Canon::Comparison.equivalent?(html1, html2,
107
+ format: :html5, verbose: true)
108
+
109
+ comment_diffs = result.differences.select { |d| d.dimension == :comments }
110
+
111
+ # Whether these affect equivalence depends on the comments match option.
112
+ # Under the default :ignore profile they are informative; under :strict
113
+ # they are normative.
114
+ ----
115
+
116
+ == What this contract does NOT do
117
+
118
+ * **Does not silence asymmetric comments.** They are always reported
119
+ in verbose output; the change is the dimension label and the anchor
120
+ node.
121
+ * **Does not affect symmetric comments.** When both sides carry
122
+ parallel comment nodes, those compare normally — content-vs-content
123
+ comparison applies.
124
+ * **Does not change equivalence outcomes.** A comparison whose
125
+ equivalence verdict was driven by asymmetric comments retains the
126
+ same verdict; only the report shape changes.
127
+
128
+ == Where it runs
129
+
130
+ The noise-aware realignment is a single shared implementation:
131
+
132
+ * `Canon::Comparison::ChildRealignment` — the two-cursor walk that
133
+ detects noise nodes via `NodeInspector.noise_dimension_for`,
134
+ emits per-orphan diffs with the appropriate dimension
135
+ (`:whitespace_adjacency`, `:comments`), and advances only the
136
+ noise-side cursor so content nodes stay aligned.
137
+
138
+ Both comparison paths delegate to `ChildRealignment.walk`:
139
+
140
+ * `Canon::Comparison::HtmlComparator#compare_fragment_children` — the
141
+ HTML fragment path (passes `emit_structural_orphans: true` because it
142
+ has no separate length-mismatch step).
143
+ * `Canon::Comparison::XmlComparatorHelpers::ChildComparison` — the XML
144
+ comparator path (passes `emit_structural_orphans: false`; structural
145
+ orphans are handled by the pre-walk length-mismatch step via
146
+ `asymmetric_noise_explains_length_diff?`).
147
+
148
+ == Related
149
+
150
+ * link:whitespace-adjacency.adoc[Whitespace adjacency] — sibling
151
+ contract for asymmetric whitespace-only text nodes.
152
+ * link:../../advanced/diff-classification.adoc[Diff classification] —
153
+ Normative vs informative differences.
154
+
155
+ == History
156
+
157
+ The false-positive cascade was reported in
158
+ https://github.com/lutaml/canon/issues/144[issue #144]. The fix
159
+ mirrors the structure of the `:whitespace_adjacency` work in
160
+ https://github.com/lutaml/canon/issues/137[issue #137].
data/lib/canon/cli.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "thor"
3
+ require "thor" unless RUBY_ENGINE == "opal"
4
4
  require_relative "commands/format_command"
5
5
  require_relative "commands/diff_command"
6
6
  require_relative "options/registry"
data/lib/canon/color_detector.rb CHANGED
@@ -67,11 +67,9 @@ module Canon
67
67
  # @param io [IO] Output stream
68
68
  # @return [Boolean] true if the stream is a TTY
69
69
  def tty?(io)
70
- return false unless io.respond_to?(:tty?)
71
- return false unless io.respond_to?(:isatty)
72
-
73
- # Ruby 2.5+ uses tty?, older uses isatty
74
- io.tty? || io.isatty
70
+ io.tty?
71
+ rescue NoMethodError
72
+ false
75
73
  rescue ArgumentError, IOError
76
74
  # Stream might be closed or invalid
77
75
  false
data/lib/canon/comparison/child_realignment.rb ADDED
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "node_inspector"
4
+
5
+ module Canon
6
+ module Comparison
7
+ # Shared two-cursor walk over child arrays with noise-aware realignment.
8
+ #
9
+ # When positional pairing would match a noise node (whitespace-only
10
+ # text or comment) against a content node, the walker treats the
11
+ # noise node as a single-side gap: emits a diff for it and advances
12
+ # only that cursor, so the next iteration aligns content against
13
+ # content.
14
+ #
15
+ # Noise classification is delegated to +NodeInspector.noise_dimension_for+,
16
+ # making the walk open for extension — new noise types only require
17
+ # adding a branch there.
18
+ #
19
+ # The walk is parameterised by a diff emitter (a callable that
20
+ # receives node1, node2, diff1, diff2, dimension) so both the HTML
21
+ # comparator (DiffNodeBuilder.build) and the XML comparator
22
+ # (comparator.add_difference) reuse the same cursor logic.
23
+ module ChildRealignment
24
+ class << self
25
+ # Walk two child arrays, emitting diffs for noise nodes and
26
+ # yielding matched content pairs.
27
+ #
28
+ # @param children1 [Array] Left-side children
29
+ # @param children2 [Array] Right-side children
30
+ # @param emitter [#call] Callable receiving
31
+ # (node1, node2, diff1, diff2, dimension)
32
+ # @param emit_structural_orphans [Boolean] When true, trailing-edge
33
+ # non-noise orphans are emitted as +:element_structure+ diffs.
34
+ # HTML fragment path sets this to true (it has no separate
35
+ # length-mismatch step); XML path sets it to false (structural
36
+ # orphans are already recorded by +use_positional_comparison+).
37
+ # @yield [child1, child2] Compare two matched content nodes.
38
+ # Must return a Comparison result constant.
39
+ # @return [Symbol] Worst comparison result encountered
40
+ def walk(children1, children2, emitter,
41
+ emit_structural_orphans: false)
42
+ worst = Comparison::EQUIVALENT
43
+ i = 0
44
+ j = 0
45
+
46
+ while i < children1.length || j < children2.length
47
+ child1 = children1[i]
48
+ child2 = children2[j]
49
+
50
+ if child1.nil?
51
+ result = emit_orphan(child2, :right, emitter,
52
+ emit_structural_orphans)
53
+ worst = result if result && result != Comparison::EQUIVALENT
54
+ j += 1
55
+ next
56
+ elsif child2.nil?
57
+ result = emit_orphan(child1, :left, emitter,
58
+ emit_structural_orphans)
59
+ worst = result if result && result != Comparison::EQUIVALENT
60
+ i += 1
61
+ next
62
+ end
63
+
64
+ dim1 = NodeInspector.noise_dimension_for(child1)
65
+ dim2 = NodeInspector.noise_dimension_for(child2)
66
+
67
+ if dim1 && !dim2
68
+ result = emit_inline_noise(child1, child2, dim1, :left, emitter)
69
+ worst = result unless result == Comparison::EQUIVALENT
70
+ i += 1
71
+ next
72
+ elsif dim2 && !dim1
73
+ result = emit_inline_noise(child1, child2, dim2, :right, emitter)
74
+ worst = result unless result == Comparison::EQUIVALENT
75
+ j += 1
76
+ next
77
+ end
78
+
79
+ if block_given?
80
+ child_result = yield(child1, child2)
81
+ worst = child_result unless child_result == Comparison::EQUIVALENT
82
+ end
83
+ i += 1
84
+ j += 1
85
+ end
86
+
87
+ worst
88
+ end
89
+
90
+ private
91
+
92
+ # Emit a diff for an inline noise node that sits opposite a
93
+ # content node. Whitespace passes both nodes for context;
94
+ # comments pass only the comment node.
95
+ def emit_inline_noise(node_left, node_right, dimension, noise_side,
96
+ emitter)
97
+ if dimension == :whitespace_adjacency
98
+ emitter.call(node_left, node_right,
99
+ Comparison::UNEQUAL_TEXT_CONTENTS,
100
+ Comparison::UNEQUAL_TEXT_CONTENTS,
101
+ dimension)
102
+ Comparison::UNEQUAL_TEXT_CONTENTS
103
+ else
104
+ n1 = noise_side == :left ? node_left : nil
105
+ n2 = noise_side == :right ? node_right : nil
106
+ emitter.call(n1, n2,
107
+ Comparison::MISSING_NODE,
108
+ Comparison::MISSING_NODE,
109
+ dimension)
110
+ Comparison::UNEQUAL_ELEMENTS
111
+ end
112
+ end
113
+
114
+ # Emit a diff for a trailing-edge orphan (one side exhausted).
115
+ # Noise orphans are always emitted; structural orphans only when
116
+ # +emit_structural+ is true.
117
+ def emit_orphan(orphan, side, emitter, emit_structural)
118
+ dim = NodeInspector.noise_dimension_for(orphan)
119
+ if dim
120
+ n1 = side == :left ? orphan : nil
121
+ n2 = side == :right ? orphan : nil
122
+ emitter.call(n1, n2,
123
+ Comparison::MISSING_NODE,
124
+ Comparison::MISSING_NODE,
125
+ dim)
126
+ Comparison::UNEQUAL_ELEMENTS
127
+ elsif emit_structural
128
+ n1 = side == :left ? orphan : nil
129
+ n2 = side == :right ? orphan : nil
130
+ emitter.call(n1, n2,
131
+ Comparison::MISSING_NODE,
132
+ Comparison::MISSING_NODE,
133
+ :element_structure)
134
+ Comparison::UNEQUAL_ELEMENTS
135
+ end
136
+ end
137
+ end
138
+ end
139
+ end
140
+ end
data/lib/canon/comparison/compare_profile.rb CHANGED
@@ -82,14 +82,11 @@ module Canon
82
82
  %i[text_content structural_whitespace].include?(dimension)
83
83
  end
84
84
 
85
- private
86
-
87
85
  # Get the behavior setting for a dimension
88
86
  # @param dimension [Symbol] The match dimension
89
87
  # @return [Symbol] The behavior (:strict, :normalize, :ignore)
90
88
  def behavior_for(dimension)
91
- # Handle both ResolvedMatchOptions and Hash
92
- if match_options.respond_to?(:behavior_for)
89
+ if match_options.is_a?(ResolvedMatchOptions)
93
90
  match_options.behavior_for(dimension)
94
91
  elsif match_options.is_a?(Hash)
95
92
  match_options[dimension] || :strict
data/lib/canon/comparison/dimensions/attribute_order_dimension.rb CHANGED
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30
 
data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb CHANGED
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30
 
data/lib/canon/comparison/dimensions/attribute_values_dimension.rb CHANGED
@@ -27,14 +27,10 @@ module Canon
27
27
  def extract_data(node)
28
28
  return {} unless node
29
29
 
30
- # Handle Moxml nodes
31
- if node.is_a?(Moxml::Node)
32
- extract_from_moxml(node)
33
- # Handle Nokogiri nodes
34
- elsif node.is_a?(Nokogiri::XML::Node)
30
+ if Canon::XmlBackend.nokogiri?
35
31
  extract_from_nokogiri(node)
36
32
  else
37
- {}
33
+ extract_from_moxml(node)
38
34
  end
39
35
  end
40
36
 
data/lib/canon/comparison/dimensions/comments_dimension.rb CHANGED
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30