canon 0.1.18 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +30 -76
- data/docs/understanding/formats/xml.adoc +17 -0
- data/lib/canon/comparison/markup_comparator.rb +3 -1
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +24 -25
- data/lib/canon/comparison.rb +36 -2
- data/lib/canon/diff/diff_line.rb +9 -3
- data/lib/canon/diff/diff_node_mapper.rb +261 -50
- data/lib/canon/diff/formatting_detector.rb +260 -12
- data/lib/canon/diff/node_serializer.rb +3 -1
- data/lib/canon/diff/path_builder.rb +14 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +99 -19
- data/lib/canon/diff_formatter/debug_output.rb +16 -3
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +18 -4
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +6 -1
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +51 -15
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +41 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +4 -2
- data/lib/canon/tree_diff/core/xml_entity_decoder.rb +52 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +19 -104
- data/lib/canon/tree_diff/operation_converter.rb +18 -4
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +47 -2
- data/lib/canon/tree_diff/operations/operation_detector.rb +7 -4
- data/lib/canon/tree_diff/tree_diff_integrator.rb +57 -2
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +6 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +10 -2
- data/lib/canon/xml/sax_builder.rb +7 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5caff8b02e8a381d4febbcbce1b0eeb4000599385428db08b8292fb35b76cfd5
|
|
4
|
+
data.tar.gz: ee62fd3b76217834e32c026bddcdf889ba9bad48772e5520b0bce7da4d3f982e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3a317a23324d26f55980d532e07c4d67b1fdfe9cf9e62afdd59ed0381be1f2ebcc6a19da92c02ab9c3e336ec10eda7885dca78d8fbe6de3717876d84f65bc05a
|
|
7
|
+
data.tar.gz: e38169560e771711fc1f79e60915e8412c0c8229244ac33f23107a87ede5e0f7fad87c47cf85a1519904256f985ed6abfb6d047d04e83c6865fc8ef23163a7d3
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-03-
|
|
3
|
+
# on 2026-03-25 12:14:38 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,64 +11,36 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count: 10
|
|
15
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
16
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
-
Layout/ArgumentAlignment:
|
|
19
|
-
Exclude:
|
|
20
|
-
- 'lib/canon/xml/data_model.rb'
|
|
21
|
-
- 'spec/canon/comparison/encoding_normalization_spec.rb'
|
|
22
|
-
- 'spec/canon/comparison/xml_whitespace_spec.rb'
|
|
23
|
-
|
|
24
14
|
# Offense count: 1
|
|
25
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
26
|
-
# Configuration parameters:
|
|
27
|
-
#
|
|
28
|
-
Layout/
|
|
16
|
+
# Configuration parameters: EnforcedStyleAlignWith.
|
|
17
|
+
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
18
|
+
Layout/BlockAlignment:
|
|
29
19
|
Exclude:
|
|
30
|
-
- '
|
|
20
|
+
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
31
21
|
|
|
32
22
|
# Offense count: 1
|
|
33
23
|
# This cop supports safe autocorrection (--autocorrect).
|
|
34
|
-
Layout/
|
|
24
|
+
Layout/BlockEndNewline:
|
|
35
25
|
Exclude:
|
|
36
|
-
- '
|
|
26
|
+
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
37
27
|
|
|
38
|
-
# Offense count:
|
|
28
|
+
# Offense count: 2
|
|
39
29
|
# This cop supports safe autocorrection (--autocorrect).
|
|
40
|
-
# Configuration parameters:
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
44
|
-
Layout/HashAlignment:
|
|
30
|
+
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
31
|
+
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
32
|
+
Layout/IndentationWidth:
|
|
45
33
|
Exclude:
|
|
46
|
-
- '
|
|
34
|
+
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
47
35
|
|
|
48
|
-
# Offense count:
|
|
36
|
+
# Offense count: 858
|
|
49
37
|
# This cop supports safe autocorrection (--autocorrect).
|
|
50
38
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
51
39
|
# URISchemes: http, https
|
|
52
40
|
Layout/LineLength:
|
|
53
41
|
Enabled: false
|
|
54
42
|
|
|
55
|
-
# Offense count:
|
|
56
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
57
|
-
# Configuration parameters: EnforcedStyle.
|
|
58
|
-
# SupportedStyles: symmetrical, new_line, same_line
|
|
59
|
-
Layout/MultilineMethodCallBraceLayout:
|
|
60
|
-
Exclude:
|
|
61
|
-
- 'spec/canon/comparison/encoding_normalization_spec.rb'
|
|
62
|
-
- 'spec/canon/comparison/xml_whitespace_spec.rb'
|
|
63
|
-
|
|
64
|
-
# Offense count: 2
|
|
65
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
66
|
-
# Configuration parameters: AllowInHeredoc.
|
|
67
|
-
Layout/TrailingWhitespace:
|
|
68
|
-
Exclude:
|
|
69
|
-
- 'lib/canon/comparison/format_detector.rb'
|
|
70
|
-
|
|
71
|
-
# Offense count: 49
|
|
43
|
+
# Offense count: 48
|
|
72
44
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
73
45
|
Lint/DuplicateBranch:
|
|
74
46
|
Enabled: false
|
|
@@ -97,13 +69,6 @@ Lint/UnreachableCode:
|
|
|
97
69
|
Exclude:
|
|
98
70
|
- 'lib/canon/diff_formatter/debug_output.rb'
|
|
99
71
|
|
|
100
|
-
# Offense count: 1
|
|
101
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
102
|
-
# Configuration parameters: IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
|
|
103
|
-
Lint/UnusedBlockArgument:
|
|
104
|
-
Exclude:
|
|
105
|
-
- 'lib/canon/xml/data_model.rb'
|
|
106
|
-
|
|
107
72
|
# Offense count: 6
|
|
108
73
|
# This cop supports safe autocorrection (--autocorrect).
|
|
109
74
|
# Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
|
|
@@ -115,7 +80,7 @@ Lint/UnusedMethodArgument:
|
|
|
115
80
|
- 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
|
|
116
81
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
117
82
|
|
|
118
|
-
# Offense count:
|
|
83
|
+
# Offense count: 238
|
|
119
84
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
120
85
|
Metrics/AbcSize:
|
|
121
86
|
Enabled: false
|
|
@@ -124,24 +89,24 @@ Metrics/AbcSize:
|
|
|
124
89
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
|
125
90
|
# AllowedMethods: refine
|
|
126
91
|
Metrics/BlockLength:
|
|
127
|
-
Max:
|
|
92
|
+
Max: 85
|
|
128
93
|
|
|
129
|
-
# Offense count:
|
|
94
|
+
# Offense count: 198
|
|
130
95
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
131
96
|
Metrics/CyclomaticComplexity:
|
|
132
97
|
Enabled: false
|
|
133
98
|
|
|
134
|
-
# Offense count:
|
|
99
|
+
# Offense count: 413
|
|
135
100
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
136
101
|
Metrics/MethodLength:
|
|
137
|
-
Max:
|
|
102
|
+
Max: 104
|
|
138
103
|
|
|
139
|
-
# Offense count:
|
|
104
|
+
# Offense count: 46
|
|
140
105
|
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
141
106
|
Metrics/ParameterLists:
|
|
142
107
|
Max: 9
|
|
143
108
|
|
|
144
|
-
# Offense count:
|
|
109
|
+
# Offense count: 162
|
|
145
110
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
146
111
|
Metrics/PerceivedComplexity:
|
|
147
112
|
Enabled: false
|
|
@@ -165,26 +130,21 @@ Naming/VariableNumber:
|
|
|
165
130
|
- 'lib/canon/comparison/markup_comparator.rb'
|
|
166
131
|
- 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
|
|
167
132
|
|
|
168
|
-
# Offense count:
|
|
133
|
+
# Offense count: 3
|
|
169
134
|
# Configuration parameters: MinSize.
|
|
170
135
|
Performance/CollectionLiteralInLoop:
|
|
171
136
|
Exclude:
|
|
172
137
|
- 'lib/canon/comparison/html_comparator.rb'
|
|
173
138
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
139
|
+
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
174
140
|
|
|
175
|
-
# Offense count:
|
|
176
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
177
|
-
Performance/UnfreezeString:
|
|
178
|
-
Exclude:
|
|
179
|
-
- 'spec/canon/comparison/encoding_normalization_spec.rb'
|
|
180
|
-
|
|
181
|
-
# Offense count: 68
|
|
141
|
+
# Offense count: 71
|
|
182
142
|
# Configuration parameters: Prefixes, AllowedPatterns.
|
|
183
143
|
# Prefixes: when, with, without
|
|
184
144
|
RSpec/ContextWording:
|
|
185
145
|
Enabled: false
|
|
186
146
|
|
|
187
|
-
# Offense count:
|
|
147
|
+
# Offense count: 30
|
|
188
148
|
# Configuration parameters: IgnoredMetadata.
|
|
189
149
|
RSpec/DescribeClass:
|
|
190
150
|
Enabled: false
|
|
@@ -195,7 +155,7 @@ RSpec/DescribeMethod:
|
|
|
195
155
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
196
156
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
197
157
|
|
|
198
|
-
# Offense count:
|
|
158
|
+
# Offense count: 707
|
|
199
159
|
# Configuration parameters: CountAsOne.
|
|
200
160
|
RSpec/ExampleLength:
|
|
201
161
|
Max: 43
|
|
@@ -247,7 +207,7 @@ RSpec/MultipleDescribes:
|
|
|
247
207
|
Exclude:
|
|
248
208
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
249
209
|
|
|
250
|
-
# Offense count:
|
|
210
|
+
# Offense count: 546
|
|
251
211
|
RSpec/MultipleExpectations:
|
|
252
212
|
Max: 15
|
|
253
213
|
|
|
@@ -298,7 +258,7 @@ RSpec/SpecFilePathFormat:
|
|
|
298
258
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
299
259
|
- 'spec/xml_c14n_spec.rb'
|
|
300
260
|
|
|
301
|
-
# Offense count:
|
|
261
|
+
# Offense count: 128
|
|
302
262
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
303
263
|
RSpec/VerifiedDoubles:
|
|
304
264
|
Exclude:
|
|
@@ -307,6 +267,7 @@ RSpec/VerifiedDoubles:
|
|
|
307
267
|
- 'spec/canon/diff/diff_classifier_spec.rb'
|
|
308
268
|
- 'spec/canon/diff/path_builder_spec.rb'
|
|
309
269
|
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
270
|
+
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
310
271
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
311
272
|
|
|
312
273
|
# Offense count: 1
|
|
@@ -318,7 +279,7 @@ RSpec/VerifiedDoubles:
|
|
|
318
279
|
# AllowedMethods: lambda, proc, it
|
|
319
280
|
Style/BlockDelimiters:
|
|
320
281
|
Exclude:
|
|
321
|
-
- 'spec/canon/
|
|
282
|
+
- 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
|
|
322
283
|
|
|
323
284
|
# Offense count: 1
|
|
324
285
|
# This cop supports safe autocorrection (--autocorrect).
|
|
@@ -342,13 +303,6 @@ Style/IdenticalConditionalBranches:
|
|
|
342
303
|
- 'lib/canon/diff_formatter/by_object/base_formatter.rb'
|
|
343
304
|
- 'lib/canon/diff_formatter/legend.rb'
|
|
344
305
|
|
|
345
|
-
# Offense count: 1
|
|
346
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
347
|
-
# Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
|
|
348
|
-
Style/MultipleComparison:
|
|
349
|
-
Exclude:
|
|
350
|
-
- 'lib/canon/comparison/format_detector.rb'
|
|
351
|
-
|
|
352
306
|
# Offense count: 1
|
|
353
307
|
# Configuration parameters: AllowedMethods.
|
|
354
308
|
# AllowedMethods: respond_to_missing?
|
|
@@ -208,6 +208,23 @@ differently depending on the operation:
|
|
|
208
208
|
This means documents with and without XML declarations are considered
|
|
209
209
|
equivalent when using the comparison API.
|
|
210
210
|
|
|
211
|
+
==== Case-insensitive declaration attributes
|
|
212
|
+
|
|
213
|
+
Per the XML specification, certain XML declaration attributes have
|
|
214
|
+
case-insensitive values. Canon normalizes these during formatting
|
|
215
|
+
comparison so that differences in letter casing are treated as
|
|
216
|
+
formatting-only rather than normative differences.
|
|
217
|
+
|
|
218
|
+
**Case-insensitive attributes:**
|
|
219
|
+
|
|
220
|
+
* `encoding` — e.g., `encoding="UTF-8"` equals `encoding="utf-8"`
|
|
221
|
+
* `standalone` — e.g., `standalone="Yes"` equals `standalone="yes"`
|
|
222
|
+
|
|
223
|
+
These attributes are normalized to lowercase during formatting-level
|
|
224
|
+
comparison in `Canon::Diff::FormattingDetector`. This applies when
|
|
225
|
+
determining whether a line-level difference is formatting-only (e.g.,
|
|
226
|
+
in `by_line` diff mode).
|
|
227
|
+
|
|
211
228
|
=== Comment handling
|
|
212
229
|
|
|
213
230
|
XML comments are preserved in canonical form unless `--with-comments` is explicitly set.
|
|
@@ -93,7 +93,9 @@ module Canon
|
|
|
93
93
|
elsif node.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
94
94
|
serialize_element_node(node)
|
|
95
95
|
elsif node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
96
|
-
|
|
96
|
+
# Use original text (with entity references) if available,
|
|
97
|
+
# otherwise fall back to value (decoded text)
|
|
98
|
+
node.original || node.value
|
|
97
99
|
elsif node.is_a?(Canon::Xml::Nodes::CommentNode)
|
|
98
100
|
"<!--#{node.value}-->"
|
|
99
101
|
elsif node.is_a?(Canon::Xml::Nodes::ProcessingInstructionNode)
|
|
@@ -143,8 +143,11 @@ diff_children, differences)
|
|
|
143
143
|
children1, children2, _parent_node, comparator,
|
|
144
144
|
opts, child_opts, diff_children, differences
|
|
145
145
|
)
|
|
146
|
+
has_mismatch = false
|
|
147
|
+
|
|
146
148
|
# Length check
|
|
147
149
|
unless children1.length == children2.length
|
|
150
|
+
has_mismatch = true
|
|
148
151
|
dimension = determine_dimension_for_mismatch(
|
|
149
152
|
children1, children2, comparator
|
|
150
153
|
)
|
|
@@ -154,28 +157,20 @@ diff_children, differences)
|
|
|
154
157
|
children1, children2, comparator
|
|
155
158
|
)
|
|
156
159
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
comparator.send(:add_difference, child, nil,
|
|
172
|
-
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
173
|
-
dimension, opts, differences)
|
|
174
|
-
else
|
|
175
|
-
comparator.send(:add_difference, nil, child,
|
|
176
|
-
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
177
|
-
dimension, opts, differences)
|
|
178
|
-
end
|
|
160
|
+
if mismatched_children.empty?
|
|
161
|
+
comparator.send(:add_difference, parent_node, parent_node,
|
|
162
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
163
|
+
dimension, opts, differences)
|
|
164
|
+
else
|
|
165
|
+
mismatched_children.each do |child|
|
|
166
|
+
if children1.length > children2.length # rubocop:disable Metrics/BlockNesting
|
|
167
|
+
comparator.send(:add_difference, child, nil,
|
|
168
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
169
|
+
dimension, opts, differences)
|
|
170
|
+
else
|
|
171
|
+
comparator.send(:add_difference, nil, child,
|
|
172
|
+
Comparison::MISSING_NODE, Comparison::MISSING_NODE,
|
|
173
|
+
dimension, opts, differences)
|
|
179
174
|
end
|
|
180
175
|
end
|
|
181
176
|
end
|
|
@@ -184,7 +179,7 @@ diff_children, differences)
|
|
|
184
179
|
end
|
|
185
180
|
|
|
186
181
|
# Compare children pairwise by position
|
|
187
|
-
result = Comparison::EQUIVALENT
|
|
182
|
+
result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
|
|
188
183
|
children1.zip(children2).each do |child1, child2|
|
|
189
184
|
# Skip if one is nil (due to different lengths)
|
|
190
185
|
next if child1.nil? || child2.nil?
|
|
@@ -220,8 +215,12 @@ diff_children, differences)
|
|
|
220
215
|
elsif !comparator.send(:same_node_type?, children1[i],
|
|
221
216
|
children2[i])
|
|
222
217
|
# Different node types at same position
|
|
223
|
-
|
|
224
|
-
|
|
218
|
+
# Check both nodes - if either is a comment, use :comments dimension
|
|
219
|
+
dim1 = comparator.send(:determine_node_dimension,
|
|
220
|
+
children1[i])
|
|
221
|
+
dim2 = comparator.send(:determine_node_dimension,
|
|
222
|
+
children2[i])
|
|
223
|
+
dimension = [dim1, dim2].include?(:comments) ? :comments : dim1
|
|
225
224
|
break
|
|
226
225
|
end
|
|
227
226
|
end
|
data/lib/canon/comparison.rb
CHANGED
|
@@ -224,9 +224,33 @@ module Canon
|
|
|
224
224
|
raise Canon::CompareFormatMismatchError.new(format1, format2)
|
|
225
225
|
end
|
|
226
226
|
|
|
227
|
+
# Get global config options if not defined in opts
|
|
228
|
+
# This is needed because semantic_diff doesn't go through dom_diff's config handling
|
|
229
|
+
if !(opts[:match_profile] || opts[:global_options]) && Canon::Config.instance.respond_to?(format1)
|
|
230
|
+
format_config = Canon::Config.instance.public_send(format1)
|
|
231
|
+
if format_config.match.profile
|
|
232
|
+
opts[:match_profile] =
|
|
233
|
+
format_config.match.profile
|
|
234
|
+
end
|
|
235
|
+
if format_config.match.options && !format_config.match.options.empty?
|
|
236
|
+
opts[:global_options] =
|
|
237
|
+
format_config.match.options
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
|
|
227
241
|
# Resolve match options for the format
|
|
228
242
|
match_opts_hash = resolve_match_options(format1, opts)
|
|
229
243
|
|
|
244
|
+
# Also read diff options from config (e.g., max_node_count for large documents)
|
|
245
|
+
# This is independent of match options and needs to be passed to TreeDiffIntegrator
|
|
246
|
+
if !match_opts_hash[:max_node_count] && Canon::Config.instance.respond_to?(format1)
|
|
247
|
+
diff_max_node = Canon::Config.instance.public_send(format1).diff.max_node_count
|
|
248
|
+
if diff_max_node > 10_000
|
|
249
|
+
match_opts_hash[:max_node_count] =
|
|
250
|
+
diff_max_node
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
|
|
230
254
|
# Delegate parsing to comparators (reuses existing preprocessing logic)
|
|
231
255
|
doc1, doc2 = parse_with_comparator(obj1, obj2, format1, match_opts_hash)
|
|
232
256
|
|
|
@@ -243,11 +267,21 @@ module Canon
|
|
|
243
267
|
# Perform diff
|
|
244
268
|
tree_diff_result = integrator.diff(doc1, doc2)
|
|
245
269
|
|
|
270
|
+
# Extract only match-related keys for OperationConverter and SemanticTreeMatchStrategy
|
|
271
|
+
# These components expect match options, not diff options like max_node_count
|
|
272
|
+
match_only_keys = %i[match_profile match preprocessing
|
|
273
|
+
text_content structural_whitespace attribute_presence
|
|
274
|
+
attribute_order attribute_values element_position
|
|
275
|
+
comments format similarity_threshold hash_matching
|
|
276
|
+
similarity_matching propagation sensitive_elements
|
|
277
|
+
whitespace_sensitive_elements respect_xml_space]
|
|
278
|
+
match_options_only = match_opts_hash.slice(*match_only_keys)
|
|
279
|
+
|
|
246
280
|
# Convert operations to DiffNodes for unified pipeline
|
|
247
281
|
# CRITICAL: Use match_opts_hash (resolved options with profile) not opts[:match]
|
|
248
282
|
converter = Canon::TreeDiff::OperationConverter.new(
|
|
249
283
|
format: format1,
|
|
250
|
-
match_options:
|
|
284
|
+
match_options: match_options_only,
|
|
251
285
|
)
|
|
252
286
|
diff_nodes = converter.convert(tree_diff_result[:operations])
|
|
253
287
|
|
|
@@ -255,7 +289,7 @@ module Canon
|
|
|
255
289
|
# This matches DOM diff preprocessing pattern (xml_comparator.rb:106-109)
|
|
256
290
|
require_relative "comparison/strategies/semantic_tree_match_strategy"
|
|
257
291
|
strategy = Comparison::Strategies::SemanticTreeMatchStrategy.new(
|
|
258
|
-
format: format1, match_options:
|
|
292
|
+
format: format1, match_options: match_options_only,
|
|
259
293
|
)
|
|
260
294
|
str1, str2 = strategy.preprocess_for_display(doc1, doc2)
|
|
261
295
|
|
data/lib/canon/diff/diff_line.rb
CHANGED
|
@@ -5,16 +5,20 @@ module Canon
|
|
|
5
5
|
# Represents a single line in the diff output
|
|
6
6
|
# Links textual representation to semantic DiffNode
|
|
7
7
|
class DiffLine
|
|
8
|
-
attr_reader :line_number, :content, :type, :diff_node
|
|
8
|
+
attr_reader :line_number, :new_position, :content, :type, :diff_node
|
|
9
|
+
attr_writer :formatting
|
|
9
10
|
|
|
10
|
-
# @param line_number [Integer] The line number in
|
|
11
|
+
# @param line_number [Integer] The 0-based line number in text1 (old text)
|
|
12
|
+
# @param new_position [Integer, nil] The 0-based line number in text2 (new text),
|
|
13
|
+
# used for :changed lines where old and new positions differ
|
|
11
14
|
# @param content [String] The text content of the line
|
|
12
15
|
# @param type [Symbol] The type of line (:unchanged, :added, :removed, :changed)
|
|
13
16
|
# @param diff_node [DiffNode, nil] The semantic diff node this line belongs to
|
|
14
17
|
# @param formatting [Boolean] Whether this is a formatting-only difference
|
|
15
18
|
def initialize(line_number:, content:, type:, diff_node: nil,
|
|
16
|
-
formatting: false)
|
|
19
|
+
formatting: false, new_position: nil)
|
|
17
20
|
@line_number = line_number
|
|
21
|
+
@new_position = new_position
|
|
18
22
|
@content = content
|
|
19
23
|
@type = type
|
|
20
24
|
@diff_node = diff_node
|
|
@@ -71,6 +75,7 @@ formatting: false)
|
|
|
71
75
|
def to_h
|
|
72
76
|
{
|
|
73
77
|
line_number: line_number,
|
|
78
|
+
new_position: new_position,
|
|
74
79
|
content: content,
|
|
75
80
|
type: type,
|
|
76
81
|
diff_node: diff_node&.to_h,
|
|
@@ -83,6 +88,7 @@ formatting: false)
|
|
|
83
88
|
def ==(other)
|
|
84
89
|
other.is_a?(DiffLine) &&
|
|
85
90
|
line_number == other.line_number &&
|
|
91
|
+
new_position == other.new_position &&
|
|
86
92
|
content == other.content &&
|
|
87
93
|
type == other.type &&
|
|
88
94
|
diff_node == other.diff_node &&
|