canon 0.1.19 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fae901023e2945c8ee14c48a6de4ce793d2735d9f2b098ba9f727b9c0f10e8ad
4
- data.tar.gz: 84ed342a12b39a77394275e6159eb16cc60f331c80ed161a6cd4fccc957dc06d
3
+ metadata.gz: 5caff8b02e8a381d4febbcbce1b0eeb4000599385428db08b8292fb35b76cfd5
4
+ data.tar.gz: ee62fd3b76217834e32c026bddcdf889ba9bad48772e5520b0bce7da4d3f982e
5
5
  SHA512:
6
- metadata.gz: d88d544b3b961dfa5c0f9fb806f51a473e29b0a018a22dbc9ea2aebaaf459a3aa6317d1cb22c2d5dd32d69eb6162389be274a06886566ab9b63f83e613c4b276
7
- data.tar.gz: cc409487c2c38791ec915584a8ebca85672ac7add362c78b5ace99bfd1a1657c6907ac387f321dbc12eda2613ffe998ae7fecdfce6895ff5e285f1f6022d250f
6
+ metadata.gz: 3a317a23324d26f55980d532e07c4d67b1fdfe9cf9e62afdd59ed0381be1f2ebcc6a19da92c02ab9c3e336ec10eda7885dca78d8fbe6de3717876d84f65bc05a
7
+ data.tar.gz: e38169560e771711fc1f79e60915e8412c0c8229244ac33f23107a87ede5e0f7fad87c47cf85a1519904256f985ed6abfb6d047d04e83c6865fc8ef23163a7d3
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-03-24 10:43:04 UTC using RuboCop version 1.85.1.
3
+ # on 2026-03-25 12:14:38 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,77 +11,36 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 2
15
- # This cop supports safe autocorrection (--autocorrect).
16
- # Configuration parameters: EnforcedStyle, IndentationWidth.
17
- # SupportedStyles: with_first_argument, with_fixed_indentation
18
- Layout/ArgumentAlignment:
19
- Exclude:
20
- - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
21
-
22
14
  # Offense count: 1
23
15
  # This cop supports safe autocorrection (--autocorrect).
24
- # Configuration parameters: EnforcedStyle, IndentationWidth.
25
- # SupportedStyles: with_first_element, with_fixed_indentation
26
- Layout/ArrayAlignment:
27
- Exclude:
28
- - 'lib/canon/diff/path_builder.rb'
29
-
30
- # Offense count: 6
31
- # This cop supports safe autocorrection (--autocorrect).
32
- Layout/ElseAlignment:
33
- Exclude:
34
- - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
35
-
36
- # Offense count: 2
37
- # This cop supports safe autocorrection (--autocorrect).
38
16
  # Configuration parameters: EnforcedStyleAlignWith.
39
- # SupportedStylesAlignWith: keyword, variable, start_of_line
40
- Layout/EndAlignment:
41
- Exclude:
42
- - 'lib/canon/diff/path_builder.rb'
43
- - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
44
-
45
- # Offense count: 1
46
- # This cop supports safe autocorrection (--autocorrect).
47
- # Configuration parameters: AllowForAlignment, AllowBeforeTrailingComments, ForceEqualSignAlignment.
48
- Layout/ExtraSpacing:
17
+ # SupportedStylesAlignWith: either, start_of_block, start_of_line
18
+ Layout/BlockAlignment:
49
19
  Exclude:
50
- - 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
20
+ - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
51
21
 
52
22
  # Offense count: 1
53
23
  # This cop supports safe autocorrection (--autocorrect).
54
- # Configuration parameters: EnforcedStyle.
55
- # SupportedStyles: normal, indented_internal_methods
56
- Layout/IndentationConsistency:
24
+ Layout/BlockEndNewline:
57
25
  Exclude:
58
- - 'lib/canon/diff/path_builder.rb'
26
+ - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
59
27
 
60
- # Offense count: 8
28
+ # Offense count: 2
61
29
  # This cop supports safe autocorrection (--autocorrect).
62
30
  # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
63
31
  # SupportedStylesAlignWith: start_of_line, relative_to_receiver
64
32
  Layout/IndentationWidth:
65
33
  Exclude:
66
- - 'lib/canon/diff/path_builder.rb'
67
- - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
34
+ - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
68
35
 
69
- # Offense count: 841
36
+ # Offense count: 858
70
37
  # This cop supports safe autocorrection (--autocorrect).
71
38
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
72
39
  # URISchemes: http, https
73
40
  Layout/LineLength:
74
41
  Enabled: false
75
42
 
76
- # Offense count: 3
77
- # This cop supports safe autocorrection (--autocorrect).
78
- # Configuration parameters: AllowInHeredoc.
79
- Layout/TrailingWhitespace:
80
- Exclude:
81
- - 'lib/canon/diff/path_builder.rb'
82
- - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
83
-
84
- # Offense count: 49
43
+ # Offense count: 48
85
44
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
86
45
  Lint/DuplicateBranch:
87
46
  Enabled: false
@@ -130,19 +89,19 @@ Metrics/AbcSize:
130
89
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
131
90
  # AllowedMethods: refine
132
91
  Metrics/BlockLength:
133
- Max: 84
92
+ Max: 85
134
93
 
135
- # Offense count: 196
94
+ # Offense count: 198
136
95
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
137
96
  Metrics/CyclomaticComplexity:
138
97
  Enabled: false
139
98
 
140
- # Offense count: 405
99
+ # Offense count: 413
141
100
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
142
101
  Metrics/MethodLength:
143
- Max: 95
102
+ Max: 104
144
103
 
145
- # Offense count: 45
104
+ # Offense count: 46
146
105
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
147
106
  Metrics/ParameterLists:
148
107
  Max: 9
@@ -171,14 +130,15 @@ Naming/VariableNumber:
171
130
  - 'lib/canon/comparison/markup_comparator.rb'
172
131
  - 'lib/canon/comparison/xml_comparator/diff_node_builder.rb'
173
132
 
174
- # Offense count: 2
133
+ # Offense count: 3
175
134
  # Configuration parameters: MinSize.
176
135
  Performance/CollectionLiteralInLoop:
177
136
  Exclude:
178
137
  - 'lib/canon/comparison/html_comparator.rb'
179
138
  - 'lib/canon/xml/xml_base_handler.rb'
139
+ - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
180
140
 
181
- # Offense count: 68
141
+ # Offense count: 71
182
142
  # Configuration parameters: Prefixes, AllowedPatterns.
183
143
  # Prefixes: when, with, without
184
144
  RSpec/ContextWording:
@@ -195,7 +155,7 @@ RSpec/DescribeMethod:
195
155
  - 'spec/canon/comparison/multiple_differences_spec.rb'
196
156
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
197
157
 
198
- # Offense count: 699
158
+ # Offense count: 707
199
159
  # Configuration parameters: CountAsOne.
200
160
  RSpec/ExampleLength:
201
161
  Max: 43
@@ -247,7 +207,7 @@ RSpec/MultipleDescribes:
247
207
  Exclude:
248
208
  - 'spec/canon/comparison/match_options_spec.rb'
249
209
 
250
- # Offense count: 537
210
+ # Offense count: 546
251
211
  RSpec/MultipleExpectations:
252
212
  Max: 15
253
213
 
@@ -310,6 +270,17 @@ RSpec/VerifiedDoubles:
310
270
  - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
311
271
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
312
272
 
273
+ # Offense count: 1
274
+ # This cop supports safe autocorrection (--autocorrect).
275
+ # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
276
+ # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
277
+ # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
278
+ # FunctionalMethods: let, let!, subject, watch
279
+ # AllowedMethods: lambda, proc, it
280
+ Style/BlockDelimiters:
281
+ Exclude:
282
+ - 'spec/canon/diff/diff_node_mapper_comments_spec.rb'
283
+
313
284
  # Offense count: 1
314
285
  # This cop supports safe autocorrection (--autocorrect).
315
286
  # Configuration parameters: EnforcedStyle, AllowComments.
@@ -332,37 +303,9 @@ Style/IdenticalConditionalBranches:
332
303
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
333
304
  - 'lib/canon/diff_formatter/legend.rb'
334
305
 
335
- # Offense count: 2
336
- # This cop supports safe autocorrection (--autocorrect).
337
- # Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
338
- Style/MultipleComparison:
339
- Exclude:
340
- - 'lib/canon/diff/path_builder.rb'
341
-
342
306
  # Offense count: 1
343
307
  # Configuration parameters: AllowedMethods.
344
308
  # AllowedMethods: respond_to_missing?
345
309
  Style/OptionalBooleanParameter:
346
310
  Exclude:
347
311
  - 'lib/canon/diff_formatter/debug_output.rb'
348
-
349
- # Offense count: 1
350
- # This cop supports safe autocorrection (--autocorrect).
351
- Style/RedundantParentheses:
352
- Exclude:
353
- - 'lib/canon/diff/path_builder.rb'
354
-
355
- # Offense count: 1
356
- # This cop supports safe autocorrection (--autocorrect).
357
- # Configuration parameters: AllowModifier.
358
- Style/SoleNestedConditional:
359
- Exclude:
360
- - 'lib/canon/diff/path_builder.rb'
361
-
362
- # Offense count: 3
363
- # This cop supports safe autocorrection (--autocorrect).
364
- # Configuration parameters: EnforcedStyleForMultiline.
365
- # SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
366
- Style/TrailingCommaInArguments:
367
- Exclude:
368
- - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
@@ -208,6 +208,23 @@ differently depending on the operation:
208
208
  This means documents with and without XML declarations are considered
209
209
  equivalent when using the comparison API.
210
210
 
211
+ ==== Case-insensitive declaration attributes
212
+
213
+ Canon treats the values of certain XML declaration attributes as
214
+ case-insensitive (the XML specification recommends this for encoding
215
+ names). These values are normalized during formatting comparison so
216
+ that letter-casing differences are formatting-only, not normative.
217
+
218
+ **Case-insensitive attributes:**
219
+
220
+ * `encoding` — e.g., `encoding="UTF-8"` equals `encoding="utf-8"`
221
+ * `standalone` — e.g., `standalone="Yes"` equals `standalone="yes"`
222
+
223
+ These attributes are normalized to lowercase during formatting-level
224
+ comparison in `Canon::Diff::FormattingDetector`. This applies when
225
+ determining whether a line-level difference is formatting-only (e.g.,
226
+ in `by_line` diff mode).
227
+
211
228
  === Comment handling
212
229
 
213
230
  XML comments are preserved in canonical form unless `--with-comments` is explicitly set.
@@ -93,7 +93,9 @@ module Canon
93
93
  elsif node.is_a?(Canon::Xml::Nodes::ElementNode)
94
94
  serialize_element_node(node)
95
95
  elsif node.is_a?(Canon::Xml::Nodes::TextNode)
96
- node.value
96
+ # Use original text (with entity references) if available,
97
+ # otherwise fall back to value (decoded text)
98
+ node.original || node.value
97
99
  elsif node.is_a?(Canon::Xml::Nodes::CommentNode)
98
100
  "<!--#{node.value}-->"
99
101
  elsif node.is_a?(Canon::Xml::Nodes::ProcessingInstructionNode)
@@ -143,8 +143,11 @@ diff_children, differences)
143
143
  children1, children2, _parent_node, comparator,
144
144
  opts, child_opts, diff_children, differences
145
145
  )
146
+ has_mismatch = false
147
+
146
148
  # Length check
147
149
  unless children1.length == children2.length
150
+ has_mismatch = true
148
151
  dimension = determine_dimension_for_mismatch(
149
152
  children1, children2, comparator
150
153
  )
@@ -154,28 +157,20 @@ diff_children, differences)
154
157
  children1, children2, comparator
155
158
  )
156
159
 
157
- # Skip creating parent-level difference for comments
158
- # when comments: :ignore
159
- # The child comparison will handle the comment vs
160
- # element comparison
161
- # This avoids creating duplicate differences
162
- match_opts = opts[:match_opts]
163
- unless dimension == :comments && match_opts && match_opts[:comments] == :ignore
164
- if mismatched_children.empty?
165
- comparator.send(:add_difference, parent_node, parent_node,
166
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
167
- dimension, opts, differences)
168
- else
169
- mismatched_children.each do |child|
170
- if children1.length > children2.length # rubocop:disable Metrics/BlockNesting
171
- comparator.send(:add_difference, child, nil,
172
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
173
- dimension, opts, differences)
174
- else
175
- comparator.send(:add_difference, nil, child,
176
- Comparison::MISSING_NODE, Comparison::MISSING_NODE,
177
- dimension, opts, differences)
178
- end
160
+ if mismatched_children.empty?
161
+ comparator.send(:add_difference, parent_node, parent_node,
162
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
163
+ dimension, opts, differences)
164
+ else
165
+ mismatched_children.each do |child|
166
+ if children1.length > children2.length # rubocop:disable Metrics/BlockNesting
167
+ comparator.send(:add_difference, child, nil,
168
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
169
+ dimension, opts, differences)
170
+ else
171
+ comparator.send(:add_difference, nil, child,
172
+ Comparison::MISSING_NODE, Comparison::MISSING_NODE,
173
+ dimension, opts, differences)
179
174
  end
180
175
  end
181
176
  end
@@ -184,7 +179,7 @@ diff_children, differences)
184
179
  end
185
180
 
186
181
  # Compare children pairwise by position
187
- result = Comparison::EQUIVALENT
182
+ result = has_mismatch ? Comparison::UNEQUAL_ELEMENTS : Comparison::EQUIVALENT
188
183
  children1.zip(children2).each do |child1, child2|
189
184
  # Skip if one is nil (due to different lengths)
190
185
  next if child1.nil? || child2.nil?
@@ -220,8 +215,12 @@ diff_children, differences)
220
215
  elsif !comparator.send(:same_node_type?, children1[i],
221
216
  children2[i])
222
217
  # Different node types at same position
223
- dimension = comparator.send(:determine_node_dimension,
224
- children1[i])
218
+ # Check both nodes - if either is a comment, use :comments dimension
219
+ dim1 = comparator.send(:determine_node_dimension,
220
+ children1[i])
221
+ dim2 = comparator.send(:determine_node_dimension,
222
+ children2[i])
223
+ dimension = [dim1, dim2].include?(:comments) ? :comments : dim1
225
224
  break
226
225
  end
227
226
  end
@@ -224,9 +224,33 @@ module Canon
224
224
  raise Canon::CompareFormatMismatchError.new(format1, format2)
225
225
  end
226
226
 
227
+ # Get global config options if not defined in opts
228
+ # This is needed because semantic_diff doesn't go through dom_diff's config handling
229
+ if !(opts[:match_profile] || opts[:global_options]) && Canon::Config.instance.respond_to?(format1)
230
+ format_config = Canon::Config.instance.public_send(format1)
231
+ if format_config.match.profile
232
+ opts[:match_profile] =
233
+ format_config.match.profile
234
+ end
235
+ if format_config.match.options && !format_config.match.options.empty?
236
+ opts[:global_options] =
237
+ format_config.match.options
238
+ end
239
+ end
240
+
227
241
  # Resolve match options for the format
228
242
  match_opts_hash = resolve_match_options(format1, opts)
229
243
 
244
+ # Also read diff options from config (e.g., max_node_count for large documents)
245
+ # This is independent of match options and needs to be passed to TreeDiffIntegrator
246
+ if !match_opts_hash[:max_node_count] && Canon::Config.instance.respond_to?(format1)
247
+ diff_max_node = Canon::Config.instance.public_send(format1).diff.max_node_count
248
+ if diff_max_node > 10_000
249
+ match_opts_hash[:max_node_count] =
250
+ diff_max_node
251
+ end
252
+ end
253
+
230
254
  # Delegate parsing to comparators (reuses existing preprocessing logic)
231
255
  doc1, doc2 = parse_with_comparator(obj1, obj2, format1, match_opts_hash)
232
256
 
@@ -243,11 +267,21 @@ module Canon
243
267
  # Perform diff
244
268
  tree_diff_result = integrator.diff(doc1, doc2)
245
269
 
270
+ # Extract only match-related keys for OperationConverter and SemanticTreeMatchStrategy
271
+ # These components expect match options, not diff options like max_node_count
272
+ match_only_keys = %i[match_profile match preprocessing
273
+ text_content structural_whitespace attribute_presence
274
+ attribute_order attribute_values element_position
275
+ comments format similarity_threshold hash_matching
276
+ similarity_matching propagation sensitive_elements
277
+ whitespace_sensitive_elements respect_xml_space]
278
+ match_options_only = match_opts_hash.slice(*match_only_keys)
279
+
246
280
  # Convert operations to DiffNodes for unified pipeline
247
281
  # CRITICAL: Use options sliced from match_opts_hash (resolved options with profile), not opts[:match]
248
282
  converter = Canon::TreeDiff::OperationConverter.new(
249
283
  format: format1,
250
- match_options: match_opts_hash,
284
+ match_options: match_options_only,
251
285
  )
252
286
  diff_nodes = converter.convert(tree_diff_result[:operations])
253
287
 
@@ -255,7 +289,7 @@ module Canon
255
289
  # This matches DOM diff preprocessing pattern (xml_comparator.rb:106-109)
256
290
  require_relative "comparison/strategies/semantic_tree_match_strategy"
257
291
  strategy = Comparison::Strategies::SemanticTreeMatchStrategy.new(
258
- format: format1, match_options: match_opts_hash,
292
+ format: format1, match_options: match_options_only,
259
293
  )
260
294
  str1, str2 = strategy.preprocess_for_display(doc1, doc2)
261
295
 
@@ -5,16 +5,20 @@ module Canon
5
5
  # Represents a single line in the diff output
6
6
  # Links textual representation to semantic DiffNode
7
7
  class DiffLine
8
- attr_reader :line_number, :content, :type, :diff_node
8
+ attr_reader :line_number, :new_position, :content, :type, :diff_node
9
+ attr_writer :formatting
9
10
 
10
- # @param line_number [Integer] The line number in the original text
11
+ # @param line_number [Integer] The 0-based line number in text1 (old text)
12
+ # @param new_position [Integer, nil] The 0-based line number in text2 (new text),
13
+ # used for :changed lines where old and new positions differ
11
14
  # @param content [String] The text content of the line
12
15
  # @param type [Symbol] The type of line (:unchanged, :added, :removed, :changed)
13
16
  # @param diff_node [DiffNode, nil] The semantic diff node this line belongs to
14
17
  # @param formatting [Boolean] Whether this is a formatting-only difference
15
18
  def initialize(line_number:, content:, type:, diff_node: nil,
16
- formatting: false)
19
+ formatting: false, new_position: nil)
17
20
  @line_number = line_number
21
+ @new_position = new_position
18
22
  @content = content
19
23
  @type = type
20
24
  @diff_node = diff_node
@@ -71,6 +75,7 @@ formatting: false)
71
75
  def to_h
72
76
  {
73
77
  line_number: line_number,
78
+ new_position: new_position,
74
79
  content: content,
75
80
  type: type,
76
81
  diff_node: diff_node&.to_h,
@@ -83,6 +88,7 @@ formatting: false)
83
88
  def ==(other)
84
89
  other.is_a?(DiffLine) &&
85
90
  line_number == other.line_number &&
91
+ new_position == other.new_position &&
86
92
  content == other.content &&
87
93
  type == other.type &&
88
94
  diff_node == other.diff_node &&