canon 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 34ab9a64b52d8598690536908941e38950bb4071fb6222b50d1cb584236e5286
4
- data.tar.gz: 2dffcc8e29fcd1f75d78595ef73350208e7b369f2183c2df93672b94df7a6376
3
+ metadata.gz: fae901023e2945c8ee14c48a6de4ce793d2735d9f2b098ba9f727b9c0f10e8ad
4
+ data.tar.gz: 84ed342a12b39a77394275e6159eb16cc60f331c80ed161a6cd4fccc957dc06d
5
5
  SHA512:
6
- metadata.gz: 41d784c820a7bbafd9874bf369ef303376578e74f7171e8f00a7ed1ed0b8800576ca67f51272f1464f6f36cbaec35b6f1f1e9806916e7a8c59bc6c04e827ee79
7
- data.tar.gz: d3e354615ed4b40447ae0be7de64bf6250c40650e25f897a5155f52a74f57a8601003efd37d539abf9f514d5cbe614298a2185db7d72b68b4ff95db6da1b5b99
6
+ metadata.gz: d88d544b3b961dfa5c0f9fb806f51a473e29b0a018a22dbc9ea2aebaaf459a3aa6317d1cb22c2d5dd32d69eb6162389be274a06886566ab9b63f83e613c4b276
7
+ data.tar.gz: cc409487c2c38791ec915584a8ebca85672ac7add362c78b5ace99bfd1a1657c6907ac387f321dbc12eda2613ffe998ae7fecdfce6895ff5e285f1f6022d250f
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-03-24 08:58:24 UTC using RuboCop version 1.85.1.
3
+ # on 2026-03-24 10:43:04 UTC using RuboCop version 1.85.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,15 +11,13 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 10
14
+ # Offense count: 2
15
15
  # This cop supports safe autocorrection (--autocorrect).
16
16
  # Configuration parameters: EnforcedStyle, IndentationWidth.
17
17
  # SupportedStyles: with_first_argument, with_fixed_indentation
18
18
  Layout/ArgumentAlignment:
19
19
  Exclude:
20
- - 'lib/canon/xml/data_model.rb'
21
- - 'spec/canon/comparison/encoding_normalization_spec.rb'
22
- - 'spec/canon/comparison/xml_whitespace_spec.rb'
20
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
23
21
 
24
22
  # Offense count: 1
25
23
  # This cop supports safe autocorrection (--autocorrect).
@@ -27,46 +25,61 @@ Layout/ArgumentAlignment:
27
25
  # SupportedStyles: with_first_element, with_fixed_indentation
28
26
  Layout/ArrayAlignment:
29
27
  Exclude:
30
- - 'lib/canon/comparison/format_detector.rb'
28
+ - 'lib/canon/diff/path_builder.rb'
29
+
30
+ # Offense count: 6
31
+ # This cop supports safe autocorrection (--autocorrect).
32
+ Layout/ElseAlignment:
33
+ Exclude:
34
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
35
+
36
+ # Offense count: 2
37
+ # This cop supports safe autocorrection (--autocorrect).
38
+ # Configuration parameters: EnforcedStyleAlignWith.
39
+ # SupportedStylesAlignWith: keyword, variable, start_of_line
40
+ Layout/EndAlignment:
41
+ Exclude:
42
+ - 'lib/canon/diff/path_builder.rb'
43
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
31
44
 
32
45
  # Offense count: 1
33
46
  # This cop supports safe autocorrection (--autocorrect).
34
- Layout/EmptyLineAfterGuardClause:
47
+ # Configuration parameters: AllowForAlignment, AllowBeforeTrailingComments, ForceEqualSignAlignment.
48
+ Layout/ExtraSpacing:
35
49
  Exclude:
36
- - 'lib/canon/xml/data_model.rb'
50
+ - 'lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb'
37
51
 
38
52
  # Offense count: 1
39
53
  # This cop supports safe autocorrection (--autocorrect).
40
- # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
41
- # SupportedHashRocketStyles: key, separator, table
42
- # SupportedColonStyles: key, separator, table
43
- # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
44
- Layout/HashAlignment:
54
+ # Configuration parameters: EnforcedStyle.
55
+ # SupportedStyles: normal, indented_internal_methods
56
+ Layout/IndentationConsistency:
45
57
  Exclude:
46
- - 'lib/canon/comparison/format_detector.rb'
58
+ - 'lib/canon/diff/path_builder.rb'
59
+
60
+ # Offense count: 8
61
+ # This cop supports safe autocorrection (--autocorrect).
62
+ # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
63
+ # SupportedStylesAlignWith: start_of_line, relative_to_receiver
64
+ Layout/IndentationWidth:
65
+ Exclude:
66
+ - 'lib/canon/diff/path_builder.rb'
67
+ - 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb'
47
68
 
48
- # Offense count: 831
69
+ # Offense count: 841
49
70
  # This cop supports safe autocorrection (--autocorrect).
50
71
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
51
72
  # URISchemes: http, https
52
73
  Layout/LineLength:
53
74
  Enabled: false
54
75
 
55
- # Offense count: 9
56
- # This cop supports safe autocorrection (--autocorrect).
57
- # Configuration parameters: EnforcedStyle.
58
- # SupportedStyles: symmetrical, new_line, same_line
59
- Layout/MultilineMethodCallBraceLayout:
60
- Exclude:
61
- - 'spec/canon/comparison/encoding_normalization_spec.rb'
62
- - 'spec/canon/comparison/xml_whitespace_spec.rb'
63
-
64
- # Offense count: 2
76
+ # Offense count: 3
65
77
  # This cop supports safe autocorrection (--autocorrect).
66
78
  # Configuration parameters: AllowInHeredoc.
67
79
  Layout/TrailingWhitespace:
68
80
  Exclude:
69
- - 'lib/canon/comparison/format_detector.rb'
81
+ - 'lib/canon/diff/path_builder.rb'
82
+ - 'lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb'
70
83
 
71
84
  # Offense count: 49
72
85
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
@@ -97,13 +110,6 @@ Lint/UnreachableCode:
97
110
  Exclude:
98
111
  - 'lib/canon/diff_formatter/debug_output.rb'
99
112
 
100
- # Offense count: 1
101
- # This cop supports safe autocorrection (--autocorrect).
102
- # Configuration parameters: IgnoreEmptyBlocks, AllowUnusedKeywordArguments.
103
- Lint/UnusedBlockArgument:
104
- Exclude:
105
- - 'lib/canon/xml/data_model.rb'
106
-
107
113
  # Offense count: 6
108
114
  # This cop supports safe autocorrection (--autocorrect).
109
115
  # Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
@@ -115,7 +121,7 @@ Lint/UnusedMethodArgument:
115
121
  - 'lib/canon/diff_formatter/by_line/xml_formatter.rb'
116
122
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
117
123
 
118
- # Offense count: 236
124
+ # Offense count: 238
119
125
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
120
126
  Metrics/AbcSize:
121
127
  Enabled: false
@@ -126,12 +132,12 @@ Metrics/AbcSize:
126
132
  Metrics/BlockLength:
127
133
  Max: 84
128
134
 
129
- # Offense count: 193
135
+ # Offense count: 196
130
136
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
131
137
  Metrics/CyclomaticComplexity:
132
138
  Enabled: false
133
139
 
134
- # Offense count: 403
140
+ # Offense count: 405
135
141
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
136
142
  Metrics/MethodLength:
137
143
  Max: 95
@@ -141,7 +147,7 @@ Metrics/MethodLength:
141
147
  Metrics/ParameterLists:
142
148
  Max: 9
143
149
 
144
- # Offense count: 160
150
+ # Offense count: 162
145
151
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
146
152
  Metrics/PerceivedComplexity:
147
153
  Enabled: false
@@ -172,19 +178,13 @@ Performance/CollectionLiteralInLoop:
172
178
  - 'lib/canon/comparison/html_comparator.rb'
173
179
  - 'lib/canon/xml/xml_base_handler.rb'
174
180
 
175
- # Offense count: 1
176
- # This cop supports unsafe autocorrection (--autocorrect-all).
177
- Performance/UnfreezeString:
178
- Exclude:
179
- - 'spec/canon/comparison/encoding_normalization_spec.rb'
180
-
181
181
  # Offense count: 68
182
182
  # Configuration parameters: Prefixes, AllowedPatterns.
183
183
  # Prefixes: when, with, without
184
184
  RSpec/ContextWording:
185
185
  Enabled: false
186
186
 
187
- # Offense count: 29
187
+ # Offense count: 30
188
188
  # Configuration parameters: IgnoredMetadata.
189
189
  RSpec/DescribeClass:
190
190
  Enabled: false
@@ -195,7 +195,7 @@ RSpec/DescribeMethod:
195
195
  - 'spec/canon/comparison/multiple_differences_spec.rb'
196
196
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
197
197
 
198
- # Offense count: 695
198
+ # Offense count: 699
199
199
  # Configuration parameters: CountAsOne.
200
200
  RSpec/ExampleLength:
201
201
  Max: 43
@@ -247,7 +247,7 @@ RSpec/MultipleDescribes:
247
247
  Exclude:
248
248
  - 'spec/canon/comparison/match_options_spec.rb'
249
249
 
250
- # Offense count: 536
250
+ # Offense count: 537
251
251
  RSpec/MultipleExpectations:
252
252
  Max: 15
253
253
 
@@ -298,7 +298,7 @@ RSpec/SpecFilePathFormat:
298
298
  - 'spec/canon/yaml/formatter_spec.rb'
299
299
  - 'spec/xml_c14n_spec.rb'
300
300
 
301
- # Offense count: 126
301
+ # Offense count: 128
302
302
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
303
303
  RSpec/VerifiedDoubles:
304
304
  Exclude:
@@ -307,19 +307,9 @@ RSpec/VerifiedDoubles:
307
307
  - 'spec/canon/diff/diff_classifier_spec.rb'
308
308
  - 'spec/canon/diff/path_builder_spec.rb'
309
309
  - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
310
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
310
311
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
311
312
 
312
- # Offense count: 1
313
- # This cop supports safe autocorrection (--autocorrect).
314
- # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
315
- # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
316
- # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
317
- # FunctionalMethods: let, let!, subject, watch
318
- # AllowedMethods: lambda, proc, it
319
- Style/BlockDelimiters:
320
- Exclude:
321
- - 'spec/canon/comparison/encoding_normalization_spec.rb'
322
-
323
313
  # Offense count: 1
324
314
  # This cop supports safe autocorrection (--autocorrect).
325
315
  # Configuration parameters: EnforcedStyle, AllowComments.
@@ -342,12 +332,12 @@ Style/IdenticalConditionalBranches:
342
332
  - 'lib/canon/diff_formatter/by_object/base_formatter.rb'
343
333
  - 'lib/canon/diff_formatter/legend.rb'
344
334
 
345
- # Offense count: 1
335
+ # Offense count: 2
346
336
  # This cop supports safe autocorrection (--autocorrect).
347
337
  # Configuration parameters: AllowMethodComparison, ComparisonsThreshold.
348
338
  Style/MultipleComparison:
349
339
  Exclude:
350
- - 'lib/canon/comparison/format_detector.rb'
340
+ - 'lib/canon/diff/path_builder.rb'
351
341
 
352
342
  # Offense count: 1
353
343
  # Configuration parameters: AllowedMethods.
@@ -355,3 +345,24 @@ Style/MultipleComparison:
355
345
  Style/OptionalBooleanParameter:
356
346
  Exclude:
357
347
  - 'lib/canon/diff_formatter/debug_output.rb'
348
+
349
+ # Offense count: 1
350
+ # This cop supports safe autocorrection (--autocorrect).
351
+ Style/RedundantParentheses:
352
+ Exclude:
353
+ - 'lib/canon/diff/path_builder.rb'
354
+
355
+ # Offense count: 1
356
+ # This cop supports safe autocorrection (--autocorrect).
357
+ # Configuration parameters: AllowModifier.
358
+ Style/SoleNestedConditional:
359
+ Exclude:
360
+ - 'lib/canon/diff/path_builder.rb'
361
+
362
+ # Offense count: 3
363
+ # This cop supports safe autocorrection (--autocorrect).
364
+ # Configuration parameters: EnforcedStyleForMultiline.
365
+ # SupportedStylesForMultiline: comma, consistent_comma, diff_comma, no_comma
366
+ Style/TrailingCommaInArguments:
367
+ Exclude:
368
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
@@ -83,6 +83,20 @@ module Canon
83
83
  # Get ordinal index (position among siblings with same label)
84
84
  index = ordinal_index(tree_node)
85
85
 
86
+ # For text nodes, use parent element name for clarity
87
+ # e.g., instead of "/p/#text[0]" use "/p/text()[0]"
88
+ if ["text",
89
+ "#text"].include?(label) && tree_node.respond_to?(:parent) && tree_node.parent
90
+ parent_name = if tree_node.parent.respond_to?(:label)
91
+ tree_node.parent.label
92
+ elsif tree_node.parent.respond_to?(:name)
93
+ tree_node.parent.name
94
+ end
95
+ if parent_name && parent_name != "#document" && parent_name != "#document-fragment"
96
+ return "#{parent_name}/text()[#{index}]"
97
+ end
98
+ end
99
+
86
100
  "#{label}[#{index}]"
87
101
  end
88
102
 
@@ -340,10 +340,24 @@ module Canon
340
340
  TextUtils.visualize_whitespace(text2), :green, use_color
341
341
  )
342
342
  else
343
- detail1 = ColorHelper.colorize(format_json_value(text1), :red,
344
- use_color)
345
- detail2 = ColorHelper.colorize(format_json_value(text2), :green,
346
- use_color)
343
+ # Escape non-ASCII characters for better terminal display
344
+ # JSON.generate doesn't escape chars like NBSP (U+00A0) or em-dash (U+2014)
345
+ detail1 = if TextUtils.needs_escaping?(text1)
346
+ ColorHelper.colorize(
347
+ TextUtils.escape_for_display(text1), :red, use_color
348
+ )
349
+ else
350
+ ColorHelper.colorize(format_json_value(text1), :red,
351
+ use_color)
352
+ end
353
+ detail2 = if TextUtils.needs_escaping?(text2)
354
+ ColorHelper.colorize(
355
+ TextUtils.escape_for_display(text2), :green, use_color
356
+ )
357
+ else
358
+ ColorHelper.colorize(format_json_value(text2), :green,
359
+ use_color)
360
+ end
347
361
  end
348
362
 
349
363
  changes = "Content differs: #{detail1} → #{detail2}"
@@ -16,7 +16,12 @@ module Canon
16
16
  def self.extract_location(diff)
17
17
  return "" unless diff
18
18
 
19
- # Get the appropriate node based on diff type
19
+ # Prefer pre-computed path if available (populated by MetadataEnricher)
20
+ if diff.respond_to?(:path) && !diff.path.nil? && !diff.path.empty?
21
+ return "Location: #{diff.path}"
22
+ end
23
+
24
+ # Fall back to extracting from nodes
20
25
  node = if diff.respond_to?(:node1)
21
26
  diff.node1 || diff.node2
22
27
  elsif diff.is_a?(Hash)
@@ -159,21 +159,47 @@ module Canon
159
159
  def self.get_node_text(node)
160
160
  return "" unless node
161
161
 
162
- if node.respond_to?(:text)
163
- node.text
164
- elsif node.respond_to?(:content)
165
- node.content
166
- elsif node.respond_to?(:inner_text)
167
- node.inner_text
168
- elsif node.respond_to?(:value)
169
- node.value
170
- elsif node.respond_to?(:node_info)
171
- node.node_info
172
- elsif node.respond_to?(:to_s)
173
- node.to_s
174
- else
175
- ""
176
- end.to_s.strip
162
+ text = if node.respond_to?(:text)
163
+ node.text
164
+ elsif node.respond_to?(:content)
165
+ node.content
166
+ elsif node.respond_to?(:inner_text)
167
+ node.inner_text
168
+ elsif node.respond_to?(:value)
169
+ node.value
170
+ elsif node.respond_to?(:node_info)
171
+ node.node_info
172
+ elsif node.respond_to?(:to_s)
173
+ node.to_s
174
+ else
175
+ ""
176
+ end
177
+
178
+ strip_ascii_whitespace(text.to_s)
179
+ end
180
+
181
+ # Strip only leading/trailing space, tab, CR and LF. Everything else is kept,
182
+ # including Unicode whitespace such as non-breaking space (\u00A0) and the
183
+ # NUL, vertical-tab and form-feed characters that String#strip would remove.
184
+ #
185
+ # @param str [String] String to strip
186
+ # @return [String] String with leading/trailing ASCII whitespace removed
187
+ ASCII_WHITESPACE_BYTES = [32, 9, 13, 10].freeze # ' ', '\t', '\r', '\n'
188
+
189
+ def self.strip_ascii_whitespace(str)
190
+ return "" if str.nil?
191
+ return str if str.empty?
192
+
193
+ # Find first non-ASCII-whitespace character position
194
+ first_pos = str.index(/[^ \t\r\n]/)
195
+ return "" unless first_pos
196
+
197
+ # Find last non-ASCII-whitespace character position (from end)
198
+ # Use reverse and index, then convert back to forward position
199
+ reversed_pos = str.reverse.index(/[^ \t\r\n]/)
200
+ last_pos = str.length - 1 - reversed_pos
201
+
202
+ str[first_pos..last_pos]
177
203
  end
178
204
 
179
205
  # Get element name for display
@@ -20,7 +20,8 @@ module Canon
20
20
 
21
21
  # Visualize whitespace characters in text
22
22
  #
23
- # Shows spaces as ·, tabs as →, newlines as ¬
23
+ # Shows spaces as ·, tabs as →, newlines as ¬, and Unicode whitespace
24
+ # like non-breaking space as <NBSP>, etc.
24
25
  #
25
26
  # @param text [String] Text to visualize
26
27
  # @return [String] Text with visible whitespace
@@ -31,6 +32,9 @@ module Canon
31
32
  .gsub(" ", "·")
32
33
  .gsub("\t", "→")
33
34
  .gsub("\n", "¬")
35
+ .gsub("\u00A0", "<NBSP>") # Non-breaking space
36
+ .gsub("\u2028", "<LSEP>") # Line separator
37
+ .gsub("\u2029", "<PSEP>") # Paragraph separator
34
38
  end
35
39
 
36
40
  # Extract a content preview from a node
@@ -55,6 +59,42 @@ module Canon
55
59
  text = text.strip.gsub(/\s+/, " ")
56
60
  truncate_text(text, max_length)
57
61
  end
62
+
63
+ # Escape non-ASCII and non-printable characters for display
64
+ #
65
+ # Converts characters outside the printable ASCII range (32-126), plus
66
+ # double-quote and backslash, to \uXXXX escapes (upper-case hex, zero-padded
67
+ # to at least four digits) so that characters like non-breaking space
68
+ # (\u00A0) and em-dash (\u2014) are visible in terminal output.
69
+ #
70
+ # @param text [String] Text to escape
71
+ # @return [String] Escaped text safe for terminal display
72
+ def self.escape_for_display(text)
73
+ return "" if text.nil?
74
+
75
+ text.chars.map do |c|
76
+ codepoint = c.ord
77
+ if codepoint < 32 || codepoint >= 127 || codepoint == 34 || codepoint == 92
78
+ # Escape control characters, non-ASCII, double-quote, and backslash
79
+ "\\u#{codepoint.to_s(16).upcase.rjust(4, '0')}"
80
+ else
81
+ c
82
+ end
83
+ end.join
84
+ end
85
+
86
+ # Check if text contains non-ASCII, non-printable, double-quote or backslash characters
87
+ #
88
+ # @param text [String] Text to check
89
+ # @return [Boolean] true if text needs escaping for display
90
+ def self.needs_escaping?(text)
91
+ return false if text.nil?
92
+
93
+ text.each_char.any? do |c|
94
+ codepoint = c.ord
95
+ codepoint < 32 || codepoint >= 127 || codepoint == 34 || codepoint == 92
96
+ end
97
+ end
58
98
  end
59
99
  end
60
100
  end
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.1.18"
4
+ VERSION = "0.1.19"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: canon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.18
4
+ version: 0.1.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.