canon 0.1.23 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +155 -30
- data/docs/INDEX.adoc +4 -0
- data/docs/advanced/diff-classification.adoc +3 -2
- data/docs/features/configuration-profiles.adoc +288 -0
- data/docs/features/diff-formatting/character-visualization.adoc +153 -454
- data/docs/features/diff-formatting/display-filtering.adoc +44 -0
- data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
- data/docs/features/diff-formatting/index.adoc +47 -0
- data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
- data/docs/features/environment-configuration/override-system.adoc +10 -3
- data/docs/features/index.adoc +9 -0
- data/docs/features/match-options/index.adoc +32 -42
- data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
- data/docs/guides/choosing-configuration.adoc +22 -0
- data/docs/reference/environment-variables.adoc +121 -1
- data/docs/reference/options-across-interfaces.adoc +182 -2
- data/lib/canon/cli.rb +20 -0
- data/lib/canon/commands/diff_command.rb +7 -2
- data/lib/canon/commands/format_command.rb +1 -1
- data/lib/canon/comparison/html_comparator.rb +20 -15
- data/lib/canon/comparison/html_compare_profile.rb +4 -4
- data/lib/canon/comparison/markup_comparator.rb +12 -3
- data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
- data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
- data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
- data/lib/canon/comparison/match_options.rb +4 -1
- data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
- data/lib/canon/comparison/xml_comparator.rb +14 -12
- data/lib/canon/comparison/xml_node_comparison.rb +51 -6
- data/lib/canon/comparison.rb +52 -9
- data/lib/canon/config/env_schema.rb +32 -4
- data/lib/canon/config/override_resolver.rb +16 -3
- data/lib/canon/config/profile_loader.rb +135 -0
- data/lib/canon/config/profiles/metanorma.yml +74 -0
- data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
- data/lib/canon/config/type_converter.rb +8 -0
- data/lib/canon/config.rb +469 -5
- data/lib/canon/diff/diff_classifier.rb +41 -11
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +22 -7
- data/lib/canon/diff_formatter.rb +493 -36
- data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
- data/lib/canon/rspec_matchers.rb +36 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/nodes/namespace_node.rb +4 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/tasks/performance_helpers.rb +2 -2
- metadata +24 -2
data/lib/canon/config.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "config/env_provider"
|
|
4
4
|
require_relative "config/override_resolver"
|
|
5
|
+
require_relative "config/profile_loader"
|
|
5
6
|
require_relative "color_detector"
|
|
6
7
|
|
|
7
8
|
module Canon
|
|
@@ -44,6 +45,36 @@ module Canon
|
|
|
44
45
|
@json = FormatConfig.new(:json)
|
|
45
46
|
@yaml = FormatConfig.new(:yaml)
|
|
46
47
|
@string = FormatConfig.new(:string)
|
|
48
|
+
@profile = nil
|
|
49
|
+
|
|
50
|
+
env_profile = ENV.fetch("CANON_CONFIG_PROFILE", nil)
|
|
51
|
+
if env_profile
|
|
52
|
+
# Convert to symbol if it matches a built-in profile name
|
|
53
|
+
self.profile = if ProfileLoader.available_profiles.include?(env_profile.to_sym)
|
|
54
|
+
env_profile.to_sym
|
|
55
|
+
else
|
|
56
|
+
env_profile
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Returns the current profile name or path.
|
|
62
|
+
def profile
|
|
63
|
+
@profile
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Apply a configuration profile by name (Symbol for built-in) or
|
|
67
|
+
# file path (String). Set to +nil+ to clear the profile layer.
|
|
68
|
+
def profile=(name_or_path)
|
|
69
|
+
clear_profile_values!
|
|
70
|
+
|
|
71
|
+
if name_or_path.nil?
|
|
72
|
+
@profile = nil
|
|
73
|
+
return
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
@profile = name_or_path.is_a?(Symbol) ? name_or_path : name_or_path.to_s
|
|
77
|
+
apply_profile(@profile)
|
|
47
78
|
end
|
|
48
79
|
|
|
49
80
|
def reset!
|
|
@@ -52,6 +83,7 @@ module Canon
|
|
|
52
83
|
@json.reset!
|
|
53
84
|
@yaml.reset!
|
|
54
85
|
@string.reset!
|
|
86
|
+
@profile = nil
|
|
55
87
|
end
|
|
56
88
|
|
|
57
89
|
# Backward compatibility methods for top-level diff configuration
|
|
@@ -93,19 +125,44 @@ module Canon
|
|
|
93
125
|
# Each format (XML, HTML, JSON, YAML) has its own instance
|
|
94
126
|
class FormatConfig
|
|
95
127
|
attr_reader :format, :match, :diff
|
|
96
|
-
attr_accessor :preprocessing
|
|
97
128
|
|
|
98
129
|
def initialize(format)
|
|
99
130
|
@format = format
|
|
100
131
|
@match = MatchConfig.new(format)
|
|
101
132
|
@diff = DiffConfig.new(format)
|
|
102
133
|
@preprocessing = nil
|
|
134
|
+
@profile_preprocessing = nil
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def preprocessing
|
|
138
|
+
@preprocessing || @profile_preprocessing
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def preprocessing=(value)
|
|
142
|
+
@preprocessing = value
|
|
103
143
|
end
|
|
104
144
|
|
|
105
145
|
def reset!
|
|
106
146
|
@match.reset!
|
|
107
147
|
@diff.reset!
|
|
108
148
|
@preprocessing = nil
|
|
149
|
+
@profile_preprocessing = nil
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def apply_profile_data(data)
|
|
153
|
+
if data.key?("preprocessing")
|
|
154
|
+
val = data["preprocessing"]
|
|
155
|
+
@profile_preprocessing = val.is_a?(String) ? val.to_sym : val
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
@match.apply_profile_data(data["match"]) if data.key?("match")
|
|
159
|
+
@diff.apply_profile_data(data["diff"]) if data.key?("diff")
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def clear_profile!
|
|
163
|
+
@profile_preprocessing = nil
|
|
164
|
+
@match.clear_profile!
|
|
165
|
+
@diff.clear_profile!
|
|
109
166
|
end
|
|
110
167
|
end
|
|
111
168
|
|
|
@@ -137,6 +194,34 @@ module Canon
|
|
|
137
194
|
@resolver.set_programmatic(:profile, value)
|
|
138
195
|
end
|
|
139
196
|
|
|
197
|
+
# Return all profile-sourced values from the resolver, excluding
|
|
198
|
+
# the :profile key itself (which is accessed via #profile).
|
|
199
|
+
# These are the YAML-profile settings (e.g., preserve_whitespace_elements)
|
|
200
|
+
# that are stored in the resolver's profile layer but not exposed
|
|
201
|
+
# through the built-in MATCH_PROFILES system.
|
|
202
|
+
#
|
|
203
|
+
# @return [Hash] Profile option key-values (excluding :profile)
|
|
204
|
+
def profile_options
|
|
205
|
+
@resolver.profile.except(:profile)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Element names where whitespace is PRESERVED exactly (no manipulation).
|
|
209
|
+
# All whitespace characters are significant in these elements.
|
|
210
|
+
def preserve_whitespace_elements
|
|
211
|
+
@resolver.resolve(:preserve_whitespace_elements) || []
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
# Element names where whitespace is COLLAPSED (HTML-style behavior).
|
|
215
|
+
# Multiple whitespace chars collapse to single space; boundaries preserved.
|
|
216
|
+
def collapse_whitespace_elements
|
|
217
|
+
@resolver.resolve(:collapse_whitespace_elements) || []
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# Element names where whitespace-only text nodes are STRIPPED.
|
|
221
|
+
def strip_whitespace_elements
|
|
222
|
+
@resolver.resolve(:strip_whitespace_elements) || []
|
|
223
|
+
end
|
|
224
|
+
|
|
140
225
|
# Build match options from profile and options
|
|
141
226
|
def to_h
|
|
142
227
|
result = {}
|
|
@@ -145,6 +230,20 @@ module Canon
|
|
|
145
230
|
result
|
|
146
231
|
end
|
|
147
232
|
|
|
233
|
+
def apply_profile_data(data)
|
|
234
|
+
return unless data
|
|
235
|
+
|
|
236
|
+
data.each do |key, value|
|
|
237
|
+
sym_key = key.to_sym
|
|
238
|
+
converted = value.is_a?(String) ? value.to_sym : value
|
|
239
|
+
@resolver.set_profile(sym_key, converted)
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
def clear_profile!
|
|
244
|
+
@resolver.clear_profile!
|
|
245
|
+
end
|
|
246
|
+
|
|
148
247
|
private
|
|
149
248
|
|
|
150
249
|
def build_resolver(format)
|
|
@@ -162,15 +261,60 @@ module Canon
|
|
|
162
261
|
end
|
|
163
262
|
end
|
|
164
263
|
|
|
264
|
+
# Pretty-printer sub-configuration for display canonicalization.
|
|
265
|
+
# Controls how documents are formatted by +Canon::PrettyPrinter::Xml+
|
|
266
|
+
# when +display_preprocessing: :pretty_print+ is active.
|
|
267
|
+
# The two attributes (+indent+ and +indent_type+) are backed by the
|
|
268
|
+
# parent +DiffConfig+'s resolver so that ENV overrides (e.g.
|
|
269
|
+
# +CANON_XML_DIFF_PRETTY_PRINTER_INDENT+) work automatically.
|
|
270
|
+
class PrettyPrinterConfig
|
|
271
|
+
def initialize(resolver)
|
|
272
|
+
@resolver = resolver
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
def indent
|
|
276
|
+
@resolver.resolve(:pretty_printer_indent)
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
def indent=(value)
|
|
280
|
+
@resolver.set_programmatic(:pretty_printer_indent, value)
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
def indent_type
|
|
284
|
+
@resolver.resolve(:pretty_printer_indent_type)
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
def indent_type=(value)
|
|
288
|
+
@resolver.set_programmatic(:pretty_printer_indent_type, value)
|
|
289
|
+
end
|
|
290
|
+
end
|
|
291
|
+
|
|
165
292
|
# Diff configuration for output formatting
|
|
166
293
|
class DiffConfig
|
|
294
|
+
attr_reader :pretty_printer
|
|
295
|
+
|
|
167
296
|
def initialize(format = nil)
|
|
168
297
|
@format = format
|
|
169
298
|
@resolver = build_resolver(format)
|
|
299
|
+
@pretty_printer = PrettyPrinterConfig.new(@resolver)
|
|
170
300
|
end
|
|
171
301
|
|
|
172
302
|
def reset!
|
|
173
303
|
@resolver = build_resolver(@format)
|
|
304
|
+
@pretty_printer = PrettyPrinterConfig.new(@resolver)
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def apply_profile_data(data)
|
|
308
|
+
return unless data
|
|
309
|
+
|
|
310
|
+
data.each do |key, value|
|
|
311
|
+
sym_key = key.to_sym
|
|
312
|
+
@resolver.set_profile(sym_key, coerce_profile_value(sym_key, value))
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
def clear_profile!
|
|
317
|
+
@resolver.clear_profile!
|
|
174
318
|
end
|
|
175
319
|
|
|
176
320
|
# Accessors with ENV override support
|
|
@@ -230,6 +374,35 @@ module Canon
|
|
|
230
374
|
@resolver.set_programmatic(:show_raw_inputs, value)
|
|
231
375
|
end
|
|
232
376
|
|
|
377
|
+
# Show only the EXPECTED (fixture) block in the raw-inputs section.
|
|
378
|
+
# Has no effect unless +show_raw_inputs+ or +verbose_diff+ is also set.
|
|
379
|
+
# Use +show_raw_expected: false+ together with +show_raw_received: true+
|
|
380
|
+
# (or +show_raw_inputs: true+) to suppress the fixture display while
|
|
381
|
+
# keeping the received output.
|
|
382
|
+
#
|
|
383
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_RAW_EXPECTED+
|
|
384
|
+
def show_raw_expected
|
|
385
|
+
@resolver.resolve(:show_raw_expected)
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
def show_raw_expected=(value)
|
|
389
|
+
@resolver.set_programmatic(:show_raw_expected, value)
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
# Show only the RECEIVED (actual) block in the raw-inputs section.
|
|
393
|
+
# Combined with +show_raw_expected: false+ (or leaving it at the default
|
|
394
|
+
# +false+) this suppresses the fixture while still displaying the output
|
|
395
|
+
# that was generated.
|
|
396
|
+
#
|
|
397
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_RAW_RECEIVED+
|
|
398
|
+
def show_raw_received
|
|
399
|
+
@resolver.resolve(:show_raw_received)
|
|
400
|
+
end
|
|
401
|
+
|
|
402
|
+
def show_raw_received=(value)
|
|
403
|
+
@resolver.set_programmatic(:show_raw_received, value)
|
|
404
|
+
end
|
|
405
|
+
|
|
233
406
|
def show_preprocessed_inputs
|
|
234
407
|
@resolver.resolve(:show_preprocessed_inputs)
|
|
235
408
|
end
|
|
@@ -238,6 +411,77 @@ module Canon
|
|
|
238
411
|
@resolver.set_programmatic(:show_preprocessed_inputs, value)
|
|
239
412
|
end
|
|
240
413
|
|
|
414
|
+
# Show only the EXPECTED (fixture) block in the preprocessed-inputs
|
|
415
|
+
# section. Has no effect unless +show_preprocessed_inputs+ or
|
|
416
|
+
# +verbose_diff+ is also set. Use +show_preprocessed_expected: true+
|
|
417
|
+
# together with +show_preprocessed_received: false+ to display only the
|
|
418
|
+
# preprocessed fixture while suppressing the preprocessed received output.
|
|
419
|
+
#
|
|
420
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PREPROCESSED_EXPECTED+
|
|
421
|
+
def show_preprocessed_expected
|
|
422
|
+
@resolver.resolve(:show_preprocessed_expected)
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
def show_preprocessed_expected=(value)
|
|
426
|
+
@resolver.set_programmatic(:show_preprocessed_expected, value)
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
# Show only the RECEIVED (actual) block in the preprocessed-inputs
|
|
430
|
+
# section. Combined with +show_preprocessed_expected: false+ (or leaving
|
|
431
|
+
# it at the default +false+) this suppresses the fixture preprocessing
|
|
432
|
+
# display while still showing what the received document looked like after
|
|
433
|
+
# preprocessing.
|
|
434
|
+
#
|
|
435
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PREPROCESSED_RECEIVED+
|
|
436
|
+
def show_preprocessed_received
|
|
437
|
+
@resolver.resolve(:show_preprocessed_received)
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
def show_preprocessed_received=(value)
|
|
441
|
+
@resolver.set_programmatic(:show_preprocessed_received, value)
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
# Show both EXPECTED and RECEIVED blocks in a fixture-ready pretty-printed
|
|
445
|
+
# section. The output uses the same pretty-printer as
|
|
446
|
+
# +display_preprocessing: :pretty_print+ (one tag per line, indentation)
|
|
447
|
+
# but with *no* character visualization — whitespace appears as plain ASCII
|
|
448
|
+
# so the output can be copy-pasted directly into RSpec fixture heredocs.
|
|
449
|
+
#
|
|
450
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_INPUTS+
|
|
451
|
+
def show_prettyprint_inputs
|
|
452
|
+
@resolver.resolve(:show_prettyprint_inputs)
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def show_prettyprint_inputs=(value)
|
|
456
|
+
@resolver.set_programmatic(:show_prettyprint_inputs, value)
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
# Show only the EXPECTED (fixture) block in the pretty-print section.
|
|
460
|
+
# Useful when the fixture is what needs updating and the received side is
|
|
461
|
+
# not needed for copy-pasting.
|
|
462
|
+
#
|
|
463
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_EXPECTED+
|
|
464
|
+
def show_prettyprint_expected
|
|
465
|
+
@resolver.resolve(:show_prettyprint_expected)
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
def show_prettyprint_expected=(value)
|
|
469
|
+
@resolver.set_programmatic(:show_prettyprint_expected, value)
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
# Show only the RECEIVED (actual) block in the pretty-print section.
|
|
473
|
+
# Use this to get a copy-pasteable pretty-printed form of the generated
|
|
474
|
+
# output (the most common fixture-update workflow).
|
|
475
|
+
#
|
|
476
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_SHOW_PRETTYPRINT_RECEIVED+
|
|
477
|
+
def show_prettyprint_received
|
|
478
|
+
@resolver.resolve(:show_prettyprint_received)
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
def show_prettyprint_received=(value)
|
|
482
|
+
@resolver.set_programmatic(:show_prettyprint_received, value)
|
|
483
|
+
end
|
|
484
|
+
|
|
241
485
|
def show_line_numbered_inputs
|
|
242
486
|
@resolver.resolve(:show_line_numbered_inputs)
|
|
243
487
|
end
|
|
@@ -254,6 +498,147 @@ module Canon
|
|
|
254
498
|
@resolver.set_programmatic(:display_format, value)
|
|
255
499
|
end
|
|
256
500
|
|
|
501
|
+
# Controls how documents are normalized *for display* before the line
|
|
502
|
+
# diff. This is independent of +FormatConfig#preprocessing+, which
|
|
503
|
+
# controls normalization for *comparison* (equivalence detection).
|
|
504
|
+
#
|
|
505
|
+
# Values:
|
|
506
|
+
# :none - use documents as-is (default, existing behaviour)
|
|
507
|
+
# :pretty_print - run through Canon::PrettyPrinter::Xml before diffing
|
|
508
|
+
# :c14n - run through XML C14N normalization before diffing
|
|
509
|
+
def display_preprocessing
|
|
510
|
+
@resolver.resolve(:display_preprocessing)
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
def display_preprocessing=(value)
|
|
514
|
+
@resolver.set_programmatic(:display_preprocessing, value)
|
|
515
|
+
end
|
|
516
|
+
|
|
517
|
+
# Element names where whitespace is PRESERVED exactly (no manipulation).
|
|
518
|
+
# All whitespace characters are significant in these elements.
|
|
519
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_PRESERVE_WHITESPACE_ELEMENTS+
|
|
520
|
+
def preserve_whitespace_elements
|
|
521
|
+
@resolver.resolve(:preserve_whitespace_elements) || []
|
|
522
|
+
end
|
|
523
|
+
|
|
524
|
+
def preserve_whitespace_elements=(value)
|
|
525
|
+
@resolver.set_programmatic(:preserve_whitespace_elements,
|
|
526
|
+
Array(value).map(&:to_s))
|
|
527
|
+
end
|
|
528
|
+
|
|
529
|
+
# Element names where whitespace is COLLAPSED (HTML-style behavior).
|
|
530
|
+
# Multiple whitespace chars collapse to single space; boundaries preserved.
|
|
531
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_COLLAPSE_WHITESPACE_ELEMENTS+
|
|
532
|
+
def collapse_whitespace_elements
|
|
533
|
+
@resolver.resolve(:collapse_whitespace_elements) || []
|
|
534
|
+
end
|
|
535
|
+
|
|
536
|
+
def collapse_whitespace_elements=(value)
|
|
537
|
+
@resolver.set_programmatic(:collapse_whitespace_elements,
|
|
538
|
+
Array(value).map(&:to_s))
|
|
539
|
+
end
|
|
540
|
+
|
|
541
|
+
# Element names where whitespace-only text nodes are STRIPPED.
|
|
542
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_STRIP_WHITESPACE_ELEMENTS+
|
|
543
|
+
def strip_whitespace_elements
|
|
544
|
+
@resolver.resolve(:strip_whitespace_elements) || []
|
|
545
|
+
end
|
|
546
|
+
|
|
547
|
+
def strip_whitespace_elements=(value)
|
|
548
|
+
@resolver.set_programmatic(:strip_whitespace_elements,
|
|
549
|
+
Array(value).map(&:to_s))
|
|
550
|
+
end
|
|
551
|
+
|
|
552
|
+
# When true, whitespace-only text nodes starting with "\n" in :collapse
|
|
553
|
+
# elements of the **expected** (fixture) document are treated as structural
|
|
554
|
+
# indentation and dropped from both comparison and display. Use this when
|
|
555
|
+
# fixture files are indented but received XML is compact.
|
|
556
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTED_EXPECTED+
|
|
557
|
+
def pretty_printed_expected
|
|
558
|
+
@resolver.resolve(:pretty_printed_expected)
|
|
559
|
+
end
|
|
560
|
+
|
|
561
|
+
def pretty_printed_expected=(value)
|
|
562
|
+
@resolver.set_programmatic(:pretty_printed_expected, value)
|
|
563
|
+
end
|
|
564
|
+
|
|
565
|
+
# When true, whitespace-only text nodes starting with "\n" in :normalize
|
|
566
|
+
# elements of the **received** document are treated as structural
|
|
567
|
+
# indentation and dropped from both comparison and display. Use this when
|
|
568
|
+
# received XML may be pretty-printed but the fixture is compact.
|
|
569
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTED_RECEIVED+
|
|
570
|
+
def pretty_printed_received
|
|
571
|
+
@resolver.resolve(:pretty_printed_received)
|
|
572
|
+
end
|
|
573
|
+
|
|
574
|
+
def pretty_printed_received=(value)
|
|
575
|
+
@resolver.set_programmatic(:pretty_printed_received, value)
|
|
576
|
+
end
|
|
577
|
+
|
|
578
|
+
# When true, attributes on each element are sorted by namespace URI
|
|
579
|
+
# then local name in the pretty-printed display, eliminating spurious
|
|
580
|
+
# diff noise from differing attribute order.
|
|
581
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_PRETTY_PRINTER_SORT_ATTRIBUTES+
|
|
582
|
+
def pretty_printer_sort_attributes
|
|
583
|
+
@resolver.resolve(:pretty_printer_sort_attributes)
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
def pretty_printer_sort_attributes=(value)
|
|
587
|
+
@resolver.set_programmatic(:pretty_printer_sort_attributes, value)
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
# Render element nodes in the Semantic Diff Report as compact inline XML
|
|
591
|
+
# (e.g. +<strong>Annex</strong>+) instead of the verbose node_info
|
|
592
|
+
# description string (e.g. "name: strong namespace_uri: …").
|
|
593
|
+
#
|
|
594
|
+
# Default: +false+ (keep existing verbose format for backwards compatibility)
|
|
595
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_COMPACT_SEMANTIC_REPORT+
|
|
596
|
+
def compact_semantic_report
|
|
597
|
+
@resolver.resolve(:compact_semantic_report)
|
|
598
|
+
end
|
|
599
|
+
|
|
600
|
+
def compact_semantic_report=(value)
|
|
601
|
+
@resolver.set_programmatic(:compact_semantic_report, value)
|
|
602
|
+
end
|
|
603
|
+
|
|
604
|
+
# Show the full serialized node content (including children) in
|
|
605
|
+
# element_structure diffs instead of just the tag name.
|
|
606
|
+
#
|
|
607
|
+
# Default: +false+ (show only the tag name, e.g. +<biblio-tag>+)
|
|
608
|
+
# ENV variable: +CANON_<FORMAT>_DIFF_EXPAND_DIFFERENCE+
|
|
609
|
+
def expand_difference
|
|
610
|
+
@resolver.resolve(:expand_difference)
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
def expand_difference=(value)
|
|
614
|
+
@resolver.set_programmatic(:expand_difference, value)
|
|
615
|
+
end
|
|
616
|
+
|
|
617
|
+
# Controls whether invisible characters (spaces, tabs, non-breaking
|
|
618
|
+
# spaces, etc.) are replaced with visible Unicode symbols in diff output.
|
|
619
|
+
#
|
|
620
|
+
# Values:
|
|
621
|
+
# true - apply the full default visualization map (default)
|
|
622
|
+
# false - disable visualization; output plain text
|
|
623
|
+
# :content_only - reserved for future use; currently behaves as +true+.
|
|
624
|
+
# Future intent: apply visualization only to DOM text
|
|
625
|
+
# node content, not to structural indentation whitespace.
|
|
626
|
+
# (TODO: implement DOM-level pre-serialization pass)
|
|
627
|
+
def character_visualization
|
|
628
|
+
val = @resolver.resolve(:character_visualization)
|
|
629
|
+
# Coerce symbol booleans that may arrive via ENV (env_schema uses :symbol type
|
|
630
|
+
# so "true"/"false" env strings become :true/:false symbols)
|
|
631
|
+
case val
|
|
632
|
+
when true, :true then true # rubocop:disable Lint/BooleanSymbol
|
|
633
|
+
when false, :false then false # rubocop:disable Lint/BooleanSymbol
|
|
634
|
+
else val # true/false from programmatic, or :content_only
|
|
635
|
+
end
|
|
636
|
+
end
|
|
637
|
+
|
|
638
|
+
def character_visualization=(value)
|
|
639
|
+
@resolver.set_programmatic(:character_visualization, value)
|
|
640
|
+
end
|
|
641
|
+
|
|
257
642
|
def algorithm
|
|
258
643
|
@resolver.resolve(:algorithm)
|
|
259
644
|
end
|
|
@@ -309,9 +694,27 @@ module Canon
|
|
|
309
694
|
verbose_diff: verbose_diff,
|
|
310
695
|
diff_algorithm: algorithm,
|
|
311
696
|
show_raw_inputs: show_raw_inputs,
|
|
697
|
+
show_raw_expected: show_raw_expected,
|
|
698
|
+
show_raw_received: show_raw_received,
|
|
312
699
|
show_preprocessed_inputs: show_preprocessed_inputs,
|
|
700
|
+
show_preprocessed_expected: show_preprocessed_expected,
|
|
701
|
+
show_preprocessed_received: show_preprocessed_received,
|
|
702
|
+
show_prettyprint_inputs: show_prettyprint_inputs,
|
|
703
|
+
show_prettyprint_expected: show_prettyprint_expected,
|
|
704
|
+
show_prettyprint_received: show_prettyprint_received,
|
|
313
705
|
show_line_numbered_inputs: show_line_numbered_inputs,
|
|
706
|
+
character_visualization: character_visualization,
|
|
314
707
|
display_format: display_format,
|
|
708
|
+
display_preprocessing: display_preprocessing,
|
|
709
|
+
pretty_printer_indent: pretty_printer.indent,
|
|
710
|
+
pretty_printer_indent_type: pretty_printer.indent_type,
|
|
711
|
+
preserve_whitespace_elements: preserve_whitespace_elements,
|
|
712
|
+
collapse_whitespace_elements: collapse_whitespace_elements,
|
|
713
|
+
strip_whitespace_elements: strip_whitespace_elements,
|
|
714
|
+
pretty_printed_expected: pretty_printed_expected,
|
|
715
|
+
pretty_printed_received: pretty_printed_received,
|
|
716
|
+
compact_semantic_report: compact_semantic_report,
|
|
717
|
+
expand_difference: expand_difference,
|
|
315
718
|
max_file_size: max_file_size,
|
|
316
719
|
max_node_count: max_node_count,
|
|
317
720
|
max_diff_lines: max_diff_lines,
|
|
@@ -331,12 +734,31 @@ module Canon
|
|
|
331
734
|
verbose_diff: false,
|
|
332
735
|
algorithm: :dom,
|
|
333
736
|
show_raw_inputs: false,
|
|
737
|
+
show_raw_expected: false,
|
|
738
|
+
show_raw_received: false,
|
|
334
739
|
show_preprocessed_inputs: false,
|
|
740
|
+
show_preprocessed_expected: false,
|
|
741
|
+
show_preprocessed_received: false,
|
|
742
|
+
show_prettyprint_inputs: false,
|
|
743
|
+
show_prettyprint_expected: false,
|
|
744
|
+
show_prettyprint_received: false,
|
|
335
745
|
show_line_numbered_inputs: false,
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
746
|
+
character_visualization: true, # true, false, :content_only
|
|
747
|
+
display_format: :raw, # :raw = no formatting, :canonical = HTML-aware formatting
|
|
748
|
+
display_preprocessing: :none, # :none, :pretty_print, :c14n
|
|
749
|
+
pretty_printer_indent: 2,
|
|
750
|
+
pretty_printer_indent_type: :space, # :space or :tab
|
|
751
|
+
preserve_whitespace_elements: [],
|
|
752
|
+
collapse_whitespace_elements: [],
|
|
753
|
+
strip_whitespace_elements: [],
|
|
754
|
+
pretty_printed_expected: false,
|
|
755
|
+
pretty_printed_received: false,
|
|
756
|
+
pretty_printer_sort_attributes: false,
|
|
757
|
+
compact_semantic_report: false,
|
|
758
|
+
expand_difference: false,
|
|
759
|
+
max_file_size: 5_242_880, # 5MB in bytes
|
|
760
|
+
max_node_count: 10_000, # Maximum nodes in tree
|
|
761
|
+
max_diff_lines: 10_000, # Maximum diff output lines
|
|
340
762
|
theme: :dark, # Default theme
|
|
341
763
|
}
|
|
342
764
|
|
|
@@ -348,6 +770,48 @@ module Canon
|
|
|
348
770
|
env: env,
|
|
349
771
|
)
|
|
350
772
|
end
|
|
773
|
+
|
|
774
|
+
# Coerce a YAML value to the appropriate Ruby type based on EnvSchema.
|
|
775
|
+
# YAML natively handles booleans, integers, and arrays, but symbols
|
|
776
|
+
# arrive as strings and need conversion.
|
|
777
|
+
def coerce_profile_value(key, value)
|
|
778
|
+
return value if value.is_a?(Array) # string_array already correct from YAML
|
|
779
|
+
|
|
780
|
+
type = EnvSchema.type_for(key)
|
|
781
|
+
case type
|
|
782
|
+
when :symbol
|
|
783
|
+
value.is_a?(String) ? value.to_sym : value
|
|
784
|
+
when :boolean
|
|
785
|
+
# YAML booleans are already true/false
|
|
786
|
+
value
|
|
787
|
+
when :integer
|
|
788
|
+
value.is_a?(String) ? Integer(value) : value
|
|
789
|
+
else
|
|
790
|
+
value
|
|
791
|
+
end
|
|
792
|
+
end
|
|
793
|
+
end
|
|
794
|
+
|
|
795
|
+
private
|
|
796
|
+
|
|
797
|
+
def apply_profile(name_or_path)
|
|
798
|
+
data = ProfileLoader.load(name_or_path)
|
|
799
|
+
shared = data["shared"] || {}
|
|
800
|
+
formats = data["formats"] || {}
|
|
801
|
+
|
|
802
|
+
format_configs.each do |fmt_key, fmt_cfg|
|
|
803
|
+
fmt_data = ProfileLoader.send(:deep_merge, shared,
|
|
804
|
+
formats[fmt_key.to_s] || {})
|
|
805
|
+
fmt_cfg.apply_profile_data(fmt_data)
|
|
806
|
+
end
|
|
807
|
+
end
|
|
808
|
+
|
|
809
|
+
def clear_profile_values!
|
|
810
|
+
format_configs.each_value(&:clear_profile!)
|
|
811
|
+
end
|
|
812
|
+
|
|
813
|
+
def format_configs
|
|
814
|
+
{ xml: @xml, html: @html, json: @json, yaml: @yaml, string: @string }
|
|
351
815
|
end
|
|
352
816
|
end
|
|
353
817
|
end
|
|
@@ -38,7 +38,12 @@ module Canon
|
|
|
38
38
|
# FIRST: Check for XML serialization-level formatting differences
|
|
39
39
|
# These are ALWAYS non-normative (formatting-only) regardless of match options
|
|
40
40
|
# Examples: self-closing tags (<tag/>) vs explicit closing tags (<tag></tag>)
|
|
41
|
-
|
|
41
|
+
#
|
|
42
|
+
# EXCEPTION: If the text node is inside a whitespace-sensitive element
|
|
43
|
+
# (:preserve or :collapse), don't dismiss as serialization formatting
|
|
44
|
+
# because whitespace presence is meaningful in those elements.
|
|
45
|
+
if !inside_whitespace_sensitive_element?(diff_node) &&
|
|
46
|
+
XmlSerializationFormatter.serialization_formatting?(diff_node)
|
|
42
47
|
diff_node.formatting = true
|
|
43
48
|
diff_node.normative = false
|
|
44
49
|
return diff_node
|
|
@@ -49,9 +54,11 @@ module Canon
|
|
|
49
54
|
# it should be marked as non-normative (informative)
|
|
50
55
|
# This ensures that verbose and non-verbose modes give consistent results
|
|
51
56
|
#
|
|
52
|
-
# EXCEPTION: If the text node is inside a whitespace
|
|
57
|
+
# EXCEPTION: If the text node is inside a PRESERVE whitespace element
|
|
53
58
|
# (like <pre>, <code>, <textarea> in HTML), don't apply formatting detection
|
|
54
|
-
# because whitespace should be preserved in these elements
|
|
59
|
+
# because whitespace should be preserved exactly in these elements.
|
|
60
|
+
# Note: COLLAPSE elements like <p> DO get formatting detection because
|
|
61
|
+
# their whitespace IS normalized (differences are formatting-only).
|
|
55
62
|
#
|
|
56
63
|
# This check must come BEFORE normative_dimension? is called,
|
|
57
64
|
# because normative_dimension? returns true for text_content: :normalize
|
|
@@ -59,7 +66,7 @@ module Canon
|
|
|
59
66
|
# detection from being applied.
|
|
60
67
|
if diff_node.dimension == :text_content &&
|
|
61
68
|
profile.send(:behavior_for, :text_content) == :normalize &&
|
|
62
|
-
!
|
|
69
|
+
!inside_preserve_element?(diff_node) &&
|
|
63
70
|
formatting_only_diff?(diff_node)
|
|
64
71
|
diff_node.formatting = true
|
|
65
72
|
diff_node.normative = false
|
|
@@ -143,19 +150,42 @@ module Canon
|
|
|
143
150
|
end
|
|
144
151
|
|
|
145
152
|
# Check if the text node is inside a whitespace-sensitive element
|
|
153
|
+
# (preserve/collapse classification or xml:space='preserve').
|
|
154
|
+
# In these elements, whitespace presence is meaningful and should
|
|
155
|
+
# not be dismissed as serialization formatting.
|
|
146
156
|
# @param diff_node [DiffNode] The diff node to check
|
|
147
|
-
# @return [Boolean] true if
|
|
157
|
+
# @return [Boolean] true if whitespace is preserved for this element
|
|
148
158
|
def inside_whitespace_sensitive_element?(diff_node)
|
|
149
|
-
# Get the text node (not the parent element)
|
|
150
159
|
node = diff_node.node1 || diff_node.node2
|
|
151
160
|
return false unless node
|
|
152
161
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
162
|
+
return false unless node.respond_to?(:parent)
|
|
163
|
+
|
|
164
|
+
parent = node.parent
|
|
165
|
+
return false unless parent
|
|
166
|
+
|
|
167
|
+
match_opts = @match_options.options
|
|
168
|
+
Canon::Comparison::WhitespaceSensitivity.whitespace_preserved?(parent,
|
|
169
|
+
match_opts)
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Check if the text node is inside a PRESERVE whitespace element
|
|
173
|
+
# Only returns true for elements where whitespace is preserved exactly (:preserve),
|
|
174
|
+
# not for elements where whitespace is normalized (:collapse).
|
|
175
|
+
# @param diff_node [DiffNode] The diff node to check
|
|
176
|
+
# @return [Boolean] true if inside a preserve whitespace element
|
|
177
|
+
def inside_preserve_element?(diff_node)
|
|
178
|
+
node = diff_node.node1 || diff_node.node2
|
|
179
|
+
return false unless node
|
|
180
|
+
|
|
181
|
+
match_opts = @match_options.options
|
|
182
|
+
parent = node.parent
|
|
183
|
+
return false unless parent
|
|
157
184
|
|
|
158
|
-
Canon::Comparison::WhitespaceSensitivity.
|
|
185
|
+
classification = Canon::Comparison::WhitespaceSensitivity.classify_element(
|
|
186
|
+
parent, match_opts
|
|
187
|
+
)
|
|
188
|
+
classification == :preserve
|
|
159
189
|
end
|
|
160
190
|
|
|
161
191
|
# Extract text content from a node for formatting comparison
|