canon 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +31 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +92 -11
- data/lib/canon/comparison/markup_comparator.rb +19 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +80 -4
- data/lib/canon/comparison/xml_node_comparison.rb +29 -3
- data/lib/canon/comparison.rb +84 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +18 -2
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +57 -173
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +8 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +11 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +6 -2
|
@@ -232,6 +232,17 @@ diff_children, differences)
|
|
|
232
232
|
return false unless text_node?(node) && node.parent
|
|
233
233
|
return false unless MatchOptions.normalize_text(node_text(node)).empty?
|
|
234
234
|
|
|
235
|
+
# HTML-specific: NBSP (U+00A0) is never insignificant whitespace —
|
|
236
|
+
# it always renders as a visible non-breaking space.
|
|
237
|
+
format = opts[:format] || match_opts[:format]
|
|
238
|
+
if %i[html html4 html5].include?(format)
|
|
239
|
+
return false if WhitespaceSensitivity.contains_nbsp?(node_text(node))
|
|
240
|
+
|
|
241
|
+
# Whitespace between inline element siblings is semantically
|
|
242
|
+
# significant (renders as a visible gap) and must not be stripped.
|
|
243
|
+
return false if WhitespaceSensitivity.inline_whitespace_significant?(node)
|
|
244
|
+
end
|
|
245
|
+
|
|
235
246
|
return true unless WhitespaceSensitivity.whitespace_preserved?(
|
|
236
247
|
node.parent, match_opts
|
|
237
248
|
)
|
|
@@ -329,9 +340,24 @@ diff_children, differences)
|
|
|
329
340
|
# @param node [Object] Node to check
|
|
330
341
|
# @return [Boolean] true if node is a text node
|
|
331
342
|
def self.text_node?(node)
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
343
|
+
return false unless node
|
|
344
|
+
|
|
345
|
+
# Nokogiri text nodes (XML, HTML4, HTML5) — call element? rather
|
|
346
|
+
# than respond_to?(:element?), which always returns true for
|
|
347
|
+
# Nokogiri::XML::Node and made this predicate vacuously false
|
|
348
|
+
# for every Nokogiri text node. See issue #118.
|
|
349
|
+
if node.is_a?(Nokogiri::XML::Node)
|
|
350
|
+
return node.text? && !node.element?
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
# Canon::Xml::Nodes types and other ducktyped nodes.
|
|
354
|
+
if node.respond_to?(:text?) && node.text? &&
|
|
355
|
+
!node.respond_to?(:element?)
|
|
356
|
+
return true
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
# Symbol-style node_type (Canon's own node objects).
|
|
360
|
+
node.respond_to?(:node_type) && node.node_type == :text
|
|
335
361
|
end
|
|
336
362
|
|
|
337
363
|
# Extract text content from a node
|
data/lib/canon/comparison.rb
CHANGED
|
@@ -144,6 +144,35 @@ module Canon
|
|
|
144
144
|
dom_diff(obj1, obj2, opts)
|
|
145
145
|
end
|
|
146
146
|
|
|
147
|
+
# Summarize the first difference between two documents.
|
|
148
|
+
#
|
|
149
|
+
# Returns a human-readable string describing the first difference
|
|
150
|
+
# when documents differ, or "Equivalent" when they match.
|
|
151
|
+
# This is a lightweight alternative to +equivalent?+ with +verbose: true+.
|
|
152
|
+
#
|
|
153
|
+
# @param obj1 [Object] First object to compare
|
|
154
|
+
# @param obj2 [Object] Second object to compare
|
|
155
|
+
# @param opts [Hash] Comparison options (same as +equivalent?+)
|
|
156
|
+
# @return [String] Summary string
|
|
157
|
+
#
|
|
158
|
+
# @example
|
|
159
|
+
# Canon::Comparison.summarize("<p>Hello</p>", "<p>World</p>")
|
|
160
|
+
# # => "Not equivalent: text content differs at /p[1] (Hello vs World)"
|
|
161
|
+
#
|
|
162
|
+
# Canon::Comparison.summarize("<p>Hello</p>", "<p>Hello</p>")
|
|
163
|
+
# # => "Equivalent"
|
|
164
|
+
def summarize(obj1, obj2, opts = {})
|
|
165
|
+
result = equivalent?(obj1, obj2, opts.merge(verbose: true))
|
|
166
|
+
|
|
167
|
+
if result.is_a?(ComparisonResult)
|
|
168
|
+
result.summary
|
|
169
|
+
elsif result == true
|
|
170
|
+
"Equivalent"
|
|
171
|
+
else
|
|
172
|
+
"Not equivalent"
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
|
|
147
176
|
# Define a custom comparison profile with DSL syntax
|
|
148
177
|
#
|
|
149
178
|
# @param name [Symbol] Profile name
|
|
@@ -602,26 +631,26 @@ module Canon
|
|
|
602
631
|
# parsers can mutate the DOM).
|
|
603
632
|
opts[:_original_str1] = obj1.dup if obj1.is_a?(String)
|
|
604
633
|
opts[:_original_str2] = obj2.dup if obj2.is_a?(String)
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
634
|
+
# Parse all HTML formats (:html, :html4, :html5) with
|
|
635
|
+
# Nokogiri::HTML5 so that html4 and html5 share HTML's
|
|
636
|
+
# whitespace-sensitivity semantics (issue #118).
|
|
637
|
+
#
|
|
638
|
+
# The previous html/html4 branch used Nokogiri::XML.fragment
|
|
639
|
+
# to dodge Nokogiri::HTML4.fragment's destructive DOM
|
|
640
|
+
# mutations. That avoided one problem but introduced a
|
|
641
|
+
# bigger one: XML whitespace rules were being applied to
|
|
642
|
+
# HTML content. HTML's content model — identical between
|
|
643
|
+
# HTML4 and HTML5 — treats whitespace-only text between
|
|
644
|
+
# block-level children as insignificant; XML treats every
|
|
645
|
+
# whitespace text node as significant. Routing html4 input
|
|
646
|
+
# through an XML parser therefore made
|
|
647
|
+
# be_html4_equivalent_to reject inputs that
|
|
648
|
+
# be_html5_equivalent_to (correctly) accepts.
|
|
649
|
+
# Nokogiri::HTML5.fragment is non-destructive (the original
|
|
650
|
+
# HTML4.fragment concern does not apply to it) and applies
|
|
651
|
+
# HTML's content model uniformly.
|
|
652
|
+
obj1 = HtmlParser.parse(obj1, :html5) if obj1.is_a?(String)
|
|
653
|
+
obj2 = HtmlParser.parse(obj2, :html5) if obj2.is_a?(String)
|
|
625
654
|
end
|
|
626
655
|
else
|
|
627
656
|
format1 = FormatDetector.detect(obj1)
|
|
@@ -662,8 +691,14 @@ module Canon
|
|
|
662
691
|
# but defined in config
|
|
663
692
|
if Canon::Config.instance.respond_to?(comparison_format)
|
|
664
693
|
format_config = Canon::Config.instance.public_send(comparison_format)
|
|
665
|
-
if opts[:
|
|
666
|
-
|
|
694
|
+
if opts[:global_profile].nil? && format_config.match.profile
|
|
695
|
+
# Config-sourced profile has *global* priority (applied before
|
|
696
|
+
# global_options), so that YAML profile_options like
|
|
697
|
+
# whitespace_type: :normalize can override the built-in profile
|
|
698
|
+
# (e.g. :spec_friendly)'s whitespace_type: :strict. Writing to
|
|
699
|
+
# :match_profile here gave the config profile per-call priority,
|
|
700
|
+
# which incorrectly overrode the YAML's own overrides.
|
|
701
|
+
opts[:global_profile] = format_config.match.profile
|
|
667
702
|
end
|
|
668
703
|
# Pass YAML profile's extra match options (e.g., preserve_whitespace_elements)
|
|
669
704
|
# that are stored in MatchConfig's resolver but not exposed via the
|
|
@@ -701,6 +736,33 @@ module Canon
|
|
|
701
736
|
str
|
|
702
737
|
end
|
|
703
738
|
|
|
739
|
+
# Decode HTML named entities (&nbsp; etc.) to their numeric
|
|
740
|
+
# character reference equivalents so that Nokogiri::XML.fragment
|
|
741
|
+
# (which only understands the five XML entities) preserves them
|
|
742
|
+
# as text nodes instead of silently dropping them.
|
|
743
|
+
#
|
|
744
|
+
# Uses Nokogiri's HTML4 parser to resolve the entities — the
|
|
745
|
+
# text is extracted from a fragment so no structural tags are added.
|
|
746
|
+
#
|
|
747
|
+
# @param str [String] HTML string potentially containing named entities
|
|
748
|
+
# @return [String] String with named entities replaced by characters
|
|
749
|
+
def decode_html_entities(str)
|
|
750
|
+
# Fast path: skip if no ampersands present
|
|
751
|
+
return str unless str.include?("&")
|
|
752
|
+
|
|
753
|
+
# Parse as HTML fragment to resolve named entities, then
|
|
754
|
+
# re-serialize as text. This converts &nbsp; → U+00A0, etc.
|
|
755
|
+
doc = Nokogiri::HTML4.fragment(str)
|
|
756
|
+
|
|
757
|
+
# Serialize back, preserving the resolved characters.
|
|
758
|
+
# to_html re-encodes characters, so use inner_html which
|
|
759
|
+
# keeps the character form.
|
|
760
|
+
doc.inner_html
|
|
761
|
+
|
|
762
|
+
# If the serialization re-encoded characters as entities,
|
|
763
|
+
# that's fine — the XML parser understands numeric refs like &#160;
|
|
764
|
+
end
|
|
765
|
+
|
|
704
766
|
# Detect the format of an object (delegates to FormatDetector)
|
|
705
767
|
#
|
|
706
768
|
# @param obj [Object] Object to detect format of
|
|
@@ -14,6 +14,7 @@ module Canon
|
|
|
14
14
|
show_diffs: :symbol,
|
|
15
15
|
verbose_diff: :boolean,
|
|
16
16
|
algorithm: :symbol,
|
|
17
|
+
parser: :symbol,
|
|
17
18
|
show_raw_inputs: :boolean,
|
|
18
19
|
show_raw_expected: :boolean,
|
|
19
20
|
show_raw_received: :boolean,
|
|
@@ -66,7 +67,7 @@ module Canon
|
|
|
66
67
|
|
|
67
68
|
def all_diff_attributes
|
|
68
69
|
%i[mode use_color context_lines grouping_lines show_diffs
|
|
69
|
-
verbose_diff algorithm show_raw_inputs show_raw_expected show_raw_received
|
|
70
|
+
verbose_diff algorithm parser show_raw_inputs show_raw_expected show_raw_received
|
|
70
71
|
show_preprocessed_inputs show_preprocessed_expected show_preprocessed_received
|
|
71
72
|
show_prettyprint_inputs show_prettyprint_expected show_prettyprint_received
|
|
72
73
|
show_line_numbered_inputs character_visualization
|
|
@@ -28,6 +28,9 @@ formats:
|
|
|
28
28
|
xml:
|
|
29
29
|
match:
|
|
30
30
|
profile: spec_friendly
|
|
31
|
+
# Treat different Unicode whitespace types (space, NBSP, ideographic space, etc.)
|
|
32
|
+
# as equivalent — useful for spec comparisons where whitespace type doesn't matter
|
|
33
|
+
whitespace_type: :normalize
|
|
31
34
|
# Elements where whitespace is PRESERVED exactly (no manipulation)
|
|
32
35
|
# All whitespace characters are significant in these elements
|
|
33
36
|
preserve_whitespace_elements:
|
data/lib/canon/config.rb
CHANGED
|
@@ -285,6 +285,7 @@ module Canon
|
|
|
285
285
|
end
|
|
286
286
|
|
|
287
287
|
def indent_type=(value)
|
|
288
|
+
DiffConfig.validate_config_value!(:pretty_printer_indent_type, value)
|
|
288
289
|
@resolver.set_programmatic(:pretty_printer_indent_type, value)
|
|
289
290
|
end
|
|
290
291
|
end
|
|
@@ -293,6 +294,20 @@ module Canon
|
|
|
293
294
|
class DiffConfig
|
|
294
295
|
attr_reader :pretty_printer
|
|
295
296
|
|
|
297
|
+
# Valid values for enum-like configuration options
|
|
298
|
+
VALID_ENUM_VALUES = {
|
|
299
|
+
mode: %i[by_line by_object pretty_diff],
|
|
300
|
+
show_diffs: %i[all normative informative],
|
|
301
|
+
algorithm: %i[dom semantic],
|
|
302
|
+
parser: %i[sax dom],
|
|
303
|
+
display_preprocessing: %i[none pretty_print normalize_pretty_print
|
|
304
|
+
c14n],
|
|
305
|
+
display_format: %i[raw canonical],
|
|
306
|
+
pretty_printer_indent_type: %i[space tab],
|
|
307
|
+
character_visualization: [true, false, :content_only],
|
|
308
|
+
theme: %i[light dark retro claude cyberpunk],
|
|
309
|
+
}.freeze
|
|
310
|
+
|
|
296
311
|
def initialize(format = nil)
|
|
297
312
|
@format = format
|
|
298
313
|
@resolver = build_resolver(format)
|
|
@@ -309,7 +324,9 @@ module Canon
|
|
|
309
324
|
|
|
310
325
|
data.each do |key, value|
|
|
311
326
|
sym_key = key.to_sym
|
|
312
|
-
|
|
327
|
+
coerced = coerce_profile_value(sym_key, value)
|
|
328
|
+
self.class.validate_config_value!(sym_key, coerced)
|
|
329
|
+
@resolver.set_profile(sym_key, coerced)
|
|
313
330
|
end
|
|
314
331
|
end
|
|
315
332
|
|
|
@@ -317,12 +334,25 @@ module Canon
|
|
|
317
334
|
@resolver.clear_profile!
|
|
318
335
|
end
|
|
319
336
|
|
|
337
|
+
# Validate a config value against its allowed enum values
|
|
338
|
+
def self.validate_config_value!(key, value)
|
|
339
|
+
valid = VALID_ENUM_VALUES[key]
|
|
340
|
+
return unless valid
|
|
341
|
+
|
|
342
|
+
return if valid.include?(value)
|
|
343
|
+
|
|
344
|
+
raise ArgumentError,
|
|
345
|
+
"Invalid value #{value.inspect} for #{key}. " \
|
|
346
|
+
"Valid values: #{valid.map(&:inspect).join(', ')}"
|
|
347
|
+
end
|
|
348
|
+
|
|
320
349
|
# Accessors with ENV override support
|
|
321
350
|
def mode
|
|
322
351
|
@resolver.resolve(:mode)
|
|
323
352
|
end
|
|
324
353
|
|
|
325
354
|
def mode=(value)
|
|
355
|
+
self.class.validate_config_value!(:mode, value)
|
|
326
356
|
@resolver.set_programmatic(:mode, value)
|
|
327
357
|
end
|
|
328
358
|
|
|
@@ -355,6 +385,7 @@ module Canon
|
|
|
355
385
|
end
|
|
356
386
|
|
|
357
387
|
def show_diffs=(value)
|
|
388
|
+
self.class.validate_config_value!(:show_diffs, value)
|
|
358
389
|
@resolver.set_programmatic(:show_diffs, value)
|
|
359
390
|
end
|
|
360
391
|
|
|
@@ -495,6 +526,7 @@ module Canon
|
|
|
495
526
|
end
|
|
496
527
|
|
|
497
528
|
def display_format=(value)
|
|
529
|
+
self.class.validate_config_value!(:display_format, value)
|
|
498
530
|
@resolver.set_programmatic(:display_format, value)
|
|
499
531
|
end
|
|
500
532
|
|
|
@@ -511,6 +543,7 @@ module Canon
|
|
|
511
543
|
end
|
|
512
544
|
|
|
513
545
|
def display_preprocessing=(value)
|
|
546
|
+
self.class.validate_config_value!(:display_preprocessing, value)
|
|
514
547
|
@resolver.set_programmatic(:display_preprocessing, value)
|
|
515
548
|
end
|
|
516
549
|
|
|
@@ -620,10 +653,8 @@ module Canon
|
|
|
620
653
|
# Values:
|
|
621
654
|
# true - apply the full default visualization map (default)
|
|
622
655
|
# false - disable visualization; output plain text
|
|
623
|
-
# :content_only -
|
|
624
|
-
#
|
|
625
|
-
# node content, not to structural indentation whitespace.
|
|
626
|
-
# (TODO: implement DOM-level pre-serialization pass)
|
|
656
|
+
# :content_only - apply visualization only to text content, not
|
|
657
|
+
# to structural indentation whitespace.
|
|
627
658
|
def character_visualization
|
|
628
659
|
val = @resolver.resolve(:character_visualization)
|
|
629
660
|
# Coerce symbol booleans that may arrive via ENV (env_schema uses :symbol type
|
|
@@ -636,6 +667,7 @@ module Canon
|
|
|
636
667
|
end
|
|
637
668
|
|
|
638
669
|
def character_visualization=(value)
|
|
670
|
+
self.class.validate_config_value!(:character_visualization, value)
|
|
639
671
|
@resolver.set_programmatic(:character_visualization, value)
|
|
640
672
|
end
|
|
641
673
|
|
|
@@ -644,15 +676,27 @@ module Canon
|
|
|
644
676
|
end
|
|
645
677
|
|
|
646
678
|
def algorithm=(value)
|
|
679
|
+
self.class.validate_config_value!(:algorithm, value)
|
|
647
680
|
@resolver.set_programmatic(:algorithm, value)
|
|
648
681
|
end
|
|
649
682
|
|
|
683
|
+
# XML parser backend (:sax or :dom, default :sax)
|
|
684
|
+
def parser
|
|
685
|
+
@resolver.resolve(:parser)
|
|
686
|
+
end
|
|
687
|
+
|
|
688
|
+
def parser=(value)
|
|
689
|
+
self.class.validate_config_value!(:parser, value)
|
|
690
|
+
@resolver.set_programmatic(:parser, value)
|
|
691
|
+
end
|
|
692
|
+
|
|
650
693
|
# Theme name (:light, :dark, :retro, :claude, :cyberpunk)
|
|
651
694
|
def theme
|
|
652
695
|
@resolver.resolve(:theme)
|
|
653
696
|
end
|
|
654
697
|
|
|
655
698
|
def theme=(value)
|
|
699
|
+
self.class.validate_config_value!(:theme, value)
|
|
656
700
|
@resolver.set_programmatic(:theme, value)
|
|
657
701
|
end
|
|
658
702
|
|
|
@@ -693,6 +737,7 @@ module Canon
|
|
|
693
737
|
show_diffs: show_diffs,
|
|
694
738
|
verbose_diff: verbose_diff,
|
|
695
739
|
diff_algorithm: algorithm,
|
|
740
|
+
parser: parser,
|
|
696
741
|
show_raw_inputs: show_raw_inputs,
|
|
697
742
|
show_raw_expected: show_raw_expected,
|
|
698
743
|
show_raw_received: show_raw_received,
|
|
@@ -733,6 +778,7 @@ module Canon
|
|
|
733
778
|
show_diffs: :all,
|
|
734
779
|
verbose_diff: false,
|
|
735
780
|
algorithm: :dom,
|
|
781
|
+
parser: :sax,
|
|
736
782
|
show_raw_inputs: false,
|
|
737
783
|
show_raw_expected: false,
|
|
738
784
|
show_raw_received: false,
|
|
@@ -150,8 +150,9 @@ module Canon
|
|
|
150
150
|
end
|
|
151
151
|
|
|
152
152
|
# Check if the text node is inside a whitespace-sensitive element
|
|
153
|
-
# (preserve/collapse classification
|
|
154
|
-
#
|
|
153
|
+
# (preserve/collapse classification, xml:space='preserve', or
|
|
154
|
+
# between inline element siblings in HTML).
|
|
155
|
+
# In these contexts, whitespace presence is meaningful and should
|
|
155
156
|
# not be dismissed as serialization formatting.
|
|
156
157
|
# @param diff_node [DiffNode] The diff node to check
|
|
157
158
|
# @return [Boolean] true if whitespace is preserved for this element
|
|
@@ -159,6 +160,21 @@ module Canon
|
|
|
159
160
|
node = diff_node.node1 || diff_node.node2
|
|
160
161
|
return false unless node
|
|
161
162
|
|
|
163
|
+
# HTML: whitespace between inline element siblings is significant
|
|
164
|
+
if Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(node)
|
|
165
|
+
return true
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# HTML: non-breaking space (U+00A0) is never insignificant
|
|
169
|
+
text = if node.respond_to?(:content)
|
|
170
|
+
node.content
|
|
171
|
+
elsif node.respond_to?(:value)
|
|
172
|
+
node.value
|
|
173
|
+
end
|
|
174
|
+
if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
|
|
175
|
+
return true
|
|
176
|
+
end
|
|
177
|
+
|
|
162
178
|
return false unless node.respond_to?(:parent)
|
|
163
179
|
|
|
164
180
|
parent = node.parent
|
|
@@ -858,12 +858,14 @@ new_line_ranges)
|
|
|
858
858
|
# The DiffNode's explicit formatting? flag takes precedence:
|
|
859
859
|
# - If formatting? == true: return true (explicitly formatting-only)
|
|
860
860
|
#
|
|
861
|
-
# If node exists and is normative
|
|
862
|
-
# -
|
|
863
|
-
#
|
|
861
|
+
# If node exists and is normative:
|
|
862
|
+
# - Return false — normative DiffNodes are NEVER formatting-only.
|
|
863
|
+
# Even if the serialized content looks whitespace-equivalent,
|
|
864
|
+
# the comparison classified it as a normative change and it MUST
|
|
865
|
+
# be visible in by_line output (especially with show_diffs: :normative).
|
|
864
866
|
#
|
|
865
867
|
# If node exists and is informative (norm=false):
|
|
866
|
-
# - Return false (informative diffs are
|
|
868
|
+
# - Return false (informative diffs are shown as informative)
|
|
867
869
|
#
|
|
868
870
|
# If NO node exists (diff_node is nil):
|
|
869
871
|
# - Use heuristics: comment-only lines and FormattingDetector
|
|
@@ -877,11 +879,10 @@ new_line_ranges)
|
|
|
877
879
|
return true if diff_node&.formatting?
|
|
878
880
|
|
|
879
881
|
if diff_node
|
|
880
|
-
#
|
|
881
|
-
return false
|
|
882
|
+
# Normative nodes are never formatting-only
|
|
883
|
+
return false if diff_node.normative?
|
|
882
884
|
|
|
883
|
-
#
|
|
884
|
-
# (but NOT comment_only_line? which would misclassify comment content changes)
|
|
885
|
+
# Informative nodes: check line-level formatting
|
|
885
886
|
elsif comment_only_line?(line1) || comment_only_line?(line2)
|
|
886
887
|
# No DiffNode: use heuristics
|
|
887
888
|
return true
|
|
@@ -45,11 +45,13 @@ module Canon
|
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# rubocop:disable Metrics/ParameterLists
|
|
48
49
|
def initialize(use_color: true, context_lines: 3,
|
|
49
50
|
diff_grouping_lines: nil, visualization_map: nil,
|
|
50
51
|
show_diffs: :all, differences: [],
|
|
51
52
|
diff_mode: :separate, legacy_terminal: false,
|
|
52
|
-
equivalent: nil, theme: nil
|
|
53
|
+
equivalent: nil, theme: nil,
|
|
54
|
+
character_visualization: true)
|
|
53
55
|
@use_color = use_color
|
|
54
56
|
@context_lines = context_lines
|
|
55
57
|
@diff_grouping_lines = diff_grouping_lines
|
|
@@ -61,7 +63,9 @@ module Canon
|
|
|
61
63
|
@legacy_terminal = legacy_terminal
|
|
62
64
|
@equivalent = equivalent
|
|
63
65
|
@theme = theme
|
|
66
|
+
@character_visualization = character_visualization
|
|
64
67
|
end
|
|
68
|
+
# rubocop:enable Metrics/ParameterLists
|
|
65
69
|
|
|
66
70
|
# Get the resolved theme hash
|
|
67
71
|
# @return [Hash] Theme hash
|
|
@@ -644,15 +648,23 @@ module Canon
|
|
|
644
648
|
|
|
645
649
|
# Apply character visualization
|
|
646
650
|
#
|
|
651
|
+
# When +character_visualization+ is +:content_only+, leading
|
|
652
|
+
# structural whitespace (indentation) is left plain while content
|
|
653
|
+
# whitespace is visualized.
|
|
654
|
+
#
|
|
647
655
|
# @param token [String] The token to apply visualization to
|
|
648
656
|
# @param color [Symbol, nil] Optional color to apply
|
|
649
657
|
# @return [String] Visualized and optionally colored token
|
|
650
658
|
def apply_visualization(token, color = nil)
|
|
651
659
|
return "" if token.nil?
|
|
652
660
|
|
|
653
|
-
visual =
|
|
654
|
-
|
|
655
|
-
|
|
661
|
+
visual = if @character_visualization == :content_only
|
|
662
|
+
visualize_content_only(token.to_s)
|
|
663
|
+
else
|
|
664
|
+
token.to_s.chars.map do |char|
|
|
665
|
+
@visualization_map.fetch(char, char)
|
|
666
|
+
end.join
|
|
667
|
+
end
|
|
656
668
|
|
|
657
669
|
if color && @use_color
|
|
658
670
|
require "rainbow"
|
|
@@ -678,6 +690,29 @@ module Canon
|
|
|
678
690
|
end
|
|
679
691
|
end
|
|
680
692
|
|
|
693
|
+
# Visualize only content portion, leaving structural indentation plain.
|
|
694
|
+
#
|
|
695
|
+
# Splits the token into leading whitespace (structural indentation)
|
|
696
|
+
# and the rest (content). Only the content portion gets character
|
|
697
|
+
# visualization.
|
|
698
|
+
#
|
|
699
|
+
# @param token [String] The full line token
|
|
700
|
+
# @return [String] Token with content-only visualization
|
|
701
|
+
def visualize_content_only(token)
|
|
702
|
+
# Leading whitespace is structural indentation — keep it plain
|
|
703
|
+
indent_end = token.index(/[^\s]/) || token.length
|
|
704
|
+
indent = token[0...indent_end]
|
|
705
|
+
content = token[indent_end..]
|
|
706
|
+
|
|
707
|
+
if content.nil? || content.empty?
|
|
708
|
+
indent
|
|
709
|
+
else
|
|
710
|
+
indent + content.chars.map { |char|
|
|
711
|
+
@visualization_map.fetch(char, char)
|
|
712
|
+
}.join
|
|
713
|
+
end
|
|
714
|
+
end
|
|
715
|
+
|
|
681
716
|
# Get max diff lines limit
|
|
682
717
|
#
|
|
683
718
|
# @return [Integer, nil] Max diff output lines
|
|
@@ -12,19 +12,22 @@ module Canon
|
|
|
12
12
|
class HtmlFormatter < BaseFormatter
|
|
13
13
|
attr_reader :html_version
|
|
14
14
|
|
|
15
|
+
# rubocop:disable Metrics/ParameterLists
|
|
15
16
|
def initialize(use_color: true, context_lines: 3,
|
|
16
17
|
diff_grouping_lines: nil, visualization_map: nil,
|
|
17
18
|
html_version: :html4, show_diffs: :all, differences: [],
|
|
18
19
|
diff_mode: :separate, legacy_terminal: false,
|
|
19
|
-
equivalent: nil)
|
|
20
|
+
equivalent: nil, character_visualization: true)
|
|
20
21
|
super(use_color: use_color, context_lines: context_lines,
|
|
21
22
|
diff_grouping_lines: diff_grouping_lines,
|
|
22
23
|
visualization_map: visualization_map,
|
|
23
24
|
show_diffs: show_diffs, differences: differences,
|
|
24
25
|
diff_mode: diff_mode, legacy_terminal: legacy_terminal,
|
|
25
|
-
equivalent: equivalent
|
|
26
|
+
equivalent: equivalent,
|
|
27
|
+
character_visualization: character_visualization)
|
|
26
28
|
@html_version = html_version
|
|
27
29
|
end
|
|
30
|
+
# rubocop:enable Metrics/ParameterLists
|
|
28
31
|
|
|
29
32
|
# Format DOM-guided HTML diff
|
|
30
33
|
#
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
class DiffFormatter
|
|
7
|
+
# Handles the by_line rendering pipeline for line-by-line diffs.
|
|
8
|
+
#
|
|
9
|
+
# Receives preprocessed document strings from the DiffFormatter facade
|
|
10
|
+
# and delegates to format-specific ByLine formatters (XML, HTML, JSON, YAML).
|
|
11
|
+
class ByLineFormatter
|
|
12
|
+
# rubocop:disable Metrics/ParameterLists
|
|
13
|
+
def initialize(use_color:, visualization_map:, context_lines:,
|
|
14
|
+
diff_grouping_lines:, show_diffs:, character_visualization:,
|
|
15
|
+
legacy_terminal:, diff_mode:)
|
|
16
|
+
@use_color = use_color
|
|
17
|
+
@visualization_map = visualization_map
|
|
18
|
+
@context_lines = context_lines
|
|
19
|
+
@diff_grouping_lines = diff_grouping_lines
|
|
20
|
+
@show_diffs = show_diffs
|
|
21
|
+
@character_visualization = character_visualization
|
|
22
|
+
@legacy_terminal = legacy_terminal
|
|
23
|
+
@diff_mode = diff_mode
|
|
24
|
+
end
|
|
25
|
+
# rubocop:enable Metrics/ParameterLists
|
|
26
|
+
|
|
27
|
+
# Format a line-by-line diff between two documents.
|
|
28
|
+
#
|
|
29
|
+
# @param doc1 [String] First document (already preprocessed)
|
|
30
|
+
# @param doc2 [String] Second document (already preprocessed)
|
|
31
|
+
# @param format [Symbol] Document format (:xml, :html, :json, :yaml, etc.)
|
|
32
|
+
# @param html_version [Symbol, nil] HTML version override (:html4, :html5)
|
|
33
|
+
# @param differences [Array, ComparisonResult] Differences from comparison
|
|
34
|
+
# @return [String] Formatted diff output
|
|
35
|
+
def format(doc1, doc2, format:, html_version: nil, differences: [])
|
|
36
|
+
resolved_format = format == :html && html_version ? html_version : format
|
|
37
|
+
format_name = resolved_format.to_s.upcase
|
|
38
|
+
|
|
39
|
+
output = []
|
|
40
|
+
output << colorize("Line-by-line diff (#{format_name} mode):", :cyan,
|
|
41
|
+
:bold)
|
|
42
|
+
|
|
43
|
+
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
44
|
+
|
|
45
|
+
diffs_array = extract_differences(differences)
|
|
46
|
+
|
|
47
|
+
formatter = ByLine::BaseFormatter.for_format(
|
|
48
|
+
resolved_format,
|
|
49
|
+
use_color: @use_color,
|
|
50
|
+
context_lines: @context_lines,
|
|
51
|
+
diff_grouping_lines: @diff_grouping_lines,
|
|
52
|
+
visualization_map: @visualization_map,
|
|
53
|
+
show_diffs: @show_diffs,
|
|
54
|
+
differences: diffs_array,
|
|
55
|
+
diff_mode: @legacy_terminal ? :separate : @diff_mode,
|
|
56
|
+
legacy_terminal: @legacy_terminal,
|
|
57
|
+
equivalent: @comparison_equivalent,
|
|
58
|
+
character_visualization: @character_visualization,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
output << formatter.format(doc1, doc2)
|
|
62
|
+
output.join("\n")
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
def extract_differences(differences)
|
|
68
|
+
if differences.is_a?(Canon::Comparison::ComparisonResult)
|
|
69
|
+
@comparison_equivalent = differences.equivalent?
|
|
70
|
+
differences.differences
|
|
71
|
+
else
|
|
72
|
+
@comparison_equivalent = nil
|
|
73
|
+
differences
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def colorize(text, *colors)
|
|
78
|
+
return text unless @use_color
|
|
79
|
+
|
|
80
|
+
"\e[0m#{Paint[text, *colors]}"
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
class DiffFormatter
|
|
7
|
+
# Handles the by_object rendering pipeline for tree-based semantic diffs.
|
|
8
|
+
#
|
|
9
|
+
# Delegates to format-specific ByObject formatters (XML, JSON, YAML)
|
|
10
|
+
# which produce visual tree output with box-drawing characters.
|
|
11
|
+
class ByObjectFormatter
|
|
12
|
+
def initialize(use_color:, visualization_map:, show_diffs:)
|
|
13
|
+
@use_color = use_color
|
|
14
|
+
@visualization_map = visualization_map
|
|
15
|
+
@show_diffs = show_diffs
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Format a tree-based object diff.
|
|
19
|
+
#
|
|
20
|
+
# @param differences [Array, ComparisonResult] Differences from comparison
|
|
21
|
+
# @param format [Symbol] Document format (:xml, :json, :yaml)
|
|
22
|
+
# @return [String] Formatted diff output
|
|
23
|
+
def format(differences, format)
|
|
24
|
+
output = []
|
|
25
|
+
output << colorize("Visual Diff:", :cyan, :bold)
|
|
26
|
+
|
|
27
|
+
diffs_array = if differences.is_a?(Canon::Comparison::ComparisonResult)
|
|
28
|
+
differences.differences
|
|
29
|
+
else
|
|
30
|
+
differences
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
formatter = ByObject::BaseFormatter.for_format(
|
|
34
|
+
format,
|
|
35
|
+
use_color: @use_color,
|
|
36
|
+
visualization_map: @visualization_map,
|
|
37
|
+
show_diffs: @show_diffs,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
output << formatter.format(diffs_array, format)
|
|
41
|
+
output.join("\n")
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
def colorize(text, *colors)
|
|
47
|
+
return text unless @use_color
|
|
48
|
+
|
|
49
|
+
"\e[0m#{Paint[text, *colors]}"
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|