canon 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +96 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/diff-formatting/index.adoc +3 -0
- data/docs/features/diff-formatting/whitespace-adjacency.adoc +140 -0
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/environment-variables.adoc +3 -1
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +95 -2
- data/lib/canon/comparison/html_comparator.rb +96 -11
- data/lib/canon/comparison/markup_comparator.rb +68 -71
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/node_inspector.rb +103 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +133 -55
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +174 -7
- data/lib/canon/comparison/xml_node_comparison.rb +48 -66
- data/lib/canon/comparison.rb +143 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +55 -41
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +184 -26
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +92 -4
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +128 -175
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/pretty_printer/html.rb +76 -14
- data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
- data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/data_model.rb +13 -1
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +23 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +29 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +9 -2
data/lib/canon/config.rb
CHANGED
|
@@ -285,6 +285,7 @@ module Canon
|
|
|
285
285
|
end
|
|
286
286
|
|
|
287
287
|
def indent_type=(value)
|
|
288
|
+
DiffConfig.validate_config_value!(:pretty_printer_indent_type, value)
|
|
288
289
|
@resolver.set_programmatic(:pretty_printer_indent_type, value)
|
|
289
290
|
end
|
|
290
291
|
end
|
|
@@ -293,6 +294,20 @@ module Canon
|
|
|
293
294
|
class DiffConfig
|
|
294
295
|
attr_reader :pretty_printer
|
|
295
296
|
|
|
297
|
+
# Valid values for enum-like configuration options
|
|
298
|
+
VALID_ENUM_VALUES = {
|
|
299
|
+
mode: %i[by_line by_object pretty_diff],
|
|
300
|
+
show_diffs: %i[all normative informative],
|
|
301
|
+
algorithm: %i[dom semantic],
|
|
302
|
+
parser: %i[sax dom],
|
|
303
|
+
display_preprocessing: %i[none pretty_print normalize_pretty_print
|
|
304
|
+
c14n],
|
|
305
|
+
display_format: %i[raw canonical],
|
|
306
|
+
pretty_printer_indent_type: %i[space tab],
|
|
307
|
+
character_visualization: [true, false, :content_only],
|
|
308
|
+
theme: %i[light dark retro claude cyberpunk],
|
|
309
|
+
}.freeze
|
|
310
|
+
|
|
296
311
|
def initialize(format = nil)
|
|
297
312
|
@format = format
|
|
298
313
|
@resolver = build_resolver(format)
|
|
@@ -309,7 +324,9 @@ module Canon
|
|
|
309
324
|
|
|
310
325
|
data.each do |key, value|
|
|
311
326
|
sym_key = key.to_sym
|
|
312
|
-
|
|
327
|
+
coerced = coerce_profile_value(sym_key, value)
|
|
328
|
+
self.class.validate_config_value!(sym_key, coerced)
|
|
329
|
+
@resolver.set_profile(sym_key, coerced)
|
|
313
330
|
end
|
|
314
331
|
end
|
|
315
332
|
|
|
@@ -317,12 +334,25 @@ module Canon
|
|
|
317
334
|
@resolver.clear_profile!
|
|
318
335
|
end
|
|
319
336
|
|
|
337
|
+
# Validate a config value against its allowed enum values.
#
# Only keys listed in VALID_ENUM_VALUES are checked; any other key is
# accepted without inspection. No coercion is performed, so the value
# must already be one of the listed objects (e.g. a Symbol, not a String).
#
# @param key [Symbol] Configuration option name
# @param value [Object] Candidate value for that option
# @return [nil] when the value is acceptable
# @raise [ArgumentError] if +key+ is enum-like and +value+ is not listed
def self.validate_config_value!(key, value)
  allowed = VALID_ENUM_VALUES[key]
  return if !allowed || allowed.include?(value)

  raise ArgumentError,
        "Invalid value #{value.inspect} for #{key}. " \
        "Valid values: #{allowed.map(&:inspect).join(', ')}"
end
|
|
348
|
+
|
|
320
349
|
# Accessors with ENV override support
|
|
321
350
|
def mode
|
|
322
351
|
@resolver.resolve(:mode)
|
|
323
352
|
end
|
|
324
353
|
|
|
325
354
|
def mode=(value)
|
|
355
|
+
self.class.validate_config_value!(:mode, value)
|
|
326
356
|
@resolver.set_programmatic(:mode, value)
|
|
327
357
|
end
|
|
328
358
|
|
|
@@ -355,6 +385,7 @@ module Canon
|
|
|
355
385
|
end
|
|
356
386
|
|
|
357
387
|
def show_diffs=(value)
|
|
388
|
+
self.class.validate_config_value!(:show_diffs, value)
|
|
358
389
|
@resolver.set_programmatic(:show_diffs, value)
|
|
359
390
|
end
|
|
360
391
|
|
|
@@ -495,6 +526,7 @@ module Canon
|
|
|
495
526
|
end
|
|
496
527
|
|
|
497
528
|
def display_format=(value)
|
|
529
|
+
self.class.validate_config_value!(:display_format, value)
|
|
498
530
|
@resolver.set_programmatic(:display_format, value)
|
|
499
531
|
end
|
|
500
532
|
|
|
@@ -511,6 +543,7 @@ module Canon
|
|
|
511
543
|
end
|
|
512
544
|
|
|
513
545
|
def display_preprocessing=(value)
|
|
546
|
+
self.class.validate_config_value!(:display_preprocessing, value)
|
|
514
547
|
@resolver.set_programmatic(:display_preprocessing, value)
|
|
515
548
|
end
|
|
516
549
|
|
|
@@ -620,10 +653,8 @@ module Canon
|
|
|
620
653
|
# Values:
|
|
621
654
|
# true - apply the full default visualization map (default)
|
|
622
655
|
# false - disable visualization; output plain text
|
|
623
|
-
# :content_only -
|
|
624
|
-
#
|
|
625
|
-
# node content, not to structural indentation whitespace.
|
|
626
|
-
# (TODO: implement DOM-level pre-serialization pass)
|
|
656
|
+
# :content_only - apply visualization only to text content, not
|
|
657
|
+
# to structural indentation whitespace.
|
|
627
658
|
def character_visualization
|
|
628
659
|
val = @resolver.resolve(:character_visualization)
|
|
629
660
|
# Coerce symbol booleans that may arrive via ENV (env_schema uses :symbol type
|
|
@@ -636,6 +667,7 @@ module Canon
|
|
|
636
667
|
end
|
|
637
668
|
|
|
638
669
|
def character_visualization=(value)
|
|
670
|
+
self.class.validate_config_value!(:character_visualization, value)
|
|
639
671
|
@resolver.set_programmatic(:character_visualization, value)
|
|
640
672
|
end
|
|
641
673
|
|
|
@@ -644,15 +676,27 @@ module Canon
|
|
|
644
676
|
end
|
|
645
677
|
|
|
646
678
|
def algorithm=(value)
|
|
679
|
+
self.class.validate_config_value!(:algorithm, value)
|
|
647
680
|
@resolver.set_programmatic(:algorithm, value)
|
|
648
681
|
end
|
|
649
682
|
|
|
683
|
+
# XML parser backend (:sax or :dom, default :sax)
|
|
684
|
+
def parser
|
|
685
|
+
@resolver.resolve(:parser)
|
|
686
|
+
end
|
|
687
|
+
|
|
688
|
+
def parser=(value)
|
|
689
|
+
self.class.validate_config_value!(:parser, value)
|
|
690
|
+
@resolver.set_programmatic(:parser, value)
|
|
691
|
+
end
|
|
692
|
+
|
|
650
693
|
# Theme name (:light, :dark, :retro, :claude, :cyberpunk)
|
|
651
694
|
def theme
|
|
652
695
|
@resolver.resolve(:theme)
|
|
653
696
|
end
|
|
654
697
|
|
|
655
698
|
def theme=(value)
|
|
699
|
+
self.class.validate_config_value!(:theme, value)
|
|
656
700
|
@resolver.set_programmatic(:theme, value)
|
|
657
701
|
end
|
|
658
702
|
|
|
@@ -693,6 +737,7 @@ module Canon
|
|
|
693
737
|
show_diffs: show_diffs,
|
|
694
738
|
verbose_diff: verbose_diff,
|
|
695
739
|
diff_algorithm: algorithm,
|
|
740
|
+
parser: parser,
|
|
696
741
|
show_raw_inputs: show_raw_inputs,
|
|
697
742
|
show_raw_expected: show_raw_expected,
|
|
698
743
|
show_raw_received: show_raw_received,
|
|
@@ -733,6 +778,7 @@ module Canon
|
|
|
733
778
|
show_diffs: :all,
|
|
734
779
|
verbose_diff: false,
|
|
735
780
|
algorithm: :dom,
|
|
781
|
+
parser: :sax,
|
|
736
782
|
show_raw_inputs: false,
|
|
737
783
|
show_raw_expected: false,
|
|
738
784
|
show_raw_received: false,
|
|
@@ -73,6 +73,16 @@ module Canon
|
|
|
73
73
|
return diff_node
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
+
# :whitespace_adjacency is a report-only re-label of an
|
|
77
|
+
# asymmetric whitespace mismatch emitted by ChildComparison's
|
|
78
|
+
# two-cursor walk. Equivalence behaviour is unchanged — the
|
|
79
|
+
# underlying mismatch is normative regardless of match options.
|
|
80
|
+
if diff_node.dimension == :whitespace_adjacency
|
|
81
|
+
diff_node.formatting = false
|
|
82
|
+
diff_node.normative = true
|
|
83
|
+
return diff_node
|
|
84
|
+
end
|
|
85
|
+
|
|
76
86
|
# THIRD: Determine if this dimension is normative based on CompareProfile
|
|
77
87
|
# This respects the policy settings (strict/normalize/ignore)
|
|
78
88
|
is_normative = profile.normative_dimension?(diff_node.dimension)
|
|
@@ -150,8 +160,9 @@ module Canon
|
|
|
150
160
|
end
|
|
151
161
|
|
|
152
162
|
# Check if the text node is inside a whitespace-sensitive element
|
|
153
|
-
# (preserve/collapse classification
|
|
154
|
-
#
|
|
163
|
+
# (preserve/collapse classification, xml:space='preserve', or
|
|
164
|
+
# between inline element siblings in HTML).
|
|
165
|
+
# In these contexts, whitespace presence is meaningful and should
|
|
155
166
|
# not be dismissed as serialization formatting.
|
|
156
167
|
# @param diff_node [DiffNode] The diff node to check
|
|
157
168
|
# @return [Boolean] true if whitespace is preserved for this element
|
|
@@ -159,6 +170,21 @@ module Canon
|
|
|
159
170
|
node = diff_node.node1 || diff_node.node2
|
|
160
171
|
return false unless node
|
|
161
172
|
|
|
173
|
+
# HTML: whitespace between inline element siblings is significant
|
|
174
|
+
if Canon::Comparison::WhitespaceSensitivity.inline_whitespace_significant?(node)
|
|
175
|
+
return true
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# HTML: non-breaking space (U+00A0) is never insignificant
|
|
179
|
+
text = if node.respond_to?(:content)
|
|
180
|
+
node.content
|
|
181
|
+
elsif node.respond_to?(:value)
|
|
182
|
+
node.value
|
|
183
|
+
end
|
|
184
|
+
if text && Canon::Comparison::WhitespaceSensitivity.contains_nbsp?(text)
|
|
185
|
+
return true
|
|
186
|
+
end
|
|
187
|
+
|
|
162
188
|
return false unless node.respond_to?(:parent)
|
|
163
189
|
|
|
164
190
|
parent = node.parent
|
|
@@ -194,28 +220,21 @@ module Canon
|
|
|
194
220
|
def extract_text_content(node)
|
|
195
221
|
return nil if node.nil?
|
|
196
222
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
# For simple text nodes or strings
|
|
213
|
-
return node.to_s if node.is_a?(String)
|
|
214
|
-
|
|
215
|
-
# For other node types, try to_s
|
|
216
|
-
node.to_s
|
|
223
|
+
case node
|
|
224
|
+
when Canon::Xml::Nodes::TextNode
|
|
225
|
+
node.value
|
|
226
|
+
when Canon::Xml::Node
|
|
227
|
+
node.text_content
|
|
228
|
+
when Nokogiri::XML::Node
|
|
229
|
+
node.content.to_s
|
|
230
|
+
when Moxml::Node
|
|
231
|
+
node.content.to_s
|
|
232
|
+
when String
|
|
233
|
+
node
|
|
234
|
+
else
|
|
235
|
+
node.to_s
|
|
236
|
+
end
|
|
217
237
|
rescue StandardError
|
|
218
|
-
# If extraction fails, return nil (not formatting-only)
|
|
219
238
|
nil
|
|
220
239
|
end
|
|
221
240
|
|
|
@@ -225,25 +244,20 @@ module Canon
|
|
|
225
244
|
def text_node?(node)
|
|
226
245
|
return false if node.nil?
|
|
227
246
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
247
|
+
case node
|
|
248
|
+
when Canon::Xml::Nodes::TextNode
|
|
249
|
+
true
|
|
250
|
+
when Canon::Xml::Node
|
|
251
|
+
node.node_type == :text
|
|
252
|
+
when Nokogiri::XML::Node
|
|
234
253
|
node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
# Test doubles or objects with text node-like interface
|
|
243
|
-
# Check if it has a value method (contains text content)
|
|
244
|
-
return true if node.respond_to?(:value)
|
|
245
|
-
|
|
246
|
-
false
|
|
254
|
+
when Moxml::Node
|
|
255
|
+
node.text?
|
|
256
|
+
when String
|
|
257
|
+
true
|
|
258
|
+
else
|
|
259
|
+
false
|
|
260
|
+
end
|
|
247
261
|
end
|
|
248
262
|
end
|
|
249
263
|
end
|
|
@@ -858,12 +858,14 @@ new_line_ranges)
|
|
|
858
858
|
# The DiffNode's explicit formatting? flag takes precedence:
|
|
859
859
|
# - If formatting? == true: return true (explicitly formatting-only)
|
|
860
860
|
#
|
|
861
|
-
# If node exists and is normative
|
|
862
|
-
# -
|
|
863
|
-
#
|
|
861
|
+
# If node exists and is normative:
|
|
862
|
+
# - Return false — normative DiffNodes are NEVER formatting-only.
|
|
863
|
+
# Even if the serialized content looks whitespace-equivalent,
|
|
864
|
+
# the comparison classified it as a normative change and it MUST
|
|
865
|
+
# be visible in by_line output (especially with show_diffs: :normative).
|
|
864
866
|
#
|
|
865
867
|
# If node exists and is informative (norm=false):
|
|
866
|
-
# - Return false (informative diffs are
|
|
868
|
+
# - Return false (informative diffs are shown as informative)
|
|
867
869
|
#
|
|
868
870
|
# If NO node exists (diff_node is nil):
|
|
869
871
|
# - Use heuristics: comment-only lines and FormattingDetector
|
|
@@ -877,11 +879,10 @@ new_line_ranges)
|
|
|
877
879
|
return true if diff_node&.formatting?
|
|
878
880
|
|
|
879
881
|
if diff_node
|
|
880
|
-
#
|
|
881
|
-
return false
|
|
882
|
+
# Normative nodes are never formatting-only
|
|
883
|
+
return false if diff_node.normative?
|
|
882
884
|
|
|
883
|
-
#
|
|
884
|
-
# (but NOT comment_only_line? which would misclassify comment content changes)
|
|
885
|
+
# Informative nodes: check line-level formatting
|
|
885
886
|
elsif comment_only_line?(line1) || comment_only_line?(line2)
|
|
886
887
|
# No DiffNode: use heuristics
|
|
887
888
|
return true
|
|
@@ -91,28 +91,20 @@ module Canon
|
|
|
91
91
|
def self.text_node?(node)
|
|
92
92
|
return false if node.nil?
|
|
93
93
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# Nokogiri text nodes (node_type returns integer constant like 3)
|
|
101
|
-
return true if node.respond_to?(:node_type) &&
|
|
102
|
-
node.node_type.is_a?(Integer) &&
|
|
94
|
+
case node
|
|
95
|
+
when Canon::Xml::Nodes::TextNode
|
|
96
|
+
true
|
|
97
|
+
when Canon::Xml::Node
|
|
98
|
+
node.node_type == :text
|
|
99
|
+
when Nokogiri::XML::Node
|
|
103
100
|
node.node_type == Nokogiri::XML::Node::TEXT_NODE
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
# Test doubles or objects with text node-like interface
|
|
112
|
-
# Check if it has a value method (contains text content)
|
|
113
|
-
return true if node.respond_to?(:value)
|
|
114
|
-
|
|
115
|
-
false
|
|
101
|
+
when Moxml::Node
|
|
102
|
+
node.text?
|
|
103
|
+
when String
|
|
104
|
+
true
|
|
105
|
+
else
|
|
106
|
+
false
|
|
107
|
+
end
|
|
116
108
|
end
|
|
117
109
|
|
|
118
110
|
# Extract text content from a node
|
|
@@ -121,28 +113,21 @@ module Canon
|
|
|
121
113
|
def self.extract_text_content(node)
|
|
122
114
|
return nil if node.nil?
|
|
123
115
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
# For simple text nodes or strings
|
|
140
|
-
return node.to_s if node.is_a?(String)
|
|
141
|
-
|
|
142
|
-
# For other node types, try to_s
|
|
143
|
-
node.to_s
|
|
116
|
+
case node
|
|
117
|
+
when Canon::Xml::Nodes::TextNode
|
|
118
|
+
node.value
|
|
119
|
+
when Canon::Xml::Node
|
|
120
|
+
node.text_content
|
|
121
|
+
when Nokogiri::XML::Node
|
|
122
|
+
node.content.to_s
|
|
123
|
+
when Moxml::Node
|
|
124
|
+
node.content.to_s
|
|
125
|
+
when String
|
|
126
|
+
node
|
|
127
|
+
else
|
|
128
|
+
node.to_s
|
|
129
|
+
end
|
|
144
130
|
rescue StandardError
|
|
145
|
-
# If extraction fails, return nil (not a serialization difference)
|
|
146
131
|
nil
|
|
147
132
|
end
|
|
148
133
|
|
|
@@ -45,11 +45,13 @@ module Canon
|
|
|
45
45
|
end
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# rubocop:disable Metrics/ParameterLists
|
|
48
49
|
def initialize(use_color: true, context_lines: 3,
|
|
49
50
|
diff_grouping_lines: nil, visualization_map: nil,
|
|
50
51
|
show_diffs: :all, differences: [],
|
|
51
52
|
diff_mode: :separate, legacy_terminal: false,
|
|
52
|
-
equivalent: nil, theme: nil
|
|
53
|
+
equivalent: nil, theme: nil,
|
|
54
|
+
character_visualization: true)
|
|
53
55
|
@use_color = use_color
|
|
54
56
|
@context_lines = context_lines
|
|
55
57
|
@diff_grouping_lines = diff_grouping_lines
|
|
@@ -61,7 +63,9 @@ module Canon
|
|
|
61
63
|
@legacy_terminal = legacy_terminal
|
|
62
64
|
@equivalent = equivalent
|
|
63
65
|
@theme = theme
|
|
66
|
+
@character_visualization = character_visualization
|
|
64
67
|
end
|
|
68
|
+
# rubocop:enable Metrics/ParameterLists
|
|
65
69
|
|
|
66
70
|
# Get the resolved theme hash
|
|
67
71
|
# @return [Hash] Theme hash
|
|
@@ -644,15 +648,23 @@ module Canon
|
|
|
644
648
|
|
|
645
649
|
# Apply character visualization
|
|
646
650
|
#
|
|
651
|
+
# When +character_visualization+ is +:content_only+, leading
|
|
652
|
+
# structural whitespace (indentation) is left plain while content
|
|
653
|
+
# whitespace is visualized.
|
|
654
|
+
#
|
|
647
655
|
# @param token [String] The token to apply visualization to
|
|
648
656
|
# @param color [Symbol, nil] Optional color to apply
|
|
649
657
|
# @return [String] Visualized and optionally colored token
|
|
650
658
|
def apply_visualization(token, color = nil)
|
|
651
659
|
return "" if token.nil?
|
|
652
660
|
|
|
653
|
-
visual =
|
|
654
|
-
|
|
655
|
-
|
|
661
|
+
visual = if @character_visualization == :content_only
|
|
662
|
+
visualize_content_only(token.to_s)
|
|
663
|
+
else
|
|
664
|
+
token.to_s.chars.map do |char|
|
|
665
|
+
@visualization_map.fetch(char, char)
|
|
666
|
+
end.join
|
|
667
|
+
end
|
|
656
668
|
|
|
657
669
|
if color && @use_color
|
|
658
670
|
require "rainbow"
|
|
@@ -678,6 +690,29 @@ module Canon
|
|
|
678
690
|
end
|
|
679
691
|
end
|
|
680
692
|
|
|
693
|
+
# Visualize only the content portion of a token, leaving its leading
# whitespace (structural indentation) plain.
#
# The token is split at its first non-whitespace character. Everything
# before that point is returned verbatim; every character from there on
# is looked up in @visualization_map, and characters without an entry
# are kept as they are. A token that is empty or entirely whitespace is
# therefore returned unchanged.
#
# @param token [String] The full line token
# @return [String] Token with content-only visualization
def visualize_content_only(token)
  # \A\s* always matches (possibly the empty string), so indent is never nil.
  indent = token[/\A\s*/]
  content = token[indent.length..]

  indent + content.each_char.map { |char| @visualization_map.fetch(char, char) }.join
end
|
|
715
|
+
|
|
681
716
|
# Get max diff lines limit
|
|
682
717
|
#
|
|
683
718
|
# @return [Integer, nil] Max diff output lines
|
|
@@ -12,19 +12,22 @@ module Canon
|
|
|
12
12
|
class HtmlFormatter < BaseFormatter
|
|
13
13
|
attr_reader :html_version
|
|
14
14
|
|
|
15
|
+
# rubocop:disable Metrics/ParameterLists
|
|
15
16
|
def initialize(use_color: true, context_lines: 3,
|
|
16
17
|
diff_grouping_lines: nil, visualization_map: nil,
|
|
17
18
|
html_version: :html4, show_diffs: :all, differences: [],
|
|
18
19
|
diff_mode: :separate, legacy_terminal: false,
|
|
19
|
-
equivalent: nil)
|
|
20
|
+
equivalent: nil, character_visualization: true)
|
|
20
21
|
super(use_color: use_color, context_lines: context_lines,
|
|
21
22
|
diff_grouping_lines: diff_grouping_lines,
|
|
22
23
|
visualization_map: visualization_map,
|
|
23
24
|
show_diffs: show_diffs, differences: differences,
|
|
24
25
|
diff_mode: diff_mode, legacy_terminal: legacy_terminal,
|
|
25
|
-
equivalent: equivalent
|
|
26
|
+
equivalent: equivalent,
|
|
27
|
+
character_visualization: character_visualization)
|
|
26
28
|
@html_version = html_version
|
|
27
29
|
end
|
|
30
|
+
# rubocop:enable Metrics/ParameterLists
|
|
28
31
|
|
|
29
32
|
# Format DOM-guided HTML diff
|
|
30
33
|
#
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
|
|
5
|
+
module Canon
  class DiffFormatter
    # Handles the by_line rendering pipeline for line-by-line diffs.
    #
    # Receives preprocessed document strings from the DiffFormatter facade
    # and delegates to format-specific ByLine formatters (XML, HTML, JSON, YAML).
    class ByLineFormatter
      # Stores the rendering options that #format later forwards to the
      # format-specific ByLine formatter.
      #
      # rubocop:disable Metrics/ParameterLists
      def initialize(use_color:, visualization_map:, context_lines:,
                     diff_grouping_lines:, show_diffs:, character_visualization:,
                     legacy_terminal:, diff_mode:)
        @use_color = use_color
        @visualization_map = visualization_map
        @context_lines = context_lines
        @diff_grouping_lines = diff_grouping_lines
        @show_diffs = show_diffs
        @character_visualization = character_visualization
        @legacy_terminal = legacy_terminal
        @diff_mode = diff_mode
      end
      # rubocop:enable Metrics/ParameterLists

      # Format a line-by-line diff between two documents.
      #
      # The output always starts with a header line naming the resolved
      # format. When either document is nil, only that header is returned.
      #
      # @param doc1 [String] First document (already preprocessed)
      # @param doc2 [String] Second document (already preprocessed)
      # @param format [Symbol] Document format (:xml, :html, :json, :yaml, etc.)
      # @param html_version [Symbol, nil] HTML version override (:html4, :html5);
      #   only consulted when +format+ is :html
      # @param differences [Array, ComparisonResult] Differences from comparison
      # @return [String] Formatted diff output
      def format(doc1, doc2, format:, html_version: nil, differences: [])
        resolved_format = format == :html && html_version ? html_version : format
        header = colorize(
          "Line-by-line diff (#{resolved_format.to_s.upcase} mode):", :cyan, :bold
        )
        return header if doc1.nil? || doc2.nil?

        # Must run before building the formatter: it also records
        # @comparison_equivalent, which is passed along below.
        diffs_array = extract_differences(differences)

        formatter = ByLine::BaseFormatter.for_format(
          resolved_format,
          use_color: @use_color,
          context_lines: @context_lines,
          diff_grouping_lines: @diff_grouping_lines,
          visualization_map: @visualization_map,
          show_diffs: @show_diffs,
          differences: diffs_array,
          # A legacy terminal always gets the :separate diff mode.
          diff_mode: @legacy_terminal ? :separate : @diff_mode,
          legacy_terminal: @legacy_terminal,
          equivalent: @comparison_equivalent,
          character_visualization: @character_visualization,
        )

        [header, formatter.format(doc1, doc2)].join("\n")
      end

      private

      # Unwrap the differences list from a ComparisonResult, or pass any
      # other argument through unchanged.
      #
      # Side effect: sets @comparison_equivalent to the result's
      # equivalent? value, or to nil when no ComparisonResult was given.
      #
      # @param differences [Array, ComparisonResult]
      # @return [Array] The differences to render
      def extract_differences(differences)
        unless differences.is_a?(Canon::Comparison::ComparisonResult)
          @comparison_equivalent = nil
          return differences
        end

        @comparison_equivalent = differences.equivalent?
        differences.differences
      end

      # Color +text+ with Paint, prefixed by an ANSI reset sequence.
      # Returns +text+ untouched when color output is disabled.
      def colorize(text, *colors)
        return text unless @use_color

        "\e[0m#{Paint[text, *colors]}"
      end
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
|
|
5
|
+
module Canon
  class DiffFormatter
    # Handles the by_object rendering pipeline for tree-based semantic diffs.
    #
    # Delegates to format-specific ByObject formatters (XML, JSON, YAML)
    # which produce visual tree output with box-drawing characters.
    class ByObjectFormatter
      # Stores the rendering options that #format forwards to the
      # format-specific ByObject formatter.
      def initialize(use_color:, visualization_map:, show_diffs:)
        @use_color = use_color
        @visualization_map = visualization_map
        @show_diffs = show_diffs
      end

      # Format a tree-based object diff.
      #
      # The output is a "Visual Diff:" header line followed by whatever the
      # format-specific formatter returns.
      #
      # @param differences [Array, ComparisonResult] Differences from comparison
      # @param format [Symbol] Document format (:xml, :json, :yaml)
      # @return [String] Formatted diff output
      def format(differences, format)
        header = colorize("Visual Diff:", :cyan, :bold)

        # A ComparisonResult wraps the list; anything else is used as given.
        diffs_array =
          if differences.is_a?(Canon::Comparison::ComparisonResult)
            differences.differences
          else
            differences
          end

        formatter = ByObject::BaseFormatter.for_format(
          format,
          use_color: @use_color,
          visualization_map: @visualization_map,
          show_diffs: @show_diffs,
        )

        [header, formatter.format(diffs_array, format)].join("\n")
      end

      private

      # Color +text+ with Paint, prefixed by an ANSI reset sequence.
      # Returns +text+ untouched when color output is disabled.
      def colorize(text, *colors)
        return text unless @use_color

        "\e[0m#{Paint[text, *colors]}"
      end
    end
  end
end
|