canon 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +96 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/diff-formatting/index.adoc +3 -0
- data/docs/features/diff-formatting/whitespace-adjacency.adoc +140 -0
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/environment-variables.adoc +3 -1
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +95 -2
- data/lib/canon/comparison/html_comparator.rb +96 -11
- data/lib/canon/comparison/markup_comparator.rb +68 -71
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/node_inspector.rb +103 -0
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +133 -55
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +24 -23
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +174 -7
- data/lib/canon/comparison/xml_node_comparison.rb +48 -66
- data/lib/canon/comparison.rb +143 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +55 -41
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff/xml_serialization_formatter.rb +27 -42
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +184 -26
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +92 -4
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +128 -175
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/pretty_printer/html.rb +76 -14
- data/lib/canon/pretty_printer/html_void_elements.rb +20 -0
- data/lib/canon/pretty_printer/xml_normalized.rb +10 -3
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/data_model.rb +13 -1
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +23 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +29 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +9 -2
|
@@ -13,8 +13,10 @@ module Canon
|
|
|
13
13
|
# @param use_color [Boolean] Whether to use colors
|
|
14
14
|
# @param compact [Boolean] Whether to serialize element nodes as compact XML
|
|
15
15
|
# @return [String] Formatted dimension details
|
|
16
|
+
# rubocop:disable Lint/UnusedMethodArgument
|
|
16
17
|
def self.format_dimension_details(diff, use_color, compact: false,
|
|
17
18
|
expand_difference: false)
|
|
19
|
+
# rubocop:enable Lint/UnusedMethodArgument
|
|
18
20
|
dimension = extract_dimension(diff)
|
|
19
21
|
|
|
20
22
|
case dimension
|
|
@@ -23,8 +25,7 @@ expand_difference: false)
|
|
|
23
25
|
when :namespace_declarations
|
|
24
26
|
format_namespace_declarations_details(diff, use_color)
|
|
25
27
|
when :element_structure
|
|
26
|
-
format_element_structure_details(diff, use_color,
|
|
27
|
-
expand_difference: expand_difference)
|
|
28
|
+
format_element_structure_details(diff, use_color)
|
|
28
29
|
when :attribute_presence
|
|
29
30
|
format_attribute_presence_details(diff, use_color)
|
|
30
31
|
when :attribute_values
|
|
@@ -33,6 +34,8 @@ expand_difference: false)
|
|
|
33
34
|
format_attribute_order_details(diff, use_color)
|
|
34
35
|
when :text_content
|
|
35
36
|
format_text_content_details(diff, use_color, compact: compact)
|
|
37
|
+
when :whitespace_adjacency
|
|
38
|
+
format_whitespace_adjacency_details(diff, use_color)
|
|
36
39
|
when :structural_whitespace
|
|
37
40
|
format_structural_whitespace_details(diff, use_color)
|
|
38
41
|
when :comments
|
|
@@ -163,37 +166,70 @@ expand_difference: false)
|
|
|
163
166
|
|
|
164
167
|
# Format element structure differences
|
|
165
168
|
#
|
|
169
|
+
# Produces compact XML for both sides so the user can see attributes
|
|
170
|
+
# and text content, not just the tag name. Handles nil nodes that
|
|
171
|
+
# arise from insertions/deletions.
|
|
172
|
+
#
|
|
166
173
|
# @param diff [DiffNode, Hash] Difference node
|
|
167
174
|
# @param use_color [Boolean] Whether to use colors
|
|
168
175
|
# @return [Array] Tuple of [detail1, detail2, changes]
|
|
169
|
-
def self.format_element_structure_details(diff, use_color,
|
|
170
|
-
expand_difference: false)
|
|
176
|
+
def self.format_element_structure_details(diff, use_color)
|
|
171
177
|
require_relative "color_helper"
|
|
172
178
|
require_relative "node_utils"
|
|
173
179
|
|
|
174
180
|
node1 = extract_node1(diff)
|
|
175
181
|
node2 = extract_node2(diff)
|
|
176
182
|
|
|
177
|
-
|
|
178
|
-
|
|
183
|
+
has1 = !node1.nil?
|
|
184
|
+
has2 = !node2.nil?
|
|
185
|
+
|
|
186
|
+
if has1 && has2
|
|
187
|
+
# Both elements present — show compact XML for both
|
|
188
|
+
compact1 = NodeUtils.serialize_node_compact(node1)
|
|
189
|
+
compact2 = NodeUtils.serialize_node_compact(node2)
|
|
190
|
+
detail1 = ColorHelper.colorize(compact1, :red, use_color)
|
|
191
|
+
detail2 = ColorHelper.colorize(compact2, :green, use_color)
|
|
192
|
+
|
|
193
|
+
name1 = NodeUtils.get_element_name_for_display(node1)
|
|
194
|
+
name2 = NodeUtils.get_element_name_for_display(node2)
|
|
179
195
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
196
|
+
changes = if name1 == name2
|
|
197
|
+
"Element <#{name1}> structure changed (children differ)"
|
|
198
|
+
else
|
|
199
|
+
build_structure_change_text(compact1, compact2,
|
|
200
|
+
use_color)
|
|
201
|
+
end
|
|
202
|
+
elsif has1
|
|
203
|
+
# Element removed
|
|
204
|
+
compact1 = NodeUtils.serialize_node_compact(node1)
|
|
205
|
+
detail1 = ColorHelper.colorize(compact1, :red, use_color)
|
|
206
|
+
detail2 = ColorHelper.colorize("(not present)", :green, use_color)
|
|
207
|
+
changes = "Element removed: #{ColorHelper.colorize(compact1, :red,
|
|
208
|
+
use_color)}"
|
|
185
209
|
else
|
|
186
|
-
|
|
187
|
-
|
|
210
|
+
# Element added
|
|
211
|
+
compact2 = NodeUtils.serialize_node_compact(node2)
|
|
212
|
+
detail1 = ColorHelper.colorize("(not present)", :red, use_color)
|
|
213
|
+
detail2 = ColorHelper.colorize(compact2, :green, use_color)
|
|
214
|
+
changes = "Element added: #{ColorHelper.colorize(compact2, :green,
|
|
215
|
+
use_color)}"
|
|
188
216
|
end
|
|
189
217
|
|
|
190
|
-
changes = "Element differs: #{ColorHelper.colorize(name1, :red,
|
|
191
|
-
use_color)} → " \
|
|
192
|
-
"#{ColorHelper.colorize(name2, :green, use_color)}"
|
|
193
|
-
|
|
194
218
|
[detail1, detail2, changes]
|
|
195
219
|
end
|
|
196
220
|
|
|
221
|
+
# Build human-readable change text for element structure diffs
|
|
222
|
+
#
|
|
223
|
+
# @param display1 [String] Serialized expected element
|
|
224
|
+
# @param display2 [String] Serialized actual element
|
|
225
|
+
# @param use_color [Boolean] Whether to use colors
|
|
226
|
+
# @return [String] Change description
|
|
227
|
+
def self.build_structure_change_text(display1, display2, use_color)
|
|
228
|
+
"Element structure changed: " \
|
|
229
|
+
"#{ColorHelper.colorize(display1, :red, use_color)} → " \
|
|
230
|
+
"#{ColorHelper.colorize(display2, :green, use_color)}"
|
|
231
|
+
end
|
|
232
|
+
|
|
197
233
|
# Format attribute presence differences
|
|
198
234
|
#
|
|
199
235
|
# @param diff [DiffNode, Hash] Difference node
|
|
@@ -332,18 +368,21 @@ expand_difference: false)
|
|
|
332
368
|
node1 = extract_node1(diff)
|
|
333
369
|
node2 = extract_node2(diff)
|
|
334
370
|
|
|
371
|
+
# Symmetric one-sided rendering for missing/extra text nodes.
|
|
372
|
+
# When exactly one side is nil, render "(not present)" on that
|
|
373
|
+
# side and the present side's raw text content (whitespace-
|
|
374
|
+
# visualised, with a brief parent open-tag hint for context).
|
|
375
|
+
# Mirrors format_element_structure_details above. Without this
|
|
376
|
+
# short-circuit, the ambiguous-pair fallback further down would
|
|
377
|
+
# serialize the present side's *parent subtree* in full,
|
|
378
|
+
# producing a misleading diff payload. See lutaml/canon#125.
|
|
379
|
+
if node1.nil? ^ node2.nil?
|
|
380
|
+
return format_text_content_one_sided(node1, node2, use_color)
|
|
381
|
+
end
|
|
382
|
+
|
|
335
383
|
text1 = NodeUtils.node_to_display(node1, compact: compact)
|
|
336
384
|
text2 = NodeUtils.node_to_display(node2, compact: compact)
|
|
337
385
|
|
|
338
|
-
# Handle cases where one node is missing (e.g. text added or removed)
|
|
339
|
-
if node1.nil? || node2.nil?
|
|
340
|
-
if node1.nil?
|
|
341
|
-
text2 = NodeUtils.node_to_display(node2, compact: compact)
|
|
342
|
-
else
|
|
343
|
-
text1 = NodeUtils.node_to_display(node1, compact: compact)
|
|
344
|
-
end
|
|
345
|
-
end
|
|
346
|
-
|
|
347
386
|
if NodeUtils.inside_preserve_element?(node1) || NodeUtils.inside_preserve_element?(node2)
|
|
348
387
|
detail1 = ColorHelper.colorize(
|
|
349
388
|
TextUtils.visualize_whitespace(text1), :red, use_color
|
|
@@ -351,6 +390,20 @@ expand_difference: false)
|
|
|
351
390
|
detail2 = ColorHelper.colorize(
|
|
352
391
|
TextUtils.visualize_whitespace(text2), :green, use_color
|
|
353
392
|
)
|
|
393
|
+
elsif TextUtils.ambiguous_text_pair?(text1, text2) &&
|
|
394
|
+
(NodeUtils.parent_of(node1) || NodeUtils.parent_of(node2))
|
|
395
|
+
# Both sides render to empty/whitespace-only strings, which are
|
|
396
|
+
# indistinguishable after JSON quoting. Fall back to each side's
|
|
397
|
+
# parent element serialized compactly, with whitespace visualized
|
|
398
|
+
# so the reader can see the structural contrast.
|
|
399
|
+
ctx1 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node1))
|
|
400
|
+
ctx2 = NodeUtils.serialize_node_compact(NodeUtils.parent_of(node2))
|
|
401
|
+
detail1 = ColorHelper.colorize(
|
|
402
|
+
TextUtils.visualize_whitespace(ctx1), :red, use_color
|
|
403
|
+
)
|
|
404
|
+
detail2 = ColorHelper.colorize(
|
|
405
|
+
TextUtils.visualize_whitespace(ctx2), :green, use_color
|
|
406
|
+
)
|
|
354
407
|
elsif compact && (node1.is_a?(Canon::Xml::Nodes::ElementNode) ||
|
|
355
408
|
node2.is_a?(Canon::Xml::Nodes::ElementNode))
|
|
356
409
|
# In compact mode with element nodes, display as raw XML without
|
|
@@ -384,8 +437,113 @@ expand_difference: false)
|
|
|
384
437
|
[detail1, detail2, changes]
|
|
385
438
|
end
|
|
386
439
|
|
|
440
|
+
# Whether a node is an element (Canon or Nokogiri), used to
|
|
441
|
+
# detect element-shaped diffs that have been misclassified as
|
|
442
|
+
# :text_content and route them to element-structure rendering.
|
|
443
|
+
# See lutaml/canon#125 follow-up.
|
|
444
|
+
def self.present_is_element?(node)
|
|
445
|
+
return false unless node
|
|
446
|
+
|
|
447
|
+
case node
|
|
448
|
+
when Canon::Xml::Node
|
|
449
|
+
node.node_type == :element
|
|
450
|
+
when Nokogiri::XML::Node
|
|
451
|
+
node.element?
|
|
452
|
+
else
|
|
453
|
+
false
|
|
454
|
+
end
|
|
455
|
+
end
|
|
456
|
+
|
|
457
|
+
# Render a one-sided text-content diff (one node nil, the other a
|
|
458
|
+
# text node). Mirrors the +has1+/+has2+ branches of
|
|
459
|
+
# +format_element_structure_details+: "(not present)" on the nil
|
|
460
|
+
# side, the present side's raw text content (whitespace-visualised
|
|
461
|
+
# and quoted) plus a brief parent open-tag hint for context.
|
|
462
|
+
#
|
|
463
|
+
# @param node1 [Object, nil] First node (nil if removed)
|
|
464
|
+
# @param node2 [Object, nil] Second node (nil if added)
|
|
465
|
+
# @param use_color [Boolean] Whether to apply ANSI colours
|
|
466
|
+
# @return [Array<String>] Tuple of [detail1, detail2, changes]
|
|
467
|
+
def self.format_text_content_one_sided(node1, node2, use_color)
|
|
468
|
+
require_relative "color_helper"
|
|
469
|
+
require_relative "node_utils"
|
|
470
|
+
require_relative "text_utils"
|
|
471
|
+
|
|
472
|
+
present = node1 || node2
|
|
473
|
+
|
|
474
|
+
# Defensive: if a one-sided text-content diff carries an
|
|
475
|
+
# *element* on the present side (e.g. because an upstream
|
|
476
|
+
# comparator misclassified an element orphan as
|
|
477
|
+
# :text_content), delegate to the element-structure
|
|
478
|
+
# formatter rather than rendering the element as +text ""+.
|
|
479
|
+
# The construction-side fix in lutaml/canon#125 follow-up
|
|
480
|
+
# removes the immediate failure mode, but other paths could
|
|
481
|
+
# still misclassify and the formatter must produce a
|
|
482
|
+
# best-effort element representation, never +text ""+.
|
|
483
|
+
if present_is_element?(present)
|
|
484
|
+
return format_element_structure_details(
|
|
485
|
+
{ node1: node1, node2: node2 }, use_color
|
|
486
|
+
)
|
|
487
|
+
end
|
|
488
|
+
|
|
489
|
+
removed = node2.nil?
|
|
490
|
+
|
|
491
|
+
raw = NodeUtils.raw_text_value(present)
|
|
492
|
+
visible = TextUtils.visualize_whitespace(raw)
|
|
493
|
+
parent = NodeUtils.parent_of(present)
|
|
494
|
+
context = parent ? " in #{NodeUtils.serialize_open_tag(parent)}" : ""
|
|
495
|
+
present_str = "text \"#{visible}\"#{context}"
|
|
496
|
+
|
|
497
|
+
if removed
|
|
498
|
+
detail1 = ColorHelper.colorize(present_str, :red, use_color)
|
|
499
|
+
detail2 = ColorHelper.colorize("(not present)", :green, use_color)
|
|
500
|
+
changes = "Text removed: #{detail1}"
|
|
501
|
+
else
|
|
502
|
+
detail1 = ColorHelper.colorize("(not present)", :red, use_color)
|
|
503
|
+
detail2 = ColorHelper.colorize(present_str, :green, use_color)
|
|
504
|
+
changes = "Text added: #{detail2}"
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
[detail1, detail2, changes]
|
|
508
|
+
end
|
|
509
|
+
|
|
387
510
|
# Format structural whitespace differences
|
|
388
511
|
#
|
|
512
|
+
# Format a :whitespace_adjacency diff (#137).
|
|
513
|
+
#
|
|
514
|
+
# @param diff [DiffNode, Hash] Difference node
|
|
515
|
+
# @param use_color [Boolean] Whether to use colors
|
|
516
|
+
# @return [Array] Tuple of [detail1, detail2, changes]
|
|
517
|
+
def self.format_whitespace_adjacency_details(diff, use_color)
|
|
518
|
+
require_relative "color_helper"
|
|
519
|
+
require_relative "node_utils"
|
|
520
|
+
require_relative "text_utils"
|
|
521
|
+
|
|
522
|
+
node1 = extract_node1(diff)
|
|
523
|
+
node2 = extract_node2(diff)
|
|
524
|
+
|
|
525
|
+
text1 = NodeUtils.get_node_text(node1).to_s
|
|
526
|
+
text2 = NodeUtils.get_node_text(node2).to_s
|
|
527
|
+
|
|
528
|
+
detail1 = ColorHelper.colorize(
|
|
529
|
+
"\"#{TextUtils.visualize_whitespace(text1)}\"",
|
|
530
|
+
:red, use_color
|
|
531
|
+
)
|
|
532
|
+
detail2 = ColorHelper.colorize(
|
|
533
|
+
"\"#{TextUtils.visualize_whitespace(text2)}\"",
|
|
534
|
+
:green, use_color
|
|
535
|
+
)
|
|
536
|
+
|
|
537
|
+
reason = if diff.is_a?(Canon::Diff::DiffNode)
|
|
538
|
+
diff.reason
|
|
539
|
+
else
|
|
540
|
+
diff.is_a?(Hash) ? diff[:reason] : nil
|
|
541
|
+
end
|
|
542
|
+
changes = reason.to_s
|
|
543
|
+
|
|
544
|
+
[detail1, detail2, changes]
|
|
545
|
+
end
|
|
546
|
+
|
|
389
547
|
# @param diff [DiffNode, Hash] Difference node
|
|
390
548
|
# @param use_color [Boolean] Whether to use colors
|
|
391
549
|
# @return [Array] Tuple of [detail1, detail2, changes]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "nokogiri"
|
|
3
4
|
require_relative "../../xml/namespace_helper"
|
|
4
5
|
|
|
5
6
|
module Canon
|
|
@@ -260,12 +261,15 @@ module Canon
|
|
|
260
261
|
end
|
|
261
262
|
end
|
|
262
263
|
|
|
263
|
-
# Serialize a
|
|
264
|
+
# Serialize a node tree as compact XML for display.
|
|
264
265
|
#
|
|
265
266
|
# Produces a human-readable inline XML string without namespace
|
|
266
267
|
# declarations and without indentation — suitable for use in Semantic
|
|
267
|
-
# Diff Report entries.
|
|
268
|
-
#
|
|
268
|
+
# Diff Report entries. Handles both +Canon::Xml::Nodes+ types and
|
|
269
|
+
# Nokogiri XML/HTML nodes (the html DOM comparison path uses
|
|
270
|
+
# Nokogiri nodes, so element-structure diffs originating there must
|
|
271
|
+
# be rendered structurally too — see issue #120). For any other
|
|
272
|
+
# node type, falls back to +get_node_text+.
|
|
269
273
|
#
|
|
270
274
|
# @param node [Object] Node to serialize
|
|
271
275
|
# @return [String] Compact XML string
|
|
@@ -294,12 +298,79 @@ module Canon
|
|
|
294
298
|
when Canon::Xml::Nodes::CommentNode
|
|
295
299
|
text = node.respond_to?(:value) ? node.value.to_s : ""
|
|
296
300
|
"<!--#{CGI.escapeHTML(text)}-->"
|
|
301
|
+
when Nokogiri::XML::Text, Nokogiri::XML::CDATA
|
|
302
|
+
CGI.escapeHTML(node.content.to_s)
|
|
303
|
+
when Nokogiri::XML::Comment
|
|
304
|
+
"<!--#{CGI.escapeHTML(node.content.to_s)}-->"
|
|
305
|
+
when Nokogiri::XML::Element
|
|
306
|
+
tag = node.name.to_s
|
|
307
|
+
attrs = node.attribute_nodes.map do |a|
|
|
308
|
+
" #{a.name}=\"#{CGI.escapeHTML(a.value.to_s)}\""
|
|
309
|
+
end.join
|
|
310
|
+
children_xml = node.children.map do |c|
|
|
311
|
+
serialize_node_compact(c)
|
|
312
|
+
end.join
|
|
313
|
+
if children_xml.empty?
|
|
314
|
+
"<#{tag}#{attrs}/>"
|
|
315
|
+
else
|
|
316
|
+
"<#{tag}#{attrs}>#{children_xml}</#{tag}>"
|
|
317
|
+
end
|
|
297
318
|
else
|
|
298
|
-
#
|
|
319
|
+
# Unknown node types — fall back to text extraction
|
|
299
320
|
get_node_text(node)
|
|
300
321
|
end
|
|
301
322
|
end
|
|
302
323
|
|
|
324
|
+
# Serialize a node's open tag only — name + attributes, no children,
|
|
325
|
+
# no closing tag. Used by +format_text_content_one_sided+ to render
|
|
326
|
+
# a brief parent-element context hint (e.g. +<div id="A">+) for a
|
|
327
|
+
# one-sided text diff, instead of the full ancestor subtree that
|
|
328
|
+
# +serialize_node_compact+ would produce. See lutaml/canon#125.
|
|
329
|
+
#
|
|
330
|
+
# @param node [Object] Element node to serialize
|
|
331
|
+
# @return [String] Open-tag string, or "" for non-elements / nil
|
|
332
|
+
def self.serialize_open_tag(node)
|
|
333
|
+
require "cgi"
|
|
334
|
+
return "" unless node
|
|
335
|
+
|
|
336
|
+
case node
|
|
337
|
+
when Canon::Xml::Nodes::ElementNode
|
|
338
|
+
tag = node.name.to_s
|
|
339
|
+
attrs = node.attribute_nodes.map do |attr|
|
|
340
|
+
" #{attr.name}=\"#{CGI.escapeHTML(attr.value.to_s)}\""
|
|
341
|
+
end.join
|
|
342
|
+
"<#{tag}#{attrs}>"
|
|
343
|
+
when Nokogiri::XML::Element
|
|
344
|
+
tag = node.name.to_s
|
|
345
|
+
attrs = node.attribute_nodes.map do |a|
|
|
346
|
+
" #{a.name}=\"#{CGI.escapeHTML(a.value.to_s)}\""
|
|
347
|
+
end.join
|
|
348
|
+
"<#{tag}#{attrs}>"
|
|
349
|
+
else
|
|
350
|
+
""
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
# Return the raw text content of a text node without stripping
|
|
355
|
+
# whitespace. +get_node_text+ strips ASCII whitespace, which
|
|
356
|
+
# destroys whitespace-only payloads that callers (e.g. one-sided
|
|
357
|
+
# text-content diff rendering) need to display verbatim.
|
|
358
|
+
#
|
|
359
|
+
# @param node [Object] Text node
|
|
360
|
+
# @return [String] Raw text content, or "" if not a text-bearing node
|
|
361
|
+
def self.raw_text_value(node)
|
|
362
|
+
return "" unless node
|
|
363
|
+
|
|
364
|
+
case node
|
|
365
|
+
when Canon::Xml::Node
|
|
366
|
+
node.value.to_s
|
|
367
|
+
when Nokogiri::XML::Node
|
|
368
|
+
node.content.to_s
|
|
369
|
+
else
|
|
370
|
+
""
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
|
|
303
374
|
# Return the best display string for a node.
|
|
304
375
|
#
|
|
305
376
|
# When +compact: true+ and the node is a Canon ElementNode, returns a
|
|
@@ -318,6 +389,23 @@ module Canon
|
|
|
318
389
|
end
|
|
319
390
|
end
|
|
320
391
|
|
|
392
|
+
# Return the parent of a node, or nil, regardless of the node API.
|
|
393
|
+
#
|
|
394
|
+
# Canon::Xml nodes expose +parent+; some Nokogiri-shaped nodes expose
|
|
395
|
+
# +parent_node+. This helper abstracts over both.
|
|
396
|
+
#
|
|
397
|
+
# @param node [Object] Node to query
|
|
398
|
+
# @return [Object, nil] Parent node or nil
|
|
399
|
+
def self.parent_of(node)
|
|
400
|
+
return nil unless node
|
|
401
|
+
|
|
402
|
+
if node.respond_to?(:parent)
|
|
403
|
+
node.parent
|
|
404
|
+
elsif node.respond_to?(:parent_node)
|
|
405
|
+
node.parent_node
|
|
406
|
+
end
|
|
407
|
+
end
|
|
408
|
+
|
|
321
409
|
# Check if node is inside a preserve-whitespace element
|
|
322
410
|
#
|
|
323
411
|
# @param node [Object] Node to check
|
|
@@ -83,6 +83,35 @@ module Canon
|
|
|
83
83
|
end.join
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
# Whether two text values would be visually indistinguishable when
|
|
87
|
+
# rendered through the standard JSON-quoting path.
|
|
88
|
+
#
|
|
89
|
+
# Covers three cases that collapse to near-identical short strings
|
|
90
|
+
# like +""+ / +" "+ / +":"+ / +":"+:
|
|
91
|
+
# * both sides empty
|
|
92
|
+
# * both sides whitespace-only (possibly with different whitespace
|
|
93
|
+
# that JSON.generate preserves verbatim but a reader cannot tell
|
|
94
|
+
# apart from plain spaces)
|
|
95
|
+
# * both sides equal (the comparator reported a diff based on
|
|
96
|
+
# something the text-only extraction does not surface — e.g. a
|
|
97
|
+
# sibling text node that exists on one side and not the other)
|
|
98
|
+
#
|
|
99
|
+
# Callers should fall back to rendering parent-element context
|
|
100
|
+
# instead.
|
|
101
|
+
#
|
|
102
|
+
# @param text1 [String, nil]
|
|
103
|
+
# @param text2 [String, nil]
|
|
104
|
+
# @return [Boolean]
|
|
105
|
+
def self.ambiguous_text_pair?(text1, text2)
|
|
106
|
+
blank_or_whitespace = ->(t) {
|
|
107
|
+
t.nil? || t.empty? || t.match?(/\A\s+\z/)
|
|
108
|
+
}
|
|
109
|
+
return true if blank_or_whitespace.call(text1) &&
|
|
110
|
+
blank_or_whitespace.call(text2)
|
|
111
|
+
|
|
112
|
+
text1 == text2
|
|
113
|
+
end
|
|
114
|
+
|
|
86
115
|
# Check if text contains non-ASCII or non-printable characters
|
|
87
116
|
#
|
|
88
117
|
# @param text [String] Text to check
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
require "diff/lcs"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
class DiffFormatter
|
|
8
|
+
# Handles the pretty_diff rendering pipeline for text-LCS diffs.
|
|
9
|
+
#
|
|
10
|
+
# Bypasses DiffNodeMapper entirely — runs Diff::LCS.sdiff on plain-text
|
|
11
|
+
# lines and renders with context windowing and colorization.
|
|
12
|
+
class PrettyDiffFormatter
|
|
13
|
+
def initialize(use_color:, context_lines:)
|
|
14
|
+
@use_color = use_color
|
|
15
|
+
@context_lines = context_lines
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# Format a text-LCS diff between two documents.
|
|
19
|
+
#
|
|
20
|
+
# @param doc1 [String, nil] First document (already preprocessed)
|
|
21
|
+
# @param doc2 [String, nil] Second document (already preprocessed)
|
|
22
|
+
# @param format [Symbol] Document format for display name
|
|
23
|
+
# @return [String] Formatted diff output
|
|
24
|
+
def format(doc1, doc2, format:)
|
|
25
|
+
format_name = format.to_s.upcase
|
|
26
|
+
|
|
27
|
+
output = []
|
|
28
|
+
output << colorize("Pretty diff (#{format_name} mode):", :cyan, :bold)
|
|
29
|
+
|
|
30
|
+
return output.join("\n") if doc1.nil? || doc2.nil?
|
|
31
|
+
|
|
32
|
+
lines1 = doc1.lines.map(&:chomp)
|
|
33
|
+
lines2 = doc2.lines.map(&:chomp)
|
|
34
|
+
|
|
35
|
+
hunks = ::Diff::LCS.sdiff(lines1, lines2)
|
|
36
|
+
|
|
37
|
+
output << render_pretty_diff(hunks)
|
|
38
|
+
output.join("\n")
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Render sdiff hunks with context windowing and colorization.
|
|
44
|
+
#
|
|
45
|
+
# Uses context_lines setting for expansion. Changed hunks
|
|
46
|
+
# (action != "=") are expanded by context_lines in each direction;
|
|
47
|
+
# nearby windows are merged; a separator is emitted between
|
|
48
|
+
# non-adjacent blocks.
|
|
49
|
+
#
|
|
50
|
+
# @param hunks [Array<Diff::LCS::ContextChange>] Output of Diff::LCS.sdiff
|
|
51
|
+
# @return [String] Rendered diff lines joined with "\n"
|
|
52
|
+
def render_pretty_diff(hunks)
|
|
53
|
+
changed = hunks.each_index.reject { |i| hunks[i].action == "=" }
|
|
54
|
+
|
|
55
|
+
return colorize(" (no differences)", :green) if changed.empty?
|
|
56
|
+
|
|
57
|
+
ctx = [@context_lines || 3, 0].max
|
|
58
|
+
|
|
59
|
+
windows = changed.map do |pos|
|
|
60
|
+
[
|
|
61
|
+
[pos - ctx, 0].max,
|
|
62
|
+
[pos + ctx, hunks.length - 1].min,
|
|
63
|
+
]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
merged = []
|
|
67
|
+
windows.each do |lo, hi|
|
|
68
|
+
if merged.empty? || lo > merged.last[1] + 1
|
|
69
|
+
merged << [lo, hi]
|
|
70
|
+
else
|
|
71
|
+
merged.last[1] = [merged.last[1], hi].max
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
lines = []
|
|
76
|
+
merged.each_with_index do |(lo, hi), block_idx|
|
|
77
|
+
if block_idx.positive?
|
|
78
|
+
lines << colorize("--- ---", :cyan)
|
|
79
|
+
elsif lo.positive?
|
|
80
|
+
lines << colorize("--- ---", :cyan)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
(lo..hi).each do |i|
|
|
84
|
+
hunk = hunks[i]
|
|
85
|
+
case hunk.action
|
|
86
|
+
when "="
|
|
87
|
+
lines << (@use_color ? "\e[0m #{hunk.old_element}" : " #{hunk.old_element}")
|
|
88
|
+
when "-"
|
|
89
|
+
lines << colorize("- #{hunk.old_element}", :red)
|
|
90
|
+
when "+"
|
|
91
|
+
lines << colorize("+ #{hunk.new_element}", :green)
|
|
92
|
+
when "!"
|
|
93
|
+
lines << colorize("- #{hunk.old_element}", :red)
|
|
94
|
+
lines << colorize("+ #{hunk.new_element}", :green)
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
lines.join("\n")
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def colorize(text, *colors)
|
|
103
|
+
return text unless @use_color
|
|
104
|
+
|
|
105
|
+
"\e[0m#{Paint[text, *colors]}"
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|