canon 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +31 -149
- data/README.adoc +9 -0
- data/docs/advanced/semantic-diff-report.adoc +31 -0
- data/docs/features/configuration-profiles.adoc +4 -2
- data/docs/features/match-options/html-policies.adoc +2 -0
- data/docs/features/match-options/index.adoc +40 -0
- data/docs/guides/choosing-configuration.adoc +12 -1
- data/docs/reference/cli-options.adoc +3 -0
- data/docs/reference/options-across-interfaces.adoc +7 -1
- data/docs/understanding/formats/html.adoc +9 -2
- data/lib/canon/cli.rb +4 -0
- data/lib/canon/commands/diff_command.rb +1 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +92 -11
- data/lib/canon/comparison/markup_comparator.rb +19 -0
- data/lib/canon/comparison/match_options/base_resolver.rb +1 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +8 -0
- data/lib/canon/comparison/match_options.rb +23 -2
- data/lib/canon/comparison/whitespace_sensitivity.rb +96 -0
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +6 -0
- data/lib/canon/comparison/xml_comparator/node_parser.rb +45 -7
- data/lib/canon/comparison/xml_comparator.rb +80 -4
- data/lib/canon/comparison/xml_node_comparison.rb +29 -3
- data/lib/canon/comparison.rb +84 -22
- data/lib/canon/config/env_schema.rb +2 -1
- data/lib/canon/config/profiles/metanorma.yml +3 -0
- data/lib/canon/config.rb +51 -5
- data/lib/canon/diff/diff_classifier.rb +18 -2
- data/lib/canon/diff/diff_line_builder.rb +9 -8
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +39 -4
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +5 -2
- data/lib/canon/diff_formatter/by_line_formatter.rb +84 -0
- data/lib/canon/diff_formatter/by_object_formatter.rb +53 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +65 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +17 -0
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +29 -0
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +109 -0
- data/lib/canon/diff_formatter.rb +57 -173
- data/lib/canon/html/data_model.rb +10 -4
- data/lib/canon/tree_diff/adapters/html_adapter.rb +55 -2
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +59 -5
- data/lib/canon/xml/element_matcher.rb +3 -0
- data/lib/canon/xml/node.rb +8 -1
- data/lib/canon/xml/nodes/comment_node.rb +4 -0
- data/lib/canon/xml/nodes/element_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/canon/xml/sax_builder.rb +11 -2
- data/lib/canon/xml/xpath_engine.rb +238 -0
- metadata +6 -2
|
@@ -200,6 +200,19 @@ module Canon
|
|
|
200
200
|
whitespace_sensitive_tags.include?(element.name.downcase)
|
|
201
201
|
end
|
|
202
202
|
|
|
203
|
+
# Check if a text value is formatting-only whitespace
|
|
204
|
+
#
|
|
205
|
+
# Formatting whitespace contains newlines (indentation between
|
|
206
|
+
# block elements) and is safe to strip. Pure spaces/tabs without
|
|
207
|
+
# newlines may be semantically significant between inline elements
|
|
208
|
+
# and are preserved.
|
|
209
|
+
#
|
|
210
|
+
# @param text [String] Text value to check
|
|
211
|
+
# @return [Boolean] True if formatting-only whitespace
|
|
212
|
+
def formatting_whitespace?(text)
|
|
213
|
+
text.match?(/\A[\s\p{Zs}]*\z/) && text.include?("\n")
|
|
214
|
+
end
|
|
215
|
+
|
|
203
216
|
# Build Nokogiri element from TreeNode
|
|
204
217
|
#
|
|
205
218
|
# @param tree_node [Core::TreeNode] Tree node
|
|
@@ -270,15 +283,51 @@ module Canon
|
|
|
270
283
|
source_node: element_node, # Preserve reference to Canon node
|
|
271
284
|
)
|
|
272
285
|
|
|
286
|
+
# Skip whitespace-only text children UNLESS this element is
|
|
287
|
+
# whitespace-sensitive (pre, code, textarea, script, style).
|
|
288
|
+
# Layout whitespace between block-level children is not
|
|
289
|
+
# semantically meaningful and preserving it causes the
|
|
290
|
+
# position-based tree matcher to misalign siblings, producing
|
|
291
|
+
# spurious NORMATIVE diffs around self-closing tags. This
|
|
292
|
+
# mirrors XMLAdapter's behavior and the DOM-diff path's
|
|
293
|
+
# remove_whitespace_only_text_nodes filter.
|
|
294
|
+
#
|
|
295
|
+
# HTML distinguishes between formatting whitespace (newlines +
|
|
296
|
+
# indentation between block elements) and inline whitespace
|
|
297
|
+
# (spaces between inline elements like <span>). Only formatting
|
|
298
|
+
# whitespace is stripped — inline spaces are semantically
|
|
299
|
+
# significant because they render as visible gaps.
|
|
300
|
+
skip_ws_text = !whitespace_sensitive?(element_node)
|
|
301
|
+
|
|
273
302
|
# Process children recursively
|
|
274
303
|
element_node.children.each do |child|
|
|
304
|
+
next if skip_ws_text && whitespace_only_text?(child)
|
|
305
|
+
|
|
275
306
|
child_tree = to_tree(child)
|
|
276
|
-
|
|
307
|
+
next if child_tree.nil?
|
|
308
|
+
|
|
309
|
+
if child_tree.label == "text" && !whitespace_sensitive?(element_node) && formatting_whitespace?(child_tree.value)
|
|
310
|
+
next
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
tree_node.add_child(child_tree)
|
|
277
314
|
end
|
|
278
315
|
|
|
279
316
|
tree_node
|
|
280
317
|
end
|
|
281
318
|
|
|
319
|
+
# Check if a Canon::Xml::Nodes node is a whitespace-only text node
|
|
320
|
+
#
|
|
321
|
+
# @param node [Canon::Xml::Nodes::Node] Node to check
|
|
322
|
+
# @return [Boolean] true if node is a TextNode containing only whitespace
|
|
323
|
+
def whitespace_only_text?(node)
|
|
324
|
+
return false unless node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
325
|
+
|
|
326
|
+
# Uses \p{Zs} for Unicode space separators (em/en/thin spaces)
|
|
327
|
+
# plus ASCII whitespace -- same regex as XMLAdapter.
|
|
328
|
+
node.value.to_s.match?(/\A[\s\p{Zs}]*\z/)
|
|
329
|
+
end
|
|
330
|
+
|
|
282
331
|
# Convert Canon::Xml::Nodes::TextNode to TreeNode
|
|
283
332
|
#
|
|
284
333
|
# @param text_node [Canon::Xml::Nodes::TextNode] Text node
|
|
@@ -287,7 +336,11 @@ module Canon
|
|
|
287
336
|
# Extract text value
|
|
288
337
|
text_value = text_node.value.to_s
|
|
289
338
|
|
|
290
|
-
# Return nil for empty text
|
|
339
|
+
# Return nil for truly empty text. Whitespace-only text nodes are
|
|
340
|
+
# filtered at the parent ElementNode level in
|
|
341
|
+
# to_tree_from_canon_element so that whitespace-sensitive
|
|
342
|
+
# containers (pre, code, textarea, script, style) retain their
|
|
343
|
+
# whitespace content.
|
|
291
344
|
return nil if text_value.empty?
|
|
292
345
|
|
|
293
346
|
Core::TreeNode.new(
|
data/lib/canon/version.rb
CHANGED
data/lib/canon/xml/c14n.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "data_model"
|
|
4
4
|
require_relative "processor"
|
|
5
|
+
require_relative "xpath_engine"
|
|
5
6
|
|
|
6
7
|
module Canon
|
|
7
8
|
module Xml
|
|
@@ -21,15 +22,68 @@ module Canon
|
|
|
21
22
|
processor.process(root_node)
|
|
22
23
|
end
|
|
23
24
|
|
|
24
|
-
# Canonicalize a document subset
|
|
25
|
+
# Canonicalize a document subset selected by XPath expression.
|
|
26
|
+
#
|
|
27
|
+
# Implements W3C C14N 1.1 subset canonicalization:
|
|
28
|
+
# 1. Evaluates XPath against the document tree
|
|
29
|
+
# 2. Marks matched nodes as the node-set
|
|
30
|
+
# 3. Renders canonical form for only the selected nodes,
|
|
31
|
+
# with namespace and attribute inheritance from excluded ancestors
|
|
32
|
+
#
|
|
25
33
|
# @param xml [String] XML document as string
|
|
26
34
|
# @param xpath [String] XPath expression for subset selection
|
|
27
35
|
# @param with_comments [Boolean] Include comments in canonical form
|
|
28
36
|
# @return [String] Canonical form in UTF-8
|
|
29
|
-
def self.canonicalize_subset(xml,
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
def self.canonicalize_subset(xml, xpath, with_comments: false)
|
|
38
|
+
root_node = DataModel.from_xml(xml)
|
|
39
|
+
|
|
40
|
+
# Mark all nodes as NOT in the node-set initially
|
|
41
|
+
mark_all_nodes(root_node, false)
|
|
42
|
+
|
|
43
|
+
# Evaluate XPath and mark matched nodes
|
|
44
|
+
matched = XPathEngine.evaluate(root_node, xpath)
|
|
45
|
+
|
|
46
|
+
# If XPath matches root or is empty, fall back to full canonicalization
|
|
47
|
+
if matched.empty?
|
|
48
|
+
mark_all_nodes(root_node, true)
|
|
49
|
+
else
|
|
50
|
+
# Mark matched nodes and their ancestors/descendants
|
|
51
|
+
mark_subset(root_node, matched)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Process to canonical form
|
|
55
|
+
processor = Processor.new(with_comments: with_comments)
|
|
56
|
+
processor.process(root_node)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
class << self
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# Recursively set in_node_set on all nodes
|
|
63
|
+
def mark_all_nodes(node, value)
|
|
64
|
+
node.in_node_set = value
|
|
65
|
+
node.children.each { |child| mark_all_nodes(child, value) }
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Mark matched nodes and all required supporting nodes.
|
|
69
|
+
#
|
|
70
|
+
# Per W3C C14N 1.1, only nodes in the node-set are rendered.
|
|
71
|
+
# Ancestors not in the node-set become "omitted ancestors" —
|
|
72
|
+
# the Processor handles namespace/attribute inheritance from them.
|
|
73
|
+
def mark_subset(root_node, matched)
|
|
74
|
+
# Mark matched nodes and their descendants
|
|
75
|
+
matched.each do |node|
|
|
76
|
+
mark_node_and_descendants(node)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Root node is always in the set so processing starts
|
|
80
|
+
root_node.in_node_set = true
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def mark_node_and_descendants(node)
|
|
84
|
+
node.in_node_set = true
|
|
85
|
+
node.children.each { |child| mark_node_and_descendants(child) }
|
|
86
|
+
end
|
|
33
87
|
end
|
|
34
88
|
end
|
|
35
89
|
end
|
|
@@ -134,6 +134,9 @@ module Canon
|
|
|
134
134
|
|
|
135
135
|
# Match children recursively
|
|
136
136
|
def match_children(children1, children2, path)
|
|
137
|
+
# FAST PATH: Same array object means all children match
|
|
138
|
+
return if children1.equal?(children2)
|
|
139
|
+
|
|
137
140
|
# Filter to only element nodes
|
|
138
141
|
elems1 = children1.select { |n| n.node_type == :element }
|
|
139
142
|
elems2 = children2.select { |n| n.node_type == :element }
|
data/lib/canon/xml/node.rb
CHANGED
|
@@ -17,13 +17,20 @@ module Canon
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def in_node_set?
|
|
20
|
-
@in_node_set
|
|
20
|
+
instance_variable_defined?(:@in_node_set) ? @in_node_set : true
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def in_node_set=(value)
|
|
24
24
|
@in_node_set = value
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
# Return the text content of this node and all descendants.
|
|
28
|
+
# ElementNode concatenates children's text_content; other nodes
|
|
29
|
+
# (TextNode, CommentNode, etc.) return their value.
|
|
30
|
+
def text_content
|
|
31
|
+
children.map(&:text_content).join
|
|
32
|
+
end
|
|
33
|
+
|
|
27
34
|
protected
|
|
28
35
|
|
|
29
36
|
attr_writer :parent
|
|
@@ -182,8 +182,17 @@ strip_doctype: false)
|
|
|
182
182
|
|
|
183
183
|
# Skip whitespace-only text nodes unless:
|
|
184
184
|
# 1. preserve_whitespace is true, OR
|
|
185
|
-
# 2. The content contains CR (from &#xD; entities) which must be preserved for C14N
|
|
186
|
-
|
|
185
|
+
# 2. The content contains CR (from &#xD; entities) which must be preserved for C14N, OR
|
|
186
|
+
# 3. The content contains non-ASCII whitespace (NBSP U+00A0, ideographic
|
|
187
|
+
# space U+3000, etc.) — those are semantically meaningful content,
|
|
188
|
+
# not pretty-print indentation, and must survive parsing so the
|
|
189
|
+
# comparator can detect Unicode whitespace-type differences.
|
|
190
|
+
#
|
|
191
|
+
# Strip only when the node is pure ASCII whitespace (space, tab, CR, LF).
|
|
192
|
+
# This lets pretty-printed fixtures work (indent nodes stripped) while
|
|
193
|
+
# preserving NBSP-only text nodes.
|
|
194
|
+
if !@preserve_whitespace && decoded_string.gsub(/[ \t\r\n]/,
|
|
195
|
+
"").empty? && parent.node_type == :element && !decoded_string.include?("\r")
|
|
187
196
|
# Only skip if parent is an element (not root)
|
|
188
197
|
return
|
|
189
198
|
end
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# XPath evaluation engine for C14N subset selection.
|
|
6
|
+
#
|
|
7
|
+
# Supports a focused subset of XPath 1.0 sufficient for W3C C14N
|
|
8
|
+
# subset canonicalization:
|
|
9
|
+
#
|
|
10
|
+
# - Absolute paths: /root/child, /root/child[1]
|
|
11
|
+
# - Descendant-or-self: //element, //ns:element
|
|
12
|
+
# - Predicates: [1] (position), [@attr], [@attr='value']
|
|
13
|
+
# - Wildcards: *
|
|
14
|
+
# - Union: expr1 | expr2
|
|
15
|
+
#
|
|
16
|
+
# Not supported (not needed for C14N subset):
|
|
17
|
+
# - Axes other than child and descendant-or-self
|
|
18
|
+
# - Functions (last(), position(), etc.)
|
|
19
|
+
# - Variables
|
|
20
|
+
#
|
|
21
|
+
class XPathEngine
|
|
22
|
+
# Evaluate an XPath expression against a data model tree.
|
|
23
|
+
#
|
|
24
|
+
# @param root [Nodes::RootNode] Root of the data model tree
|
|
25
|
+
# @param xpath [String] XPath expression
|
|
26
|
+
# @return [Array<Node>] Matched nodes in document order
|
|
27
|
+
def self.evaluate(root, xpath)
|
|
28
|
+
new(root).evaluate(xpath)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def initialize(root)
|
|
32
|
+
@root = root
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Evaluate an XPath expression and return matched nodes.
|
|
36
|
+
#
|
|
37
|
+
# @param xpath [String] XPath expression
|
|
38
|
+
# @return [Array<Node>] Matched nodes in document order
|
|
39
|
+
def evaluate(xpath)
|
|
40
|
+
# Handle union operator (|)
|
|
41
|
+
if xpath.include?("|")
|
|
42
|
+
xpath.split("|").flat_map { |expr| evaluate(expr.strip) }.uniq
|
|
43
|
+
else
|
|
44
|
+
evaluate_path(xpath.strip)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def evaluate_path(xpath)
|
|
51
|
+
if xpath.start_with?("//")
|
|
52
|
+
# Descendant-or-self: anywhere in the tree
|
|
53
|
+
evaluate_descendant(xpath[2..])
|
|
54
|
+
elsif xpath.start_with?("/")
|
|
55
|
+
# Absolute path
|
|
56
|
+
evaluate_absolute(xpath[1..])
|
|
57
|
+
else
|
|
58
|
+
# Relative path — treat as descendant
|
|
59
|
+
evaluate_descendant(xpath)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def evaluate_absolute(path)
|
|
64
|
+
return [] if path.empty?
|
|
65
|
+
|
|
66
|
+
steps = parse_steps(path)
|
|
67
|
+
return [] if steps.empty?
|
|
68
|
+
|
|
69
|
+
# Start from root's children
|
|
70
|
+
current_nodes = @root.children
|
|
71
|
+
apply_steps(current_nodes, steps)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def evaluate_descendant(path)
|
|
75
|
+
steps = parse_steps(path)
|
|
76
|
+
return [] if steps.empty?
|
|
77
|
+
|
|
78
|
+
# Collect all descendant element nodes
|
|
79
|
+
all_elements = []
|
|
80
|
+
collect_elements(@root, all_elements)
|
|
81
|
+
|
|
82
|
+
# For each element, try to match the full path starting there
|
|
83
|
+
result = []
|
|
84
|
+
all_elements.each do |element|
|
|
85
|
+
first_step = steps.first
|
|
86
|
+
next unless step_matches?(element, first_step)
|
|
87
|
+
|
|
88
|
+
if steps.length == 1
|
|
89
|
+
result << element
|
|
90
|
+
else
|
|
91
|
+
remaining = steps[1..]
|
|
92
|
+
matched = apply_steps(element.children, remaining)
|
|
93
|
+
result.concat(matched)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
result.uniq
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def collect_elements(node, result)
|
|
101
|
+
node.children.each do |child|
|
|
102
|
+
next unless child.is_a?(Nodes::ElementNode)
|
|
103
|
+
|
|
104
|
+
result << child
|
|
105
|
+
collect_elements(child, result)
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def apply_steps(nodes, steps)
|
|
110
|
+
return nodes if steps.empty?
|
|
111
|
+
|
|
112
|
+
step = steps.first
|
|
113
|
+
remaining = steps[1..]
|
|
114
|
+
|
|
115
|
+
matched = nodes.select { |n| step_matches?(n, step) }
|
|
116
|
+
|
|
117
|
+
if remaining.empty?
|
|
118
|
+
matched
|
|
119
|
+
else
|
|
120
|
+
matched.flat_map do |node|
|
|
121
|
+
apply_steps(node.children, remaining)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def step_matches?(node, step)
|
|
127
|
+
return false unless node.is_a?(Nodes::ElementNode)
|
|
128
|
+
|
|
129
|
+
name_matches?(node, step[:name]) &&
|
|
130
|
+
predicates_match?(node, step[:predicates])
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def name_matches?(node, name)
|
|
134
|
+
return true if name == "*"
|
|
135
|
+
|
|
136
|
+
# Handle prefixed names (ns:element)
|
|
137
|
+
if name.include?(":")
|
|
138
|
+
prefix, local = name.split(":", 2)
|
|
139
|
+
node.prefix == prefix && node.name == local
|
|
140
|
+
else
|
|
141
|
+
node.name == name
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def predicates_match?(node, predicates)
|
|
146
|
+
return true if predicates.empty?
|
|
147
|
+
|
|
148
|
+
predicates.all? { |pred| predicate_matches?(node, pred) }
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def predicate_matches?(node, pred)
|
|
152
|
+
case pred[:type]
|
|
153
|
+
when :position
|
|
154
|
+
# [1] — position among siblings with same name
|
|
155
|
+
position_predicate?(node, pred[:value])
|
|
156
|
+
when :attribute_exists
|
|
157
|
+
# [@attr]
|
|
158
|
+
node.attribute_nodes.any? { |a| a.local_name == pred[:name] }
|
|
159
|
+
when :attribute_value
|
|
160
|
+
# [@attr='value']
|
|
161
|
+
node.attribute_nodes.any? do |a|
|
|
162
|
+
a.local_name == pred[:name] && a.value == pred[:value]
|
|
163
|
+
end
|
|
164
|
+
else
|
|
165
|
+
false
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def position_predicate?(node, position)
|
|
170
|
+
siblings = node.parent&.children&.select do |n|
|
|
171
|
+
n.is_a?(Nodes::ElementNode) && n.name == node.name
|
|
172
|
+
end || []
|
|
173
|
+
idx = siblings.index(node)
|
|
174
|
+
idx && (idx + 1) == position
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Parse a path string into an array of steps.
|
|
178
|
+
#
|
|
179
|
+
# @param path [String] XPath path (without leading /)
|
|
180
|
+
# @return [Array<Hash>] Array of { name:, predicates: }
|
|
181
|
+
def parse_steps(path)
|
|
182
|
+
steps = []
|
|
183
|
+
scanner = StringScanner.new(path)
|
|
184
|
+
|
|
185
|
+
until scanner.eos?
|
|
186
|
+
scanner.skip(/\s+/)
|
|
187
|
+
break if scanner.eos?
|
|
188
|
+
|
|
189
|
+
# Skip /
|
|
190
|
+
scanner.scan(%r{/})
|
|
191
|
+
|
|
192
|
+
name = scan_name(scanner)
|
|
193
|
+
break if name.nil?
|
|
194
|
+
|
|
195
|
+
predicates = scan_predicates(scanner)
|
|
196
|
+
|
|
197
|
+
steps << { name: name, predicates: predicates }
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
steps
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def scan_name(scanner)
|
|
204
|
+
scanner.scan(%r{[a-zA-Z_][\w:.-]*|\*})
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def scan_predicates(scanner) # rubocop:disable Metrics/AbcSize
|
|
208
|
+
predicates = []
|
|
209
|
+
while scanner.scan(/\[/) # rubocop:disable Style/RedundantRegexpArgument
|
|
210
|
+
scanner.skip(/\s*/)
|
|
211
|
+
pred = scan_predicate(scanner)
|
|
212
|
+
scanner.skip(/\s*/)
|
|
213
|
+
scanner.scan(/\]/) # rubocop:disable Style/RedundantRegexpArgument
|
|
214
|
+
predicates << pred if pred
|
|
215
|
+
end
|
|
216
|
+
predicates
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def scan_predicate(scanner)
|
|
220
|
+
if scanner.scan(/(\d+)/)
|
|
221
|
+
{ type: :position, value: scanner[1].to_i }
|
|
222
|
+
elsif scanner.scan(/@/)
|
|
223
|
+
name = scanner.scan(/[a-zA-Z_][\w.-]*/)
|
|
224
|
+
|
|
225
|
+
if scanner.scan(/=/) # rubocop:disable Style/RedundantRegexpArgument
|
|
226
|
+
# Remove surrounding quotes if present
|
|
227
|
+
scanner.scan(/['"]/)
|
|
228
|
+
value = scanner.scan(/[^'"\]]+/)
|
|
229
|
+
scanner.scan(/['"]/)
|
|
230
|
+
{ type: :attribute_value, name: name, value: value }
|
|
231
|
+
else
|
|
232
|
+
{ type: :attribute_exists, name: name }
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: canon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diff-lcs
|
|
@@ -296,10 +296,12 @@ files:
|
|
|
296
296
|
- lib/canon/diff_formatter/by_line/simple_formatter.rb
|
|
297
297
|
- lib/canon/diff_formatter/by_line/xml_formatter.rb
|
|
298
298
|
- lib/canon/diff_formatter/by_line/yaml_formatter.rb
|
|
299
|
+
- lib/canon/diff_formatter/by_line_formatter.rb
|
|
299
300
|
- lib/canon/diff_formatter/by_object/base_formatter.rb
|
|
300
301
|
- lib/canon/diff_formatter/by_object/json_formatter.rb
|
|
301
302
|
- lib/canon/diff_formatter/by_object/xml_formatter.rb
|
|
302
303
|
- lib/canon/diff_formatter/by_object/yaml_formatter.rb
|
|
304
|
+
- lib/canon/diff_formatter/by_object_formatter.rb
|
|
303
305
|
- lib/canon/diff_formatter/character_map.yml
|
|
304
306
|
- lib/canon/diff_formatter/debug_output.rb
|
|
305
307
|
- lib/canon/diff_formatter/diff_detail_formatter.rb
|
|
@@ -309,6 +311,7 @@ files:
|
|
|
309
311
|
- lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb
|
|
310
312
|
- lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb
|
|
311
313
|
- lib/canon/diff_formatter/legend.rb
|
|
314
|
+
- lib/canon/diff_formatter/pretty_diff_formatter.rb
|
|
312
315
|
- lib/canon/diff_formatter/theme.rb
|
|
313
316
|
- lib/canon/errors.rb
|
|
314
317
|
- lib/canon/formatters/html4_formatter.rb
|
|
@@ -376,6 +379,7 @@ files:
|
|
|
376
379
|
- lib/canon/xml/sax_builder.rb
|
|
377
380
|
- lib/canon/xml/whitespace_normalizer.rb
|
|
378
381
|
- lib/canon/xml/xml_base_handler.rb
|
|
382
|
+
- lib/canon/xml/xpath_engine.rb
|
|
379
383
|
- lib/tasks/benchmark_runner.rb
|
|
380
384
|
- lib/tasks/performance.rake
|
|
381
385
|
- lib/tasks/performance_comparator.rb
|