canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../pretty_printer/xml"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
# Maps DOM elements to line ranges in pretty-printed XML
|
|
8
|
+
#
|
|
9
|
+
# This class builds a mapping between DOM elements and their corresponding
|
|
10
|
+
# line numbers in pretty-printed XML output. This enables line-accurate
|
|
11
|
+
# diff display that can highlight specific elements even when the XML
|
|
12
|
+
# structure is complex.
|
|
13
|
+
#
|
|
14
|
+
# == How it works
|
|
15
|
+
#
|
|
16
|
+
# 1. Pretty-prints the XML with consistent indentation
|
|
17
|
+
# 2. Traverses the DOM tree depth-first
|
|
18
|
+
# 3. For each element, finds its opening and closing tags in the pretty-printed output
|
|
19
|
+
# 4. Records the line range (start_line..end_line) for that element
|
|
20
|
+
# 5. Returns a Hash mapping element → LineRange
|
|
21
|
+
#
|
|
22
|
+
# == Usage
|
|
23
|
+
#
|
|
24
|
+
# mapper = LineRangeMapper.new(indent: 2)
|
|
25
|
+
# root = Canon::Xml::DataModel.from_xml(xml_string)
|
|
26
|
+
# line_map = mapper.build_map(root, xml_string)
|
|
27
|
+
#
|
|
28
|
+
# # Look up line range for an element
|
|
29
|
+
# range = line_map[element]
|
|
30
|
+
# puts "Element spans lines #{range.start_line} to #{range.end_line}"
|
|
31
|
+
#
|
|
32
|
+
# == Line Range Format
|
|
33
|
+
#
|
|
34
|
+
# Each LineRange contains:
|
|
35
|
+
# - start_line: First line of the element (0-indexed)
|
|
36
|
+
# - end_line: Last line of the element (0-indexed)
|
|
37
|
+
# - elem: Reference to the DOM element
|
|
38
|
+
#
|
|
39
|
+
class LineRangeMapper
  # Inclusive line span occupied by one element in the pretty-printed
  # output, together with the element it belongs to.
  LineRange = Struct.new(:start_line, :end_line, :elem) do
    # @param line_num [Integer] line index to test
    # @return [Boolean] true when line_num lies inside the span
    def contains?(line_num)
      line_num.between?(start_line, end_line)
    end

    # @return [Integer] number of lines covered (both ends included)
    def length
      end_line - start_line + 1
    end
  end

  # @param indent [Integer] indentation handed to the pretty printer
  def initialize(indent: 2)
    @indent = indent
    @ranges = []
  end

  # Pretty-prints the XML, then walks the tree and records a LineRange
  # for every element whose opening tag can be located.
  #
  # @param root [#children] root of the DOM tree
  # @param xml_string [String] XML text to pretty-print
  # @return [Hash] element => LineRange
  def build_map(root, xml_string)
    @ranges = []
    @map = {}

    printer = Canon::PrettyPrinter::Xml.new(indent: @indent)
    @lines = printer.format(xml_string).split("\n")

    # Search cursor: tag lookups never look above this line.
    @current_line = 0

    root.children.each { |child| map_node(child) }

    @map
  end

  private

  # Records the range of one element node (other node types are
  # ignored) after recursing into its children.
  def map_node(node)
    return unless node.node_type == :element

    opening = find_opening_tag(node)
    return unless opening

    first_line = opening[:line]
    @current_line = first_line

    node.children.each { |child| map_node(child) }

    closing = find_closing_tag(node, first_line)
    @current_line = closing[:line] if closing
    last_line = @current_line

    span = LineRange.new(first_line, last_line, node)
    @map[node] = span
    @ranges << span

    # Following siblings are searched for below this element.
    @current_line = last_line + 1
  end

  # First line at or after the cursor that holds the element's opening
  # tag.
  #
  # @return [Hash, nil] { line:, content: } or nil when nothing matches
  def find_opening_tag(elem)
    scan_lines(/<#{escaped_tag_name(elem)}[\s>\/]/, @current_line)
  end

  # Line holding the element's closing tag. The start line itself is
  # returned when it contains "/>" or when no closing tag is found.
  #
  # @return [Hash] { line:, content: }
  def find_closing_tag(elem, start_line)
    pattern = /<\/#{escaped_tag_name(elem)}>/
    start_content = @lines[start_line]
    fallback = { line: start_line, content: start_content }

    # A "/>" on the opening line is treated as a self-closing element.
    return fallback if start_content&.include?("/>")

    scan_lines(pattern, start_line) || fallback
  end

  # Regexp-escaped tag name, "prefix:name" when a non-empty prefix is
  # present and plain "name" otherwise.
  def escaped_tag_name(elem)
    unless elem.prefix && !elem.prefix.empty?
      return Regexp.escape(elem.name)
    end

    "#{Regexp.escape(elem.prefix)}:#{Regexp.escape(elem.name)}"
  end

  # First line index >= from whose text matches pattern.
  #
  # @return [Hash, nil] { line:, content: } or nil when nothing matches
  def scan_lines(pattern, from)
    index = (from...@lines.length).find { |i| @lines[i].match?(pattern) }
    index && { line: index, content: @lines[index] }
  end
end
|
|
157
|
+
end
|
|
158
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# Namespace handler for C14N 1.1
|
|
6
|
+
# Handles namespace declaration processing per spec
|
|
7
|
+
class NamespaceHandler
  # @param encoder [#encode_attribute] escapes namespace URIs for output
  def initialize(encoder)
    @encoder = encoder
  end

  # Appends the namespace declarations of an element to output.
  # Nothing is written when the element is not in the node-set.
  #
  # @param element [#in_node_set?, #sorted_namespace_nodes]
  # @param output [String] buffer appended to with <<
  # @param parent_element [#namespace_nodes, nil] element whose
  #   declarations are consulted to suppress repeats
  # rubocop:disable Metrics/MethodLength
  def process_namespaces(element, output, parent_element = nil)
    return unless element.in_node_set?

    in_scope = element.sorted_namespace_nodes.select(&:in_node_set?)

    # xmlns="" resets a default namespace declared on the parent.
    if should_emit_empty_default_namespace?(element, in_scope,
                                            parent_element)
      output << ' xmlns=""'
    end

    in_scope.each do |ns|
      next if should_skip_namespace?(ns, element, parent_element)

      attr_name = ns.default_namespace? ? "xmlns" : "xmlns:#{ns.prefix}"
      output << " " << attr_name << '="'
      output << @encoder.encode_attribute(ns.uri) << '"'
    end
  end
  # rubocop:enable Metrics/MethodLength

  private

  # Decides whether xmlns="" must be written. All of these must hold:
  # 1. the element is in the node-set
  # 2. its first namespace node is not the default namespace node
  # 3. the parent element has a default namespace node in the
  #    node-set whose URI is not empty
  def should_emit_empty_default_namespace?(element, namespaces,
                                           parent_element)
    eligible = element.in_node_set? &&
               !namespaces.first&.default_namespace? &&
               parent_element
    return false unless eligible

    inherited_default = parent_element.namespace_nodes.find do |candidate|
      candidate.default_namespace? && candidate.in_node_set?
    end

    inherited_default && !inherited_default.uri.empty?
  end

  # A namespace node is skipped when it is the xml namespace or when
  # the parent element already carries the same declaration.
  def should_skip_namespace?(ns, _element, parent_element)
    redundant = ns.xml_namespace? ||
                namespace_declared_by_ancestor?(ns, parent_element)
    redundant ? true : false
  end

  # True when the parent element has an in-node-set namespace node
  # with the same prefix and the same URI.
  def namespace_declared_by_ancestor?(ns, parent_element)
    return false unless parent_element

    inherited = parent_element.namespace_nodes.find do |candidate|
      candidate.prefix == ns.prefix && candidate.in_node_set?
    end

    inherited && inherited.uri == ns.uri
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# Base class for all XPath data model nodes
|
|
6
|
+
class Node
  # @return [Node, nil] node this one was added to via #add_child
  # @return [Array<Node>] children in insertion order
  attr_reader :parent, :children

  # Marks whether this node belongs to the node-set.
  # @param value [Boolean]
  attr_writer :in_node_set

  def initialize
    @parent = nil
    @children = []
    @in_node_set = true
  end

  # Appends child and records self as its parent.
  #
  # @param child [Node]
  # @return [Array<Node>] the updated children list
  def add_child(child)
    child.parent = self
    @children << child
  end

  # Whether this node belongs to the node-set. Defaults to true.
  #
  # An explicit false is honoured: the previous `@in_node_set ||= true`
  # re-assigned true whenever the flag was false, so a node could never
  # be excluded. An unset (nil) flag still reads as true.
  #
  # @return [Boolean]
  def in_node_set?
    @in_node_set.nil? ? true : @in_node_set
  end

  protected

  # Only other nodes may re-parent a node (see #add_child).
  attr_writer :parent
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Attribute node in the XPath data model
|
|
9
|
+
class AttributeNode < Node
  attr_reader :name, :value, :namespace_uri, :prefix

  # @param name [String] attribute name without prefix
  # @param value [String] attribute value
  # @param namespace_uri [String, nil] namespace URI, if any
  # @param prefix [String, nil] namespace prefix, if any
  def initialize(name:, value:, namespace_uri: nil, prefix: nil)
    super()
    @prefix = prefix
    @namespace_uri = namespace_uri
    @name = name
    @value = value
  end

  # @return [Symbol] always :attribute
  def node_type
    :attribute
  end

  # @return [String] same as #name
  def local_name
    name
  end

  # @return [String] "prefix:name", or just the name when the prefix
  #   is nil or empty
  def qname
    return name if prefix.nil? || prefix.empty?

    "#{prefix}:#{name}"
  end

  # True when the attribute lives in the XML namespace (xml:*).
  def xml_attribute?
    namespace_uri == "http://www.w3.org/XML/1998/namespace"
  end

  # True for xml:lang and xml:space.
  def simple_inheritable?
    xml_attribute? && %w[lang space].include?(name)
  end

  # True for xml:id.
  def xml_id?
    xml_named?("id")
  end

  # True for xml:base.
  def xml_base?
    xml_named?("base")
  end

  private

  # True when this is an xml:* attribute with the given name.
  def xml_named?(expected)
    xml_attribute? && name == expected
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Comment node in the XPath data model
|
|
9
|
+
class CommentNode < Node
  # @return [String] value given at construction
  attr_reader :value

  # @param value [String] comment content
  def initialize(value:)
    super()
    @value = value
  end

  # @return [Symbol] always :comment
  def node_type
    :comment
  end
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Element node in the XPath data model
|
|
9
|
+
class ElementNode < Node
  attr_reader :name, :namespace_uri, :prefix, :namespace_nodes,
              :attribute_nodes

  # @param name [String] element name without prefix
  # @param namespace_uri [String, nil] namespace URI, if any
  # @param prefix [String, nil] namespace prefix, if any
  def initialize(name:, namespace_uri: nil, prefix: nil)
    super()
    @attribute_nodes = []
    @namespace_nodes = []
    @prefix = prefix
    @namespace_uri = namespace_uri
    @name = name
  end

  # @return [Symbol] always :element
  def node_type
    :element
  end

  # @return [String] "prefix:name", or just the name when the prefix
  #   is nil or empty
  def qname
    return name if prefix.nil? || prefix.empty?

    "#{prefix}:#{name}"
  end

  # Attaches a namespace node and makes this element its parent.
  #
  # @return [Array] the updated namespace node list
  def add_namespace(namespace_node)
    namespace_node.parent = self
    @namespace_nodes.push(namespace_node)
  end

  # Attaches an attribute node and makes this element its parent.
  #
  # @return [Array] the updated attribute node list
  def add_attribute(attribute_node)
    attribute_node.parent = self
    @attribute_nodes.push(attribute_node)
  end

  # Namespace nodes ordered by their local name.
  #
  # @return [Array] a new sorted array
  def sorted_namespace_nodes
    @namespace_nodes.sort_by { |ns| ns.local_name.to_s }
  end

  # Attribute nodes ordered by namespace URI first, local name second.
  #
  # @return [Array] a new sorted array
  def sorted_attribute_nodes
    @attribute_nodes.sort_by do |attribute|
      [attribute.namespace_uri.to_s, attribute.local_name]
    end
  end
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Namespace node in the XPath data model
|
|
9
|
+
class NamespaceNode < Node
  attr_reader :prefix, :uri

  # @param prefix [String, nil] declared prefix; nil or "" for the
  #   default namespace
  # @param uri [String] namespace URI
  def initialize(prefix:, uri:)
    super()
    @uri = uri
    @prefix = prefix
  end

  # @return [Symbol] always :namespace
  def node_type
    :namespace
  end

  # The prefix as a string; "" for the default namespace.
  #
  # @return [String]
  def local_name
    prefix.to_s
  end

  # True when no prefix is set (nil or empty).
  def default_namespace?
    return true if prefix.nil?

    prefix.empty?
  end

  # True for the "xml" prefix bound to the standard XML namespace URI.
  def xml_namespace?
    prefix == "xml" && uri == "http://www.w3.org/XML/1998/namespace"
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Processing Instruction node in the XPath data model
|
|
9
|
+
class ProcessingInstructionNode < Node
  # @return [String] PI target
  # @return [String] PI content ("" when none was given)
  attr_reader :target, :data

  # @param target [String] PI target
  # @param data [String] PI content, defaults to the empty string
  def initialize(target:, data: "")
    super()
    @data = data
    @target = target
  end

  # @return [Symbol] always :processing_instruction
  def node_type
    :processing_instruction
  end
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../node"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Xml
|
|
7
|
+
module Nodes
|
|
8
|
+
# Text node in the XPath data model
|
|
9
|
+
class TextNode < Node
  # @return [String] value given at construction
  attr_reader :value

  # @param value [String] text content
  def initialize(value:)
    super()
    @value = value
  end

  # @return [Symbol] always :text
  def node_type
    :text
  end
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "character_encoder"
|
|
4
|
+
require_relative "namespace_handler"
|
|
5
|
+
require_relative "attribute_handler"
|
|
6
|
+
require_relative "xml_base_handler"
|
|
7
|
+
|
|
8
|
+
module Canon
|
|
9
|
+
module Xml
|
|
10
|
+
# C14N 1.1 processor
|
|
11
|
+
# Processes XPath data model and generates canonical form
|
|
12
|
+
class Processor
  # @param with_comments [Boolean] when false, comment nodes are
  #   dropped from the output
  def initialize(with_comments: false)
    @with_comments = with_comments
    @encoder = CharacterEncoder.new
    @namespace_handler = NamespaceHandler.new(@encoder)
    @attribute_handler = AttributeHandler.new(@encoder)
    @xml_base_handler = XmlBaseHandler.new
  end

  # Serializes the tree below root_node.
  #
  # @param root_node [#node_type] node to start from
  # @return [String] UTF-8 string with the generated output
  def process(root_node)
    output = String.new(encoding: "UTF-8")
    process_node(root_node, output)
    output
  end

  private

  # rubocop:disable Metrics/MethodLength

  # Dispatches on node type. Node types not listed are ignored.
  def process_node(node, output, parent_element = nil,
                   omitted_ancestors = [])
    case node.node_type
    when :root
      process_root_node(node, output)
    when :element
      process_element_node(node, output, parent_element,
                           omitted_ancestors)
    when :text
      process_text_node(node, output)
    when :comment
      process_comment_node(node, output)
    when :processing_instruction
      process_pi_node(node, output)
    end
  end

  # Processes the root's children in order. Comments and PIs that are
  # direct children of the root are told whether they come before or
  # after the first element child (the document element), because
  # Canonical XML separates them from it with a line break on one
  # side only.
  def process_root_node(node, output)
    position = :before

    node.children.each do |child|
      case child.node_type
      when :element
        process_node(child, output)
        position = :after
      when :comment
        process_comment_node(child, output, position)
      when :processing_instruction
        process_pi_node(child, output, position)
      else
        process_node(child, output)
      end
    end
  end

  # rubocop:enable Metrics/MethodLength

  # Renders the element when it is in the node-set. Otherwise only its
  # children are processed and the element is remembered as an omitted
  # ancestor for them.
  def process_element_node(node, output, parent_element,
                           omitted_ancestors)
    if node.in_node_set?
      render_element(node, output, parent_element, omitted_ancestors)
    else
      skipped = omitted_ancestors + [node]
      node.children.each do |child|
        process_node(child, output, parent_element, skipped)
      end
    end
  end

  # Writes start tag, namespace declarations, attributes, children and
  # end tag. Empty elements are written as a start/end tag pair.
  def render_element(node, output, parent_element, omitted_ancestors)
    output << "<" << node.qname

    @namespace_handler.process_namespaces(node, output, parent_element)
    process_element_attributes(node, output, omitted_ancestors)

    output << ">"

    # Children start with a fresh omitted-ancestor list because this
    # element is now their nearest rendered ancestor.
    node.children.each do |child|
      process_node(child, output, node, [])
    end

    output << "</" << node.qname << ">"
  end

  # Writes the element's attributes, then an xml:base attribute when
  # ancestors were omitted, the handler returns a non-empty fixed-up
  # value and the element has no xml:base of its own.
  def process_element_attributes(node, output, omitted_ancestors)
    @attribute_handler.process_attributes(node, output,
                                          omitted_ancestors)
    return unless omitted_ancestors.any?

    fixed_base = @xml_base_handler.fixup_xml_base(node,
                                                  omitted_ancestors)
    return unless fixed_base && !fixed_base.empty?
    return if node.attribute_nodes.any?(&:xml_base?)

    output << ' xml:base="'
    output << @encoder.encode_attribute(fixed_base)
    output << '"'
  end

  # Writes the encoded text of a node that is in the node-set.
  def process_text_node(node, output)
    return unless node.in_node_set?

    output << @encoder.encode_text(node.value)
  end

  # Writes a comment when comments are enabled and the node is in the
  # node-set.
  #
  # @param root_position [Symbol, nil] :before / :after for direct
  #   children of the root, nil for comments anywhere else
  def process_comment_node(node, output, root_position = nil)
    return unless @with_comments
    return unless node.in_node_set?

    with_root_line_break(output, root_position) do
      output << "<!--" << node.value << "-->"
    end
  end

  # Writes a processing instruction that is in the node-set. The data
  # and its leading space are left out when the data is empty.
  #
  # @param root_position [Symbol, nil] :before / :after for direct
  #   children of the root, nil for PIs anywhere else
  def process_pi_node(node, output, root_position = nil)
    return unless node.in_node_set?

    with_root_line_break(output, root_position) do
      output << "<?" << node.target
      output << " " << node.data unless node.data.empty?
      output << "?>"
    end
  end

  # Wraps the markup written by the block with the single line break
  # Canonical XML requires for root-level comments and PIs: a trailing
  # "\n" before the document element, a leading "\n" after it, and no
  # line break at all elsewhere. Writing a break on both sides (the
  # previous behaviour) produced blank lines between consecutive
  # prolog nodes and a stray newline at the end of the document.
  def with_root_line_break(output, root_position)
    output << "\n" if root_position == :after
    yield
    output << "\n" if root_position == :before
  end
end
|
|
150
|
+
end
|
|
151
|
+
end
|