canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
data/lib/canon/xml_parsing.rb
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
  # One place for XML parsing, serialization, node type checks and node
  # traversal, independent of which XML backend is active.
  #
  # Each public method asks `XmlBackend.nokogiri?` and then talks either to
  # Nokogiri or to moxml (the context is created with `:oga`).
  #
  # Dispatch is written as plain branches rather than `case/when` over class
  # constants on purpose: a Nokogiri constant is only referenced on the
  # Nokogiri path, so it is never resolved under Opal and cannot raise a
  # NameError there at runtime.
  #
  # OCP: supporting another backend only means extending this module.
  # DRY: backend dispatch lives here instead of being repeated throughout the
  # comparator/formatter files.
  module XmlParsing
    class << self
      # Memoized moxml context, built with `:oga` on first use.
      def moxml_context
        @moxml_context ||= Moxml.new(:oga)
      end

      # --- Parsing ---

      # Parses an XML string into a document of the active backend.
      #
      # @param xml_string the XML source to parse
      # @param options [Hash] parse options; the Nokogiri path reads
      #   `:remove_namespaces`, the moxml path ignores the hash
      # @return the document object produced by the active backend
      def parse(xml_string, options = {})
        return nokogiri_parse(xml_string, options) if XmlBackend.nokogiri?

        moxml_parse(xml_string, options)
      end

      # Parses an XML fragment and returns its top-level nodes as an Array.
      #
      # moxml is given the fragment wrapped in a synthetic `<__frag__>`
      # element; the children of that wrapper root are what is returned.
      #
      # @param xml_string the fragment source
      # @return [Array] top-level nodes of the fragment
      def parse_fragment(xml_string)
        if XmlBackend.nokogiri?
          return Nokogiri::XML.fragment(xml_string).children.to_a
        end

        wrapped = moxml_context.parse("<__frag__>#{xml_string}</__frag__>")
        wrapped.root.children.to_a
      end

      # --- Serialization ---

      # Serializes a node (or document) of the active backend via `to_xml`.
      #
      # @param node the node or document to serialize
      # @return the value returned by the backend's `to_xml`
      def serialize(node)
        return nokogiri_serialize(node) if XmlBackend.nokogiri?

        moxml_serialize(node)
      end

      # --- Type checks (backend-safe) ---

      # @return [Boolean] whether obj is a document of the active backend
      def document?(obj)
        return obj.is_a?(Nokogiri::XML::Document) if XmlBackend.nokogiri?

        obj.is_a?(Moxml::Document)
      end

      # @return [Boolean] whether obj is any node of the active backend
      def xml_node?(obj)
        obj.is_a?(backend_node_class)
      end

      # @return [Boolean] whether node is an element of the active backend
      def element?(node)
        node.is_a?(backend_element_class)
      end

      # @return [Boolean] whether node is a text node of the active backend
      def text_node?(node)
        return node.is_a?(Nokogiri::XML::Text) if XmlBackend.nokogiri?

        node.is_a?(Moxml::Text)
      end

      # @return [Boolean] whether node is a comment of the active backend
      def comment?(node)
        return node.is_a?(Nokogiri::XML::Comment) if XmlBackend.nokogiri?

        node.is_a?(Moxml::Comment)
      end

      # @return [Boolean] whether node is a CDATA section of the active backend
      def cdata?(node)
        return node.is_a?(Nokogiri::XML::CDATA) if XmlBackend.nokogiri?

        node.is_a?(Moxml::Cdata)
      end

      # @return [Boolean] whether node is a processing instruction of the
      #   active backend
      def processing_instruction?(node)
        if XmlBackend.nokogiri?
          return node.is_a?(Nokogiri::XML::ProcessingInstruction)
        end

        node.is_a?(Moxml::ProcessingInstruction)
      end

      # @return [Boolean] whether obj is a document fragment of the active
      #   backend
      def document_fragment?(obj)
        if XmlBackend.nokogiri?
          return obj.is_a?(Nokogiri::XML::DocumentFragment)
        end

        obj.is_a?(Moxml::DocumentFragment)
      end

      # Only the Nokogiri backend can answer true here; with any other
      # backend the result is always false.
      #
      # @return [Boolean] whether node is a Nokogiri DTD node
      def dtd?(node)
        return node.is_a?(Nokogiri::XML::DTD) if XmlBackend.nokogiri?

        false
      end

      # --- Node traversal ---

      # @return [Array] the node's children, or an empty Array when node is
      #   not a node of the active backend
      def children(node)
        return [] unless node.is_a?(backend_node_class)

        node.children.to_a
      end

      # @return the node's name, or nil when node is not a node of the active
      #   backend
      def name(node)
        node.name if node.is_a?(backend_node_class)
      end

      # Text of a node: Nokogiri's `content` or moxml's `text`. Anything that
      # is not a node of the active backend is converted with `to_s`.
      def text_content(node)
        if XmlBackend.nokogiri?
          return node.content if node.is_a?(Nokogiri::XML::Node)
        elsif node.is_a?(Moxml::Node)
          return node.text
        end

        node.to_s
      end

      # Attribute objects of an element. For Nokogiri these are the values of
      # the element's `attributes` hash; for moxml, whatever `attributes`
      # returns. Non-elements yield an empty Array.
      def attributes(node)
        unless XmlBackend.nokogiri?
          return node.is_a?(Moxml::Element) ? node.attributes : []
        end
        return [] unless node.is_a?(Nokogiri::XML::Element)

        node.attributes.values
      end

      # Looks an attribute up through the element's `[]`.
      #
      # @param node the element to read from
      # @param attr_name attribute name; converted with `to_s` before lookup
      # @return the result of `node[...]`, or nil when node is not an element
      #   of the active backend
      def attribute_value(node, attr_name)
        node[attr_name.to_s] if node.is_a?(backend_element_class)
      end

      # @return the element's namespace definitions, or an empty Array when
      #   node is not an element of the active backend
      def namespace_definitions(node)
        return [] unless node.is_a?(backend_element_class)

        node.namespace_definitions
      end

      # @return the node's parent; nil for documents (they have no parent) and
      #   for anything that is not a node of the active backend
      def parent(node)
        return nil if !xml_node?(node) || document?(node)

        node.parent
      end

      # @return the namespace URI of an element (Nokogiri: `namespace&.href`,
      #   moxml: `namespace_uri`), or nil when node is not an element of the
      #   active backend
      def namespace_uri(node)
        if XmlBackend.nokogiri?
          return nil unless node.is_a?(Nokogiri::XML::Element)

          return node.namespace&.href
        end

        node.namespace_uri if node.is_a?(Moxml::Element)
      end

      # Maps a node to a backend-independent symbol (:element, :text,
      # :comment, ...). Unrecognised nodes map to nil.
      def node_type(node)
        return nokogiri_node_type(node) if XmlBackend.nokogiri?

        moxml_node_type(node)
      end

      # Canonical form of a node. Nokogiri receives `options` as the single
      # argument of `node.canonicalize`; the moxml path ignores `options` and
      # returns the node's `to_xml` output.
      #
      # NOTE(review): confirm that an options Hash is what Nokogiri's
      # `canonicalize` expects as its argument here.
      def canonicalize(node, options = {})
        return node.canonicalize(options) if XmlBackend.nokogiri?

        moxml_canonicalize(node, options)
      end

      private

      # --- Backend class selection ---

      # Generic node class of the active backend. Only the chosen branch is
      # evaluated, so the other backend's constant is never resolved.
      def backend_node_class
        XmlBackend.nokogiri? ? Nokogiri::XML::Node : Moxml::Node
      end

      # Element class of the active backend; same lazy resolution as above.
      def backend_element_class
        XmlBackend.nokogiri? ? Nokogiri::XML::Element : Moxml::Element
      end

      # --- Nokogiri backend ---

      # Memoized, frozen lookup from Nokogiri node-type constants to the
      # symbols returned by #node_type.
      def nokogiri_type_map
        @nokogiri_type_map ||= begin
          types = Nokogiri::XML::Node
          {
            types::ELEMENT_NODE => :element,
            types::TEXT_NODE => :text,
            types::CDATA_SECTION_NODE => :cdata,
            types::COMMENT_NODE => :comment,
            types::PI_NODE => :processing_instruction,
            types::DOCUMENT_NODE => :document,
            types::DOCUMENT_FRAG_NODE => :document_fragment,
            types::DTD_NODE => :dtd,
            types::ATTRIBUTE_NODE => :attribute,
          }.freeze
        end
      end

      # Symbol for a Nokogiri node; nil for non-nodes and unmapped types.
      def nokogiri_node_type(node)
        nokogiri_type_map[node.node_type] if node.is_a?(Nokogiri::XML::Node)
      end

      # Parses with Nokogiri. When `options[:remove_namespaces]` is truthy the
      # result of `remove_namespaces!` on the parsed document is returned.
      def nokogiri_parse(xml_string, options)
        parsed = Nokogiri::XML.parse(xml_string)
        return parsed unless options[:remove_namespaces]

        parsed.remove_namespaces!
      end

      # Documents are serialized with an explicit UTF-8 encoding; every other
      # node uses the plain `to_xml` call.
      def nokogiri_serialize(node)
        return node.to_xml(encoding: "UTF-8") if node.is_a?(Nokogiri::XML::Document)

        node.to_xml
      end

      # --- Moxml backend ---

      # Parses with the shared moxml context. The options hash is not used,
      # so `:remove_namespaces` has no effect on this path.
      def moxml_parse(xml_string, _options)
        moxml_context.parse(xml_string)
      end

      # Serializes a moxml node via `to_xml`.
      def moxml_serialize(node)
        node.to_xml
      end

      # Returns `to_xml` output unchanged; the options hash is not used.
      def moxml_canonicalize(node, _options)
        node.to_xml
      end

      # Symbol for a moxml node, or nil when none of the checked classes
      # match. Only element, text, comment, CDATA, document and processing
      # instruction are checked here.
      def moxml_node_type(node)
        if node.is_a?(Moxml::Element)
          :element
        elsif node.is_a?(Moxml::Text)
          :text
        elsif node.is_a?(Moxml::Comment)
          :comment
        elsif node.is_a?(Moxml::Cdata)
          :cdata
        elsif node.is_a?(Moxml::Document)
          :document
        elsif node.is_a?(Moxml::ProcessingInstruction)
          :processing_instruction
        end
      end
    end
  end
end
|
data/lib/canon.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "canon/version"
|
|
4
4
|
require_relative "canon/errors"
|
|
5
|
+
require_relative "canon/xml_backend"
|
|
6
|
+
require_relative "canon/xml_parsing"
|
|
5
7
|
require_relative "canon/config"
|
|
6
8
|
require_relative "canon/data_model"
|
|
7
9
|
require_relative "canon/html"
|
|
@@ -13,7 +15,7 @@ require_relative "canon/formatters/html4_formatter"
|
|
|
13
15
|
require_relative "canon/formatters/html5_formatter"
|
|
14
16
|
require_relative "canon/comparison"
|
|
15
17
|
|
|
16
|
-
require_relative "canon/rspec_matchers" if defined?(RSpec)
|
|
18
|
+
require_relative "canon/rspec_matchers" if defined?(RSpec.configure)
|
|
17
19
|
|
|
18
20
|
module Canon
|
|
19
21
|
SUPPORTED_FORMATS = %i[xml yaml json html html4 html5 string].freeze
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "benchmark/ips"
|
|
4
|
-
require "table_tennis"
|
|
4
|
+
require "table_tennis" unless RUBY_ENGINE == "opal"
|
|
5
5
|
|
|
6
6
|
# Ensure lib/ is on the load path regardless of tmp location
|
|
7
7
|
lib_path = File.expand_path(File.join(__dir__, "..", "..", "lib"))
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: canon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.8
|
|
4
|
+
version: 0.2.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diff-lcs
|
|
@@ -148,6 +148,7 @@ extensions: []
|
|
|
148
148
|
extra_rdoc_files: []
|
|
149
149
|
files:
|
|
150
150
|
- ".rspec"
|
|
151
|
+
- ".rspec-opal"
|
|
151
152
|
- ".rubocop.yml"
|
|
152
153
|
- ".rubocop_todo.yml"
|
|
153
154
|
- CODE_OF_CONDUCT.md
|
|
@@ -385,6 +386,8 @@ files:
|
|
|
385
386
|
- lib/canon/xml/whitespace_normalizer.rb
|
|
386
387
|
- lib/canon/xml/xml_base_handler.rb
|
|
387
388
|
- lib/canon/xml/xpath_engine.rb
|
|
389
|
+
- lib/canon/xml_backend.rb
|
|
390
|
+
- lib/canon/xml_parsing.rb
|
|
388
391
|
- lib/tasks/benchmark_runner.rb
|
|
389
392
|
- lib/tasks/performance.rake
|
|
390
393
|
- lib/tasks/performance_comparator.rb
|