canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a13457a67f3e2ab91e00cec19684c502605ab807bdd87eb1120e77d190a99c2e
|
|
4
|
+
data.tar.gz: 35c0c873340e12c63048adf2222fda2f8c2ae3972337dcc212b26d391191ac35
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8db915564eebd4ca4dfadd65358f721aa70bca318c22dc1c02eff5e3527cf646ea19722b760072851f358b3fabefd12fc5f6dfc216bce146423c7091f3bf7eac
|
|
7
|
+
data.tar.gz: f92e7491d781c8762483335558ede985a1653bcfb88613858115aa87e50bb326f95b0b76b845c54154e657fb9f25b3d1f348bf8e9baa926ea1c6bfbbd77d6ca6
|
data/.rspec-opal
ADDED
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-05-
|
|
3
|
+
# on 2026-05-24 10:34:05 UTC using RuboCop version 1.86.0.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,58 +11,14 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'canon.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
15
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
16
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
-
Layout/ArgumentAlignment:
|
|
19
|
-
Exclude:
|
|
20
|
-
- 'lib/canon/comparison/child_realignment.rb'
|
|
21
|
-
- 'lib/canon/comparison/xml_comparator/child_comparison.rb'
|
|
22
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
23
|
-
|
|
24
|
-
# Offense count: 5
|
|
25
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
26
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
27
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
28
|
-
Layout/BlockAlignment:
|
|
29
|
-
Exclude:
|
|
30
|
-
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
31
|
-
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
32
|
-
|
|
33
|
-
# Offense count: 5
|
|
34
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
35
|
-
Layout/BlockEndNewline:
|
|
36
|
-
Exclude:
|
|
37
|
-
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
38
|
-
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
39
|
-
|
|
40
|
-
# Offense count: 10
|
|
41
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
42
|
-
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
43
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
44
|
-
Layout/IndentationWidth:
|
|
45
|
-
Exclude:
|
|
46
|
-
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
47
|
-
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
48
|
-
|
|
49
|
-
# Offense count: 1386
|
|
14
|
+
# Offense count: 1358
|
|
50
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
51
16
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
52
17
|
# URISchemes: http, https
|
|
53
18
|
Layout/LineLength:
|
|
54
19
|
Enabled: false
|
|
55
20
|
|
|
56
|
-
# Offense count:
|
|
57
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
58
|
-
# Configuration parameters: AllowInHeredoc.
|
|
59
|
-
Layout/TrailingWhitespace:
|
|
60
|
-
Exclude:
|
|
61
|
-
- 'lib/canon/comparison/child_realignment.rb'
|
|
62
|
-
- 'lib/canon/comparison/xml_comparator/child_comparison.rb'
|
|
63
|
-
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
64
|
-
|
|
65
|
-
# Offense count: 63
|
|
21
|
+
# Offense count: 58
|
|
66
22
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
67
23
|
Lint/DuplicateBranch:
|
|
68
24
|
Enabled: false
|
|
@@ -75,13 +31,12 @@ Lint/EmptyConditionalBody:
|
|
|
75
31
|
- 'spec/canon/comparison/html_comparator_spec.rb'
|
|
76
32
|
- 'spec/canon/comparison_spec.rb'
|
|
77
33
|
|
|
78
|
-
# Offense count:
|
|
34
|
+
# Offense count: 5
|
|
79
35
|
# Configuration parameters: MaximumRangeSize.
|
|
80
36
|
Lint/MissingCopEnableDirective:
|
|
81
37
|
Exclude:
|
|
82
38
|
- 'lib/canon/commands/format_command.rb'
|
|
83
39
|
- 'lib/canon/xml/attribute_handler.rb'
|
|
84
|
-
- 'lib/canon/xml/data_model.rb'
|
|
85
40
|
- 'lib/canon/xml/namespace_handler.rb'
|
|
86
41
|
- 'lib/canon/xml/processor.rb'
|
|
87
42
|
- 'lib/canon/xml/xml_base_handler.rb'
|
|
@@ -107,7 +62,7 @@ Lint/UselessConstantScoping:
|
|
|
107
62
|
Exclude:
|
|
108
63
|
- 'lib/canon/diff_formatter/theme.rb'
|
|
109
64
|
|
|
110
|
-
# Offense count:
|
|
65
|
+
# Offense count: 313
|
|
111
66
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
112
67
|
Metrics/AbcSize:
|
|
113
68
|
Enabled: false
|
|
@@ -123,12 +78,12 @@ Metrics/BlockLength:
|
|
|
123
78
|
Metrics/BlockNesting:
|
|
124
79
|
Max: 4
|
|
125
80
|
|
|
126
|
-
# Offense count:
|
|
81
|
+
# Offense count: 276
|
|
127
82
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
128
83
|
Metrics/CyclomaticComplexity:
|
|
129
84
|
Enabled: false
|
|
130
85
|
|
|
131
|
-
# Offense count:
|
|
86
|
+
# Offense count: 523
|
|
132
87
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
133
88
|
Metrics/MethodLength:
|
|
134
89
|
Max: 146
|
|
@@ -138,7 +93,7 @@ Metrics/MethodLength:
|
|
|
138
93
|
Metrics/ParameterLists:
|
|
139
94
|
Max: 10
|
|
140
95
|
|
|
141
|
-
# Offense count:
|
|
96
|
+
# Offense count: 214
|
|
142
97
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
143
98
|
Metrics/PerceivedComplexity:
|
|
144
99
|
Enabled: false
|
|
@@ -177,7 +132,7 @@ Performance/CollectionLiteralInLoop:
|
|
|
177
132
|
RSpec/ContextWording:
|
|
178
133
|
Enabled: false
|
|
179
134
|
|
|
180
|
-
# Offense count:
|
|
135
|
+
# Offense count: 47
|
|
181
136
|
# Configuration parameters: IgnoredMetadata.
|
|
182
137
|
RSpec/DescribeClass:
|
|
183
138
|
Enabled: false
|
|
@@ -188,7 +143,7 @@ RSpec/DescribeMethod:
|
|
|
188
143
|
- 'spec/canon/comparison/multiple_differences_spec.rb'
|
|
189
144
|
- 'spec/canon/diff_formatter/character_map_customization_spec.rb'
|
|
190
145
|
|
|
191
|
-
# Offense count:
|
|
146
|
+
# Offense count: 874
|
|
192
147
|
# Configuration parameters: CountAsOne.
|
|
193
148
|
RSpec/ExampleLength:
|
|
194
149
|
Max: 44
|
|
@@ -240,7 +195,7 @@ RSpec/MultipleDescribes:
|
|
|
240
195
|
Exclude:
|
|
241
196
|
- 'spec/canon/comparison/match_options_spec.rb'
|
|
242
197
|
|
|
243
|
-
# Offense count:
|
|
198
|
+
# Offense count: 736
|
|
244
199
|
RSpec/MultipleExpectations:
|
|
245
200
|
Max: 15
|
|
246
201
|
|
|
@@ -263,12 +218,13 @@ RSpec/NamedSubject:
|
|
|
263
218
|
RSpec/NestedGroups:
|
|
264
219
|
Max: 4
|
|
265
220
|
|
|
266
|
-
# Offense count:
|
|
221
|
+
# Offense count: 11
|
|
267
222
|
# Configuration parameters: AllowedPatterns.
|
|
268
223
|
# AllowedPatterns: ^expect_, ^assert_
|
|
269
224
|
RSpec/NoExpectationExample:
|
|
270
225
|
Exclude:
|
|
271
226
|
- 'spec/canon/context_grouping_spec.rb'
|
|
227
|
+
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
272
228
|
- 'spec/canon/informative_diffs_debug_spec.rb'
|
|
273
229
|
- 'spec/canon/isodoc_blockquotes_spec.rb'
|
|
274
230
|
- 'spec/canon/match_scenarios_spec.rb'
|
|
@@ -292,30 +248,17 @@ RSpec/SpecFilePathFormat:
|
|
|
292
248
|
- 'spec/canon/yaml/formatter_spec.rb'
|
|
293
249
|
- 'spec/xml_c14n_spec.rb'
|
|
294
250
|
|
|
295
|
-
# Offense count:
|
|
251
|
+
# Offense count: 72
|
|
296
252
|
# Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
|
|
297
253
|
RSpec/VerifiedDoubles:
|
|
298
254
|
Exclude:
|
|
299
255
|
- 'spec/canon/comparison/diff_node_builder_spec.rb'
|
|
300
256
|
- 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
|
|
301
257
|
- 'spec/canon/diff/diff_classifier_spec.rb'
|
|
302
|
-
- 'spec/canon/diff/path_builder_spec.rb'
|
|
303
258
|
- 'spec/canon/diff/xml_serialization_formatter_spec.rb'
|
|
304
259
|
- 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
|
|
305
260
|
- 'spec/canon/tree_diff/operation_converter_spec.rb'
|
|
306
261
|
|
|
307
|
-
# Offense count: 8
|
|
308
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
309
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
310
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
311
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
312
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
313
|
-
# AllowedMethods: lambda, proc, it
|
|
314
|
-
Style/BlockDelimiters:
|
|
315
|
-
Exclude:
|
|
316
|
-
- 'spec/canon/comparison/comments_asymmetry_spec.rb'
|
|
317
|
-
- 'spec/canon/comparison/whitespace_adjacency_spec.rb'
|
|
318
|
-
|
|
319
262
|
# Offense count: 1
|
|
320
263
|
# This cop supports safe autocorrection (--autocorrect).
|
|
321
264
|
# Configuration parameters: EnforcedStyle, AllowComments.
|
data/Rakefile
CHANGED
|
@@ -5,10 +5,27 @@ require "rspec/core/rake_task"
|
|
|
5
5
|
|
|
6
6
|
RSpec::Core::RakeTask.new(:spec)
|
|
7
7
|
|
|
8
|
+
begin
|
|
9
|
+
require "opal/rspec/rake_task"
|
|
10
|
+
rescue LoadError
|
|
11
|
+
# Opal not available or incompatible with current Ruby version
|
|
12
|
+
end
|
|
13
|
+
|
|
8
14
|
require "rubocop/rake_task"
|
|
9
15
|
|
|
10
16
|
RuboCop::RakeTask.new
|
|
11
17
|
|
|
12
18
|
Dir.glob("lib/tasks/**/*.rake").each { |r| load r }
|
|
13
19
|
|
|
20
|
+
namespace :spec do
|
|
21
|
+
if defined?(Opal::RSpec::RakeTask)
|
|
22
|
+
desc "Run Opal (JavaScript) tests"
|
|
23
|
+
Opal::RSpec::RakeTask.new(:opal) do |server, runner|
|
|
24
|
+
server.append_path "lib"
|
|
25
|
+
runner.default_path = "spec"
|
|
26
|
+
runner.pattern = "spec/canon/opal_xml_smoke_spec.rb"
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
14
31
|
task default: %i[spec rubocop]
|
data/lib/canon/cli.rb
CHANGED
data/lib/canon/color_detector.rb
CHANGED
|
@@ -67,11 +67,9 @@ module Canon
|
|
|
67
67
|
# @param io [IO] Output stream
|
|
68
68
|
# @return [Boolean] true if the stream is a TTY
|
|
69
69
|
def tty?(io)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
# Ruby 2.5+ uses tty?, older uses isatty
|
|
74
|
-
io.tty? || io.isatty
|
|
70
|
+
io.tty?
|
|
71
|
+
rescue NoMethodError
|
|
72
|
+
false
|
|
75
73
|
rescue ArgumentError, IOError
|
|
76
74
|
# Stream might be closed or invalid
|
|
77
75
|
false
|
|
@@ -82,14 +82,11 @@ module Canon
|
|
|
82
82
|
%i[text_content structural_whitespace].include?(dimension)
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
-
private
|
|
86
|
-
|
|
87
85
|
# Get the behavior setting for a dimension
|
|
88
86
|
# @param dimension [Symbol] The match dimension
|
|
89
87
|
# @return [Symbol] The behavior (:strict, :normalize, :ignore)
|
|
90
88
|
def behavior_for(dimension)
|
|
91
|
-
|
|
92
|
-
if match_options.respond_to?(:behavior_for)
|
|
89
|
+
if match_options.is_a?(ResolvedMatchOptions)
|
|
93
90
|
match_options.behavior_for(dimension)
|
|
94
91
|
elsif match_options.is_a?(Hash)
|
|
95
92
|
match_options[dimension] || :strict
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|
|
@@ -27,14 +27,10 @@ module Canon
|
|
|
27
27
|
def extract_data(node)
|
|
28
28
|
return {} unless node
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
if node.is_a?(Moxml::Node)
|
|
32
|
-
extract_from_moxml(node)
|
|
33
|
-
# Handle Nokogiri nodes
|
|
34
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
30
|
+
if Canon::XmlBackend.nokogiri?
|
|
35
31
|
extract_from_nokogiri(node)
|
|
36
32
|
else
|
|
37
|
-
|
|
33
|
+
extract_from_moxml(node)
|
|
38
34
|
end
|
|
39
35
|
end
|
|
40
36
|
|
|
@@ -21,14 +21,10 @@ module Canon
|
|
|
21
21
|
def extract_data(node)
|
|
22
22
|
return [] unless node
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
if node.is_a?(Moxml::Node)
|
|
26
|
-
extract_from_moxml(node)
|
|
27
|
-
# Handle Nokogiri nodes
|
|
28
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
24
|
+
if Canon::XmlBackend.nokogiri?
|
|
29
25
|
extract_from_nokogiri(node)
|
|
30
26
|
else
|
|
31
|
-
|
|
27
|
+
extract_from_moxml(node)
|
|
32
28
|
end
|
|
33
29
|
end
|
|
34
30
|
|
|
@@ -23,14 +23,10 @@ module Canon
|
|
|
23
23
|
def extract_data(node)
|
|
24
24
|
return 0 unless node
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
if node.is_a?(Moxml::Node)
|
|
28
|
-
extract_from_moxml(node)
|
|
29
|
-
# Handle Nokogiri nodes
|
|
30
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
26
|
+
if Canon::XmlBackend.nokogiri?
|
|
31
27
|
extract_from_nokogiri(node)
|
|
32
28
|
else
|
|
33
|
-
|
|
29
|
+
extract_from_moxml(node)
|
|
34
30
|
end
|
|
35
31
|
end
|
|
36
32
|
|
|
@@ -25,14 +25,10 @@ module Canon
|
|
|
25
25
|
def extract_data(node)
|
|
26
26
|
return [] unless node
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
if node.is_a?(Moxml::Node)
|
|
30
|
-
extract_from_moxml(node)
|
|
31
|
-
# Handle Nokogiri nodes
|
|
32
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
28
|
+
if Canon::XmlBackend.nokogiri?
|
|
33
29
|
extract_from_nokogiri(node)
|
|
34
30
|
else
|
|
35
|
-
|
|
31
|
+
extract_from_moxml(node)
|
|
36
32
|
end
|
|
37
33
|
end
|
|
38
34
|
|
|
@@ -23,12 +23,10 @@ module Canon
|
|
|
23
23
|
def extract_data(node)
|
|
24
24
|
return nil unless node
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
if node.is_a?(Moxml::Node)
|
|
28
|
-
extract_from_moxml(node)
|
|
29
|
-
# Handle Nokogiri nodes
|
|
30
|
-
elsif node.is_a?(Nokogiri::XML::Node)
|
|
26
|
+
if Canon::XmlBackend.nokogiri?
|
|
31
27
|
extract_from_nokogiri(node)
|
|
28
|
+
else
|
|
29
|
+
extract_from_moxml(node)
|
|
32
30
|
end
|
|
33
31
|
end
|
|
34
32
|
|
|
@@ -22,27 +22,36 @@ module Canon
|
|
|
22
22
|
# @param obj [Object] Object to detect format of
|
|
23
23
|
# @return [Symbol] Format type (:xml, :html, :json, :yaml, :ruby_object, :string)
|
|
24
24
|
def detect(obj)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
obj.html? ? :html : :xml
|
|
37
|
-
when Nokogiri::HTML::Document, Nokogiri::HTML5::Document
|
|
38
|
-
:html
|
|
39
|
-
when String
|
|
40
|
-
detect_string(obj)
|
|
41
|
-
when Hash, Array
|
|
42
|
-
# Raw Ruby objects (from parsed JSON/YAML)
|
|
43
|
-
:ruby_object
|
|
25
|
+
if XmlBackend.moxml?
|
|
26
|
+
case obj
|
|
27
|
+
when Moxml::Node, Moxml::Document
|
|
28
|
+
:xml
|
|
29
|
+
when String
|
|
30
|
+
detect_string(obj)
|
|
31
|
+
when Hash, Array
|
|
32
|
+
:ruby_object
|
|
33
|
+
else
|
|
34
|
+
raise Canon::Error, "Unknown format for object: #{obj.class}"
|
|
35
|
+
end
|
|
44
36
|
else
|
|
45
|
-
|
|
37
|
+
case obj
|
|
38
|
+
when Moxml::Node, Moxml::Document
|
|
39
|
+
:xml
|
|
40
|
+
when Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment
|
|
41
|
+
:html
|
|
42
|
+
when Nokogiri::XML::DocumentFragment
|
|
43
|
+
obj.document&.html? ? :html : :xml
|
|
44
|
+
when Nokogiri::XML::Document, Nokogiri::XML::Node
|
|
45
|
+
obj.html? ? :html : :xml
|
|
46
|
+
when Nokogiri::HTML::Document, Nokogiri::HTML5::Document
|
|
47
|
+
:html
|
|
48
|
+
when String
|
|
49
|
+
detect_string(obj)
|
|
50
|
+
when Hash, Array
|
|
51
|
+
:ruby_object
|
|
52
|
+
else
|
|
53
|
+
raise Canon::Error, "Unknown format for object: #{obj.class}"
|
|
54
|
+
end
|
|
46
55
|
end
|
|
47
56
|
end
|
|
48
57
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "nokogiri"
|
|
3
|
+
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
4
|
require_relative "../comparison" # Load base module with constants first
|
|
5
5
|
require_relative "markup_comparator"
|
|
6
6
|
require_relative "xml_comparator"
|
|
@@ -167,6 +167,11 @@ module Canon
|
|
|
167
167
|
end
|
|
168
168
|
end
|
|
169
169
|
|
|
170
|
+
# Public parsing API for external callers
|
|
171
|
+
def parse(html, preprocessing = :none)
|
|
172
|
+
parse_node_for_semantic(html, preprocessing)
|
|
173
|
+
end
|
|
174
|
+
|
|
170
175
|
private
|
|
171
176
|
|
|
172
177
|
# Check if both nodes are document fragments
|
|
@@ -337,13 +342,10 @@ module Canon
|
|
|
337
342
|
# Convert to string if needed
|
|
338
343
|
html_string = if html.is_a?(String)
|
|
339
344
|
html
|
|
340
|
-
elsif
|
|
345
|
+
elsif Canon::XmlParsing.xml_node?(html)
|
|
341
346
|
html.to_html
|
|
342
|
-
elsif html.respond_to?(:to_s)
|
|
343
|
-
html.to_s
|
|
344
347
|
else
|
|
345
|
-
|
|
346
|
-
"Unable to convert HTML to string: #{html.class}"
|
|
348
|
+
html.to_s
|
|
347
349
|
end
|
|
348
350
|
|
|
349
351
|
# Strip DOCTYPE for consistent parsing
|
|
@@ -492,22 +494,18 @@ module Canon
|
|
|
492
494
|
end
|
|
493
495
|
|
|
494
496
|
def find_and_normalize_style_script(node)
|
|
495
|
-
return unless node.
|
|
497
|
+
return unless node.is_a?(Canon::Xml::Node)
|
|
496
498
|
|
|
497
499
|
node.children.each do |child|
|
|
498
500
|
next unless child.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
499
501
|
|
|
500
502
|
# If this is a style or script element, normalize its text content
|
|
501
503
|
if %w[style script].include?(child.name.downcase)
|
|
502
|
-
# Get text children and remove HTML comments from them
|
|
503
504
|
child.children.each do |text_child|
|
|
504
505
|
next unless text_child.is_a?(Canon::Xml::Nodes::TextNode)
|
|
505
506
|
|
|
506
|
-
# Remove HTML comments from text content without using regex
|
|
507
|
-
# to avoid ReDoS/incomplete sanitization vulnerabilities
|
|
508
507
|
normalized = remove_html_comments(text_child.value)
|
|
509
|
-
|
|
510
|
-
text_child.instance_variable_set(:@value, normalized)
|
|
508
|
+
text_child.value = normalized
|
|
511
509
|
end
|
|
512
510
|
end
|
|
513
511
|
|
|
@@ -584,13 +582,10 @@ module Canon
|
|
|
584
582
|
# @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
|
|
585
583
|
# @return [String] Serialized HTML string
|
|
586
584
|
def serialize_for_display(node)
|
|
587
|
-
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
588
585
|
if node.is_a?(Canon::Xml::Node)
|
|
589
586
|
XmlNodeComparison.serialize_node_to_xml(node)
|
|
590
|
-
elsif
|
|
591
|
-
node.to_html
|
|
592
|
-
elsif node.respond_to?(:to_xml)
|
|
593
|
-
node.to_xml
|
|
587
|
+
elsif Canon::XmlParsing.xml_node?(node)
|
|
588
|
+
Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
|
|
594
589
|
else
|
|
595
590
|
node.to_s
|
|
596
591
|
end
|
|
@@ -605,16 +600,11 @@ module Canon
|
|
|
605
600
|
if html.is_a?(String)
|
|
606
601
|
html
|
|
607
602
|
elsif html.is_a?(Canon::Xml::Node)
|
|
608
|
-
# Serialize Canon nodes to string
|
|
609
603
|
Canon::Xml::DataModel.serialize(html)
|
|
610
|
-
elsif
|
|
611
|
-
|
|
612
|
-
html.to_html
|
|
613
|
-
elsif html.respond_to?(:to_s)
|
|
614
|
-
html.to_s
|
|
604
|
+
elsif Canon::XmlParsing.xml_node?(html)
|
|
605
|
+
Canon::XmlBackend.nokogiri? ? html.to_html : html.to_s
|
|
615
606
|
else
|
|
616
|
-
|
|
617
|
-
"Unable to extract original string from: #{html.class}"
|
|
607
|
+
html.to_s
|
|
618
608
|
end
|
|
619
609
|
end
|
|
620
610
|
|
|
@@ -727,11 +717,10 @@ compare_profile = nil)
|
|
|
727
717
|
# Check if any ancestor of the given node preserves whitespace
|
|
728
718
|
def ancestor_preserves_whitespace?(node, preserve_list)
|
|
729
719
|
current = node
|
|
730
|
-
while current.
|
|
720
|
+
while current.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(current)
|
|
731
721
|
return true if preserve_list.include?(current.name.downcase)
|
|
732
722
|
|
|
733
|
-
|
|
734
|
-
break if current.is_a?(Nokogiri::XML::Document)
|
|
723
|
+
break if Canon::XmlParsing.document?(current)
|
|
735
724
|
|
|
736
725
|
current = current.parent
|
|
737
726
|
end
|
|
@@ -811,7 +800,7 @@ compare_profile = nil)
|
|
|
811
800
|
end
|
|
812
801
|
|
|
813
802
|
# Check if it's a fragment that contains XML processing instructions
|
|
814
|
-
if node.
|
|
803
|
+
if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) && node.children.any? do |child|
|
|
815
804
|
child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
|
|
816
805
|
child.name == "xml"
|
|
817
806
|
end
|
|
@@ -48,9 +48,8 @@ module Canon
|
|
|
48
48
|
|
|
49
49
|
# If key exists, check if it's :strict
|
|
50
50
|
return match_options[:comments] == :strict
|
|
51
|
-
elsif match_options.
|
|
51
|
+
elsif match_options.is_a?(ResolvedMatchOptions)
|
|
52
52
|
behavior = behavior_for(dimension)
|
|
53
|
-
# In HTML, only :strict makes comments affect equivalence
|
|
54
53
|
return behavior == :strict
|
|
55
54
|
end
|
|
56
55
|
# Default: comments don't affect equivalence in HTML
|
|
@@ -106,14 +105,8 @@ module Canon
|
|
|
106
105
|
def has_explicit_option?(dimension)
|
|
107
106
|
if match_options.is_a?(Hash)
|
|
108
107
|
match_options.key?(dimension)
|
|
109
|
-
elsif match_options.
|
|
110
|
-
|
|
111
|
-
begin
|
|
112
|
-
match_options[dimension]
|
|
113
|
-
true
|
|
114
|
-
rescue StandardError
|
|
115
|
-
false
|
|
116
|
-
end
|
|
108
|
+
elsif match_options.is_a?(ResolvedMatchOptions)
|
|
109
|
+
!match_options.options[dimension].nil?
|
|
117
110
|
else
|
|
118
111
|
false
|
|
119
112
|
end
|
|
@@ -26,6 +26,14 @@ module Canon
|
|
|
26
26
|
}.freeze
|
|
27
27
|
|
|
28
28
|
class << self
|
|
29
|
+
# Parse JSON from string or return as-is
|
|
30
|
+
#
|
|
31
|
+
# @param obj [String, Hash, Array] JSON string or parsed object
|
|
32
|
+
# @return [Object] Parsed JSON object
|
|
33
|
+
def parse(obj)
|
|
34
|
+
parse_json(obj)
|
|
35
|
+
end
|
|
36
|
+
|
|
29
37
|
# Compare two JSON objects for equivalence
|
|
30
38
|
#
|
|
31
39
|
# @param json1 [String, Hash, Array] First JSON
|