canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b5218e18de7c596c5875ee1cf906331269cd58475a1f00de5c20af398bb07f08
4
- data.tar.gz: 0dedd6f9e8ca265d37c610a183ed0e695ba68a9d8c9f0766b890f3d8db7d1f66
3
+ metadata.gz: a13457a67f3e2ab91e00cec19684c502605ab807bdd87eb1120e77d190a99c2e
4
+ data.tar.gz: 35c0c873340e12c63048adf2222fda2f8c2ae3972337dcc212b26d391191ac35
5
5
  SHA512:
6
- metadata.gz: c24944e5600684e24f4b32cd16d90d68f64ca07671da7cd30a4cc7e13e818e98f86ab849ee28f7780f40ae3514df1ba3087cb32f55c7923d5303c65819aa8d59
7
- data.tar.gz: 13a3c944492c29a916569b86829cefd8bf7baaf247eea105ee3f608d25789ef7c79ab0b0a95a3806e66b85348dd629169f8e19bef127e3ca79685a0e13d1bca9
6
+ metadata.gz: 8db915564eebd4ca4dfadd65358f721aa70bca318c22dc1c02eff5e3527cf646ea19722b760072851f358b3fabefd12fc5f6dfc216bce146423c7091f3bf7eac
7
+ data.tar.gz: f92e7491d781c8762483335558ede985a1653bcfb88613858115aa87e50bb326f95b0b76b845c54154e657fb9f25b3d1f348bf8e9baa926ea1c6bfbbd77d6ca6
data/.rspec-opal ADDED
@@ -0,0 +1,7 @@
1
+ --default-path=spec
2
+ --pattern='spec/canon/opal_xml_smoke_spec.rb'
3
+ -I lib
4
+ --opal-opt=-g,canon
5
+ -I spec
6
+ --require=spec_helper
7
+ --require=support/opal
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-05-05 13:09:45 UTC using RuboCop version 1.86.0.
3
+ # on 2026-05-24 10:34:05 UTC using RuboCop version 1.86.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -11,58 +11,14 @@ Gemspec/RequiredRubyVersion:
11
11
  Exclude:
12
12
  - 'canon.gemspec'
13
13
 
14
- # Offense count: 5
15
- # This cop supports safe autocorrection (--autocorrect).
16
- # Configuration parameters: EnforcedStyle, IndentationWidth.
17
- # SupportedStyles: with_first_argument, with_fixed_indentation
18
- Layout/ArgumentAlignment:
19
- Exclude:
20
- - 'lib/canon/comparison/child_realignment.rb'
21
- - 'lib/canon/comparison/xml_comparator/child_comparison.rb'
22
- - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
23
-
24
- # Offense count: 5
25
- # This cop supports safe autocorrection (--autocorrect).
26
- # Configuration parameters: EnforcedStyleAlignWith.
27
- # SupportedStylesAlignWith: either, start_of_block, start_of_line
28
- Layout/BlockAlignment:
29
- Exclude:
30
- - 'spec/canon/comparison/comments_asymmetry_spec.rb'
31
- - 'spec/canon/comparison/whitespace_adjacency_spec.rb'
32
-
33
- # Offense count: 5
34
- # This cop supports safe autocorrection (--autocorrect).
35
- Layout/BlockEndNewline:
36
- Exclude:
37
- - 'spec/canon/comparison/comments_asymmetry_spec.rb'
38
- - 'spec/canon/comparison/whitespace_adjacency_spec.rb'
39
-
40
- # Offense count: 10
41
- # This cop supports safe autocorrection (--autocorrect).
42
- # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
43
- # SupportedStylesAlignWith: start_of_line, relative_to_receiver
44
- Layout/IndentationWidth:
45
- Exclude:
46
- - 'spec/canon/comparison/comments_asymmetry_spec.rb'
47
- - 'spec/canon/comparison/whitespace_adjacency_spec.rb'
48
-
49
- # Offense count: 1386
14
+ # Offense count: 1358
50
15
  # This cop supports safe autocorrection (--autocorrect).
51
16
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
52
17
  # URISchemes: http, https
53
18
  Layout/LineLength:
54
19
  Enabled: false
55
20
 
56
- # Offense count: 6
57
- # This cop supports safe autocorrection (--autocorrect).
58
- # Configuration parameters: AllowInHeredoc.
59
- Layout/TrailingWhitespace:
60
- Exclude:
61
- - 'lib/canon/comparison/child_realignment.rb'
62
- - 'lib/canon/comparison/xml_comparator/child_comparison.rb'
63
- - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
64
-
65
- # Offense count: 63
21
+ # Offense count: 58
66
22
  # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
67
23
  Lint/DuplicateBranch:
68
24
  Enabled: false
@@ -75,13 +31,12 @@ Lint/EmptyConditionalBody:
75
31
  - 'spec/canon/comparison/html_comparator_spec.rb'
76
32
  - 'spec/canon/comparison_spec.rb'
77
33
 
78
- # Offense count: 6
34
+ # Offense count: 5
79
35
  # Configuration parameters: MaximumRangeSize.
80
36
  Lint/MissingCopEnableDirective:
81
37
  Exclude:
82
38
  - 'lib/canon/commands/format_command.rb'
83
39
  - 'lib/canon/xml/attribute_handler.rb'
84
- - 'lib/canon/xml/data_model.rb'
85
40
  - 'lib/canon/xml/namespace_handler.rb'
86
41
  - 'lib/canon/xml/processor.rb'
87
42
  - 'lib/canon/xml/xml_base_handler.rb'
@@ -107,7 +62,7 @@ Lint/UselessConstantScoping:
107
62
  Exclude:
108
63
  - 'lib/canon/diff_formatter/theme.rb'
109
64
 
110
- # Offense count: 321
65
+ # Offense count: 313
111
66
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
112
67
  Metrics/AbcSize:
113
68
  Enabled: false
@@ -123,12 +78,12 @@ Metrics/BlockLength:
123
78
  Metrics/BlockNesting:
124
79
  Max: 4
125
80
 
126
- # Offense count: 285
81
+ # Offense count: 276
127
82
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
128
83
  Metrics/CyclomaticComplexity:
129
84
  Enabled: false
130
85
 
131
- # Offense count: 529
86
+ # Offense count: 523
132
87
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
133
88
  Metrics/MethodLength:
134
89
  Max: 146
@@ -138,7 +93,7 @@ Metrics/MethodLength:
138
93
  Metrics/ParameterLists:
139
94
  Max: 10
140
95
 
141
- # Offense count: 221
96
+ # Offense count: 214
142
97
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
143
98
  Metrics/PerceivedComplexity:
144
99
  Enabled: false
@@ -177,7 +132,7 @@ Performance/CollectionLiteralInLoop:
177
132
  RSpec/ContextWording:
178
133
  Enabled: false
179
134
 
180
- # Offense count: 46
135
+ # Offense count: 47
181
136
  # Configuration parameters: IgnoredMetadata.
182
137
  RSpec/DescribeClass:
183
138
  Enabled: false
@@ -188,7 +143,7 @@ RSpec/DescribeMethod:
188
143
  - 'spec/canon/comparison/multiple_differences_spec.rb'
189
144
  - 'spec/canon/diff_formatter/character_map_customization_spec.rb'
190
145
 
191
- # Offense count: 876
146
+ # Offense count: 874
192
147
  # Configuration parameters: CountAsOne.
193
148
  RSpec/ExampleLength:
194
149
  Max: 44
@@ -240,7 +195,7 @@ RSpec/MultipleDescribes:
240
195
  Exclude:
241
196
  - 'spec/canon/comparison/match_options_spec.rb'
242
197
 
243
- # Offense count: 735
198
+ # Offense count: 736
244
199
  RSpec/MultipleExpectations:
245
200
  Max: 15
246
201
 
@@ -263,12 +218,13 @@ RSpec/NamedSubject:
263
218
  RSpec/NestedGroups:
264
219
  Max: 4
265
220
 
266
- # Offense count: 10
221
+ # Offense count: 11
267
222
  # Configuration parameters: AllowedPatterns.
268
223
  # AllowedPatterns: ^expect_, ^assert_
269
224
  RSpec/NoExpectationExample:
270
225
  Exclude:
271
226
  - 'spec/canon/context_grouping_spec.rb'
227
+ - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
272
228
  - 'spec/canon/informative_diffs_debug_spec.rb'
273
229
  - 'spec/canon/isodoc_blockquotes_spec.rb'
274
230
  - 'spec/canon/match_scenarios_spec.rb'
@@ -292,30 +248,17 @@ RSpec/SpecFilePathFormat:
292
248
  - 'spec/canon/yaml/formatter_spec.rb'
293
249
  - 'spec/xml_c14n_spec.rb'
294
250
 
295
- # Offense count: 100
251
+ # Offense count: 72
296
252
  # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
297
253
  RSpec/VerifiedDoubles:
298
254
  Exclude:
299
255
  - 'spec/canon/comparison/diff_node_builder_spec.rb'
300
256
  - 'spec/canon/comparison/whitespace_sensitivity_spec.rb'
301
257
  - 'spec/canon/diff/diff_classifier_spec.rb'
302
- - 'spec/canon/diff/path_builder_spec.rb'
303
258
  - 'spec/canon/diff/xml_serialization_formatter_spec.rb'
304
259
  - 'spec/canon/diff_formatter/diff_detail_formatter_spec.rb'
305
260
  - 'spec/canon/tree_diff/operation_converter_spec.rb'
306
261
 
307
- # Offense count: 8
308
- # This cop supports safe autocorrection (--autocorrect).
309
- # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
310
- # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
311
- # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
312
- # FunctionalMethods: let, let!, subject, watch
313
- # AllowedMethods: lambda, proc, it
314
- Style/BlockDelimiters:
315
- Exclude:
316
- - 'spec/canon/comparison/comments_asymmetry_spec.rb'
317
- - 'spec/canon/comparison/whitespace_adjacency_spec.rb'
318
-
319
262
  # Offense count: 1
320
263
  # This cop supports safe autocorrection (--autocorrect).
321
264
  # Configuration parameters: EnforcedStyle, AllowComments.
data/Rakefile CHANGED
@@ -5,10 +5,27 @@ require "rspec/core/rake_task"
5
5
 
6
6
  RSpec::Core::RakeTask.new(:spec)
7
7
 
8
+ begin
9
+ require "opal/rspec/rake_task"
10
+ rescue LoadError
11
+ # Opal not available or incompatible with current Ruby version
12
+ end
13
+
8
14
  require "rubocop/rake_task"
9
15
 
10
16
  RuboCop::RakeTask.new
11
17
 
12
18
  Dir.glob("lib/tasks/**/*.rake").each { |r| load r }
13
19
 
20
+ namespace :spec do
21
+ if defined?(Opal::RSpec::RakeTask)
22
+ desc "Run Opal (JavaScript) tests"
23
+ Opal::RSpec::RakeTask.new(:opal) do |server, runner|
24
+ server.append_path "lib"
25
+ runner.default_path = "spec"
26
+ runner.pattern = "spec/canon/opal_xml_smoke_spec.rb"
27
+ end
28
+ end
29
+ end
30
+
14
31
  task default: %i[spec rubocop]
data/lib/canon/cli.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "thor"
3
+ require "thor" unless RUBY_ENGINE == "opal"
4
4
  require_relative "commands/format_command"
5
5
  require_relative "commands/diff_command"
6
6
  require_relative "options/registry"
@@ -67,11 +67,9 @@ module Canon
67
67
  # @param io [IO] Output stream
68
68
  # @return [Boolean] true if the stream is a TTY
69
69
  def tty?(io)
70
- return false unless io.respond_to?(:tty?)
71
- return false unless io.respond_to?(:isatty)
72
-
73
- # Ruby 2.5+ uses tty?, older uses isatty
74
- io.tty? || io.isatty
70
+ io.tty?
71
+ rescue NoMethodError
72
+ false
75
73
  rescue ArgumentError, IOError
76
74
  # Stream might be closed or invalid
77
75
  false
@@ -82,14 +82,11 @@ module Canon
82
82
  %i[text_content structural_whitespace].include?(dimension)
83
83
  end
84
84
 
85
- private
86
-
87
85
  # Get the behavior setting for a dimension
88
86
  # @param dimension [Symbol] The match dimension
89
87
  # @return [Symbol] The behavior (:strict, :normalize, :ignore)
90
88
  def behavior_for(dimension)
91
- # Handle both ResolvedMatchOptions and Hash
92
- if match_options.respond_to?(:behavior_for)
89
+ if match_options.is_a?(ResolvedMatchOptions)
93
90
  match_options.behavior_for(dimension)
94
91
  elsif match_options.is_a?(Hash)
95
92
  match_options[dimension] || :strict
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30
 
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30
 
@@ -27,14 +27,10 @@ module Canon
27
27
  def extract_data(node)
28
28
  return {} unless node
29
29
 
30
- # Handle Moxml nodes
31
- if node.is_a?(Moxml::Node)
32
- extract_from_moxml(node)
33
- # Handle Nokogiri nodes
34
- elsif node.is_a?(Nokogiri::XML::Node)
30
+ if Canon::XmlBackend.nokogiri?
35
31
  extract_from_nokogiri(node)
36
32
  else
37
- {}
33
+ extract_from_moxml(node)
38
34
  end
39
35
  end
40
36
 
@@ -21,14 +21,10 @@ module Canon
21
21
  def extract_data(node)
22
22
  return [] unless node
23
23
 
24
- # Handle Moxml nodes
25
- if node.is_a?(Moxml::Node)
26
- extract_from_moxml(node)
27
- # Handle Nokogiri nodes
28
- elsif node.is_a?(Nokogiri::XML::Node)
24
+ if Canon::XmlBackend.nokogiri?
29
25
  extract_from_nokogiri(node)
30
26
  else
31
- []
27
+ extract_from_moxml(node)
32
28
  end
33
29
  end
34
30
 
@@ -23,14 +23,10 @@ module Canon
23
23
  def extract_data(node)
24
24
  return 0 unless node
25
25
 
26
- # Handle Moxml nodes
27
- if node.is_a?(Moxml::Node)
28
- extract_from_moxml(node)
29
- # Handle Nokogiri nodes
30
- elsif node.is_a?(Nokogiri::XML::Node)
26
+ if Canon::XmlBackend.nokogiri?
31
27
  extract_from_nokogiri(node)
32
28
  else
33
- 0
29
+ extract_from_moxml(node)
34
30
  end
35
31
  end
36
32
 
@@ -25,14 +25,10 @@ module Canon
25
25
  def extract_data(node)
26
26
  return [] unless node
27
27
 
28
- # Handle Moxml nodes
29
- if node.is_a?(Moxml::Node)
30
- extract_from_moxml(node)
31
- # Handle Nokogiri nodes
32
- elsif node.is_a?(Nokogiri::XML::Node)
28
+ if Canon::XmlBackend.nokogiri?
33
29
  extract_from_nokogiri(node)
34
30
  else
35
- []
31
+ extract_from_moxml(node)
36
32
  end
37
33
  end
38
34
 
@@ -23,12 +23,10 @@ module Canon
23
23
  def extract_data(node)
24
24
  return nil unless node
25
25
 
26
- # Handle Moxml nodes
27
- if node.is_a?(Moxml::Node)
28
- extract_from_moxml(node)
29
- # Handle Nokogiri nodes
30
- elsif node.is_a?(Nokogiri::XML::Node)
26
+ if Canon::XmlBackend.nokogiri?
31
27
  extract_from_nokogiri(node)
28
+ else
29
+ extract_from_moxml(node)
32
30
  end
33
31
  end
34
32
 
@@ -22,27 +22,36 @@ module Canon
22
22
  # @param obj [Object] Object to detect format of
23
23
  # @return [Symbol] Format type (:xml, :html, :json, :yaml, :ruby_object, :string)
24
24
  def detect(obj)
25
- case obj
26
- when Moxml::Node, Moxml::Document
27
- :xml
28
- when Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment
29
- # HTML DocumentFragments
30
- :html
31
- when Nokogiri::XML::DocumentFragment
32
- # XML DocumentFragments - check if it's actually HTML
33
- obj.document&.html? ? :html : :xml
34
- when Nokogiri::XML::Document, Nokogiri::XML::Node
35
- # Check if it's HTML by looking at the document type
36
- obj.html? ? :html : :xml
37
- when Nokogiri::HTML::Document, Nokogiri::HTML5::Document
38
- :html
39
- when String
40
- detect_string(obj)
41
- when Hash, Array
42
- # Raw Ruby objects (from parsed JSON/YAML)
43
- :ruby_object
25
+ if XmlBackend.moxml?
26
+ case obj
27
+ when Moxml::Node, Moxml::Document
28
+ :xml
29
+ when String
30
+ detect_string(obj)
31
+ when Hash, Array
32
+ :ruby_object
33
+ else
34
+ raise Canon::Error, "Unknown format for object: #{obj.class}"
35
+ end
44
36
  else
45
- raise Canon::Error, "Unknown format for object: #{obj.class}"
37
+ case obj
38
+ when Moxml::Node, Moxml::Document
39
+ :xml
40
+ when Nokogiri::HTML::DocumentFragment, Nokogiri::HTML5::DocumentFragment
41
+ :html
42
+ when Nokogiri::XML::DocumentFragment
43
+ obj.document&.html? ? :html : :xml
44
+ when Nokogiri::XML::Document, Nokogiri::XML::Node
45
+ obj.html? ? :html : :xml
46
+ when Nokogiri::HTML::Document, Nokogiri::HTML5::Document
47
+ :html
48
+ when String
49
+ detect_string(obj)
50
+ when Hash, Array
51
+ :ruby_object
52
+ else
53
+ raise Canon::Error, "Unknown format for object: #{obj.class}"
54
+ end
46
55
  end
47
56
  end
48
57
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require_relative "../comparison" # Load base module with constants first
5
5
  require_relative "markup_comparator"
6
6
  require_relative "xml_comparator"
@@ -167,6 +167,11 @@ module Canon
167
167
  end
168
168
  end
169
169
 
170
+ # Public parsing API for external callers
171
+ def parse(html, preprocessing = :none)
172
+ parse_node_for_semantic(html, preprocessing)
173
+ end
174
+
170
175
  private
171
176
 
172
177
  # Check if both nodes are document fragments
@@ -337,13 +342,10 @@ module Canon
337
342
  # Convert to string if needed
338
343
  html_string = if html.is_a?(String)
339
344
  html
340
- elsif html.respond_to?(:to_html)
345
+ elsif Canon::XmlParsing.xml_node?(html)
341
346
  html.to_html
342
- elsif html.respond_to?(:to_s)
343
- html.to_s
344
347
  else
345
- raise Canon::Error,
346
- "Unable to convert HTML to string: #{html.class}"
348
+ html.to_s
347
349
  end
348
350
 
349
351
  # Strip DOCTYPE for consistent parsing
@@ -492,22 +494,18 @@ module Canon
492
494
  end
493
495
 
494
496
  def find_and_normalize_style_script(node)
495
- return unless node.respond_to?(:children)
497
+ return unless node.is_a?(Canon::Xml::Node)
496
498
 
497
499
  node.children.each do |child|
498
500
  next unless child.is_a?(Canon::Xml::Nodes::ElementNode)
499
501
 
500
502
  # If this is a style or script element, normalize its text content
501
503
  if %w[style script].include?(child.name.downcase)
502
- # Get text children and remove HTML comments from them
503
504
  child.children.each do |text_child|
504
505
  next unless text_child.is_a?(Canon::Xml::Nodes::TextNode)
505
506
 
506
- # Remove HTML comments from text content without using regex
507
- # to avoid ReDoS/incomplete sanitization vulnerabilities
508
507
  normalized = remove_html_comments(text_child.value)
509
- # Update the text value
510
- text_child.instance_variable_set(:@value, normalized)
508
+ text_child.value = normalized
511
509
  end
512
510
  end
513
511
 
@@ -584,13 +582,10 @@ module Canon
584
582
  # @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
585
583
  # @return [String] Serialized HTML string
586
584
  def serialize_for_display(node)
587
- # Use XmlNodeComparison's serializer for Canon::Xml::Node
588
585
  if node.is_a?(Canon::Xml::Node)
589
586
  XmlNodeComparison.serialize_node_to_xml(node)
590
- elsif node.respond_to?(:to_html)
591
- node.to_html
592
- elsif node.respond_to?(:to_xml)
593
- node.to_xml
587
+ elsif Canon::XmlParsing.xml_node?(node)
588
+ Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
594
589
  else
595
590
  node.to_s
596
591
  end
@@ -605,16 +600,11 @@ module Canon
605
600
  if html.is_a?(String)
606
601
  html
607
602
  elsif html.is_a?(Canon::Xml::Node)
608
- # Serialize Canon nodes to string
609
603
  Canon::Xml::DataModel.serialize(html)
610
- elsif html.respond_to?(:to_html)
611
- # Nokogiri nodes - use to_html to preserve formatting
612
- html.to_html
613
- elsif html.respond_to?(:to_s)
614
- html.to_s
604
+ elsif Canon::XmlParsing.xml_node?(html)
605
+ Canon::XmlBackend.nokogiri? ? html.to_html : html.to_s
615
606
  else
616
- raise Canon::Error,
617
- "Unable to extract original string from: #{html.class}"
607
+ html.to_s
618
608
  end
619
609
  end
620
610
 
@@ -727,11 +717,10 @@ compare_profile = nil)
727
717
  # Check if any ancestor of the given node preserves whitespace
728
718
  def ancestor_preserves_whitespace?(node, preserve_list)
729
719
  current = node
730
- while current.respond_to?(:name)
720
+ while current.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(current)
731
721
  return true if preserve_list.include?(current.name.downcase)
732
722
 
733
- # Stop at document root - documents don't have parents
734
- break if current.is_a?(Nokogiri::XML::Document)
723
+ break if Canon::XmlParsing.document?(current)
735
724
 
736
725
  current = current.parent
737
726
  end
@@ -811,7 +800,7 @@ compare_profile = nil)
811
800
  end
812
801
 
813
802
  # Check if it's a fragment that contains XML processing instructions
814
- if node.respond_to?(:children) && node.children.any? do |child|
803
+ if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) && node.children.any? do |child|
815
804
  child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
816
805
  child.name == "xml"
817
806
  end
@@ -48,9 +48,8 @@ module Canon
48
48
 
49
49
  # If key exists, check if it's :strict
50
50
  return match_options[:comments] == :strict
51
- elsif match_options.respond_to?(:behavior_for)
51
+ elsif match_options.is_a?(ResolvedMatchOptions)
52
52
  behavior = behavior_for(dimension)
53
- # In HTML, only :strict makes comments affect equivalence
54
53
  return behavior == :strict
55
54
  end
56
55
  # Default: comments don't affect equivalence in HTML
@@ -106,14 +105,8 @@ module Canon
106
105
  def has_explicit_option?(dimension)
107
106
  if match_options.is_a?(Hash)
108
107
  match_options.key?(dimension)
109
- elsif match_options.respond_to?(:[])
110
- # For ResolvedMatchOptions, check if key exists
111
- begin
112
- match_options[dimension]
113
- true
114
- rescue StandardError
115
- false
116
- end
108
+ elsif match_options.is_a?(ResolvedMatchOptions)
109
+ !match_options.options[dimension].nil?
117
110
  else
118
111
  false
119
112
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
 
5
5
  module Canon
6
6
  module Comparison
@@ -26,6 +26,14 @@ module Canon
26
26
  }.freeze
27
27
 
28
28
  class << self
29
+ # Parse JSON from string or return as-is
30
+ #
31
+ # @param obj [String, Hash, Array] JSON string or parsed object
32
+ # @return [Object] Parsed JSON object
33
+ def parse(obj)
34
+ parse_json(obj)
35
+ end
36
+
29
37
  # Compare two JSON objects for equivalence
30
38
  #
31
39
  # @param json1 [String, Hash, Array] First JSON