canon 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +21 -22
  3. data/Rakefile +25 -2
  4. data/lib/canon/cache.rb +18 -27
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +20 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/node_inspector.rb +13 -48
  28. data/lib/canon/comparison/pipeline.rb +269 -0
  29. data/lib/canon/comparison/profile_definition.rb +0 -2
  30. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  31. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  32. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  33. data/lib/canon/comparison/strategies.rb +16 -0
  34. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -5
  35. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  36. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  37. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  38. data/lib/canon/comparison/xml_comparator/node_parser.rb +2 -6
  39. data/lib/canon/comparison/xml_comparator.rb +4 -492
  40. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  41. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  42. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  43. data/lib/canon/comparison.rb +144 -267
  44. data/lib/canon/config/config_dsl.rb +159 -0
  45. data/lib/canon/config/env_provider.rb +0 -3
  46. data/lib/canon/config/env_schema.rb +48 -58
  47. data/lib/canon/config/profile_loader.rb +0 -1
  48. data/lib/canon/config.rb +116 -468
  49. data/lib/canon/diff/diff_block_builder.rb +0 -2
  50. data/lib/canon/diff/diff_classifier.rb +0 -5
  51. data/lib/canon/diff/diff_context.rb +0 -2
  52. data/lib/canon/diff/diff_context_builder.rb +0 -2
  53. data/lib/canon/diff/diff_line_builder.rb +2 -3
  54. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  55. data/lib/canon/diff/diff_node_mapper.rb +10 -12
  56. data/lib/canon/diff/diff_report_builder.rb +0 -4
  57. data/lib/canon/diff/formatting_detector.rb +3 -3
  58. data/lib/canon/diff/node_serializer.rb +0 -7
  59. data/lib/canon/diff/xml_serialization_formatter.rb +0 -3
  60. data/lib/canon/diff.rb +39 -0
  61. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  62. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  63. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  64. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  66. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  67. data/lib/canon/diff_formatter/by_object/base_formatter.rb +20 -17
  68. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +119 -3
  70. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  71. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -5
  72. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +27 -61
  74. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -29
  75. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  76. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  77. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  78. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  79. data/lib/canon/diff_formatter.rb +26 -20
  80. data/lib/canon/formatters/html4_formatter.rb +0 -2
  81. data/lib/canon/formatters/html5_formatter.rb +0 -2
  82. data/lib/canon/formatters/html_formatter.rb +0 -3
  83. data/lib/canon/formatters/json_formatter.rb +0 -1
  84. data/lib/canon/formatters/xml_formatter.rb +0 -4
  85. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  86. data/lib/canon/formatters.rb +16 -0
  87. data/lib/canon/html/data_model.rb +1 -11
  88. data/lib/canon/html.rb +4 -3
  89. data/lib/canon/options/cli_generator.rb +0 -2
  90. data/lib/canon/options/registry.rb +0 -2
  91. data/lib/canon/options.rb +9 -0
  92. data/lib/canon/pretty_printer/html.rb +0 -1
  93. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  94. data/lib/canon/pretty_printer.rb +12 -0
  95. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  96. data/lib/canon/tree_diff/adapters.rb +14 -0
  97. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  98. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  99. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  100. data/lib/canon/tree_diff/core.rb +17 -0
  101. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  102. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  103. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  104. data/lib/canon/tree_diff/matchers.rb +15 -0
  105. data/lib/canon/tree_diff/operation_converter.rb +7 -15
  106. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  107. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  108. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  109. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  110. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  111. data/lib/canon/tree_diff/operations/operation_detector.rb +6 -5
  112. data/lib/canon/tree_diff/operations.rb +13 -0
  113. data/lib/canon/tree_diff.rb +26 -27
  114. data/lib/canon/validators/base_validator.rb +5 -10
  115. data/lib/canon/validators/html_validator.rb +2 -8
  116. data/lib/canon/validators/json_validator.rb +0 -1
  117. data/lib/canon/validators/xml_validator.rb +2 -8
  118. data/lib/canon/validators/yaml_validator.rb +0 -1
  119. data/lib/canon/validators.rb +12 -0
  120. data/lib/canon/version.rb +1 -1
  121. data/lib/canon/xml/c14n.rb +0 -4
  122. data/lib/canon/xml/data_model.rb +5 -15
  123. data/lib/canon/xml/line_range_mapper.rb +0 -2
  124. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  125. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  126. data/lib/canon/xml/nodes/element_node.rb +0 -2
  127. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  128. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  129. data/lib/canon/xml/nodes/root_node.rb +0 -2
  130. data/lib/canon/xml/nodes/text_node.rb +0 -2
  131. data/lib/canon/xml/nodes.rb +19 -0
  132. data/lib/canon/xml/processor.rb +0 -5
  133. data/lib/canon/xml/sax_builder.rb +1 -8
  134. data/lib/canon/xml/whitespace_normalizer.rb +2 -2
  135. data/lib/canon/xml.rb +33 -0
  136. data/lib/canon/xml_backend.rb +50 -14
  137. data/lib/canon/xml_parsing.rb +32 -18
  138. data/lib/canon.rb +25 -15
  139. data/lib/tasks/performance.rake +0 -58
  140. data/lib/tasks/performance_comparator.rb +132 -65
  141. data/lib/tasks/performance_helpers.rb +4 -249
  142. data/lib/tasks/performance_report.rb +309 -0
  143. metadata +28 -15
  144. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  145. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  146. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  147. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  148. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  149. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  150. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  151. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  152. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -270
@@ -1,33 +1,32 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
+ # Semantic tree-diff algorithm — distinct from the DOM positional
5
+ # diff in {Canon::Comparison}.
6
+ #
7
+ # This module computes signature-based tree matches and produces
8
+ # INSERT/DELETE/UPDATE/MOVE operations. Sub-namespaces:
9
+ #
10
+ # * Core — TreeNode, Matching, NodeSignature, NodeWeight,
11
+ # AttributeComparator, XmlEntityDecoder
12
+ # * Matchers — HashMatcher, SimilarityMatcher, StructuralPropagator,
13
+ # UniversalMatcher
14
+ # * Operations — Operation, OperationDetector
15
+ # * Adapters — format-specific tree adapters (XML, JSON, HTML, YAML)
16
+ # * OperationConverterHelpers — MetadataEnricher, ReasonBuilder,
17
+ # PostProcessor, UpdateChangeHandler
18
+ #
19
+ # Top-level entry points: OperationConverter and TreeDiffIntegrator.
20
+ #
21
+ # All children are autoloaded — never `require_relative` them.
4
22
  module TreeDiff
5
- # Tree diff module for semantic object tree diffing
23
+ autoload :Adapters, "canon/tree_diff/adapters"
24
+ autoload :Core, "canon/tree_diff/core"
25
+ autoload :Matchers, "canon/tree_diff/matchers"
26
+ autoload :OperationConverter, "canon/tree_diff/operation_converter"
27
+ autoload :OperationConverterHelpers,
28
+ "canon/tree_diff/operation_converter_helpers"
29
+ autoload :Operations, "canon/tree_diff/operations"
30
+ autoload :TreeDiffIntegrator, "canon/tree_diff/tree_diff_integrator"
6
31
  end
7
32
  end
8
-
9
- # Load core components
10
- require_relative "tree_diff/core/tree_node"
11
- require_relative "tree_diff/core/node_signature"
12
- require_relative "tree_diff/core/node_weight"
13
- require_relative "tree_diff/core/matching"
14
-
15
- # Load matchers
16
- require_relative "tree_diff/matchers/hash_matcher"
17
- require_relative "tree_diff/matchers/similarity_matcher"
18
- require_relative "tree_diff/matchers/structural_propagator"
19
- require_relative "tree_diff/matchers/universal_matcher"
20
-
21
- # Load operations
22
- require_relative "tree_diff/operations/operation"
23
- require_relative "tree_diff/operations/operation_detector"
24
- require_relative "tree_diff/operation_converter"
25
-
26
- # Load adapters
27
- require_relative "tree_diff/adapters/xml_adapter"
28
- require_relative "tree_diff/adapters/json_adapter"
29
- require_relative "tree_diff/adapters/html_adapter"
30
- require_relative "tree_diff/adapters/yaml_adapter"
31
-
32
- # Load integrator
33
- require_relative "tree_diff/tree_diff_integrator"
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../errors"
4
-
5
3
  module Canon
6
4
  module Validators
7
5
  # Base class for all input validators
@@ -29,17 +27,14 @@ module Canon
29
27
  line = nil
30
28
  column = nil
31
29
 
32
- # Try to extract line/column from error message
33
- if error.respond_to?(:line)
30
+ if error.is_a?(Nokogiri::XML::SyntaxError)
34
31
  line = error.line
32
+ column = error.column
35
33
  elsif error.message =~ /line[:\s]+(\d+)/i
36
34
  line = ::Regexp.last_match(1).to_i
37
- end
38
-
39
- if error.respond_to?(:column)
40
- column = error.column
41
- elsif error.message =~ /column[:\s]+(\d+)/i
42
- column = ::Regexp.last_match(1).to_i
35
+ if error.message =~ /column[:\s]+(\d+)/i
36
+ column = ::Regexp.last_match(1).to_i
37
+ end
43
38
  end
44
39
 
45
40
  { line: line, column: column }
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -94,13 +93,8 @@ module Canon
94
93
  #
95
94
  # @param error [Nokogiri::XML::SyntaxError] The syntax error
96
95
  # @return [String, nil] Additional details about the error
97
- def self.extract_details(error)
98
- return nil unless error.respond_to?(:errors)
99
-
100
- details = error.errors.map(&:message).reject do |msg|
101
- msg == error.message
102
- end
103
- details.join("; ") unless details.empty?
96
+ def self.extract_details(_error)
97
+ nil
104
98
  end
105
99
 
106
100
  # Build error details from multiple errors
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -38,13 +37,8 @@ module Canon
38
37
  #
39
38
  # @param error [Nokogiri::XML::SyntaxError] The syntax error
40
39
  # @return [String, nil] Additional details about the error
41
- def self.extract_details(error)
42
- return nil unless error.respond_to?(:errors)
43
-
44
- details = error.errors.map(&:message).reject do |msg|
45
- msg == error.message
46
- end
47
- details.join("; ") unless details.empty?
40
+ def self.extract_details(_error)
41
+ nil
48
42
  end
49
43
 
50
44
  private_class_method :extract_details
@@ -3,7 +3,6 @@
3
3
  require "yaml"
4
4
  require "date"
5
5
  require "time"
6
- require_relative "base_validator"
7
6
 
8
7
  module Canon
9
8
  module Validators
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Format-specific validators that raise {Error} on invalid input.
5
+ module Validators
6
+ autoload :BaseValidator, "canon/validators/base_validator"
7
+ autoload :HtmlValidator, "canon/validators/html_validator"
8
+ autoload :JsonValidator, "canon/validators/json_validator"
9
+ autoload :XmlValidator, "canon/validators/xml_validator"
10
+ autoload :YamlValidator, "canon/validators/yaml_validator"
11
+ end
12
+ end
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.2.9"
4
+ VERSION = "0.2.12"
5
5
  end
@@ -1,9 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "data_model"
4
- require_relative "processor"
5
- require_relative "xpath_engine"
6
-
7
3
  module Canon
8
4
  module Xml
9
5
  # XML Canonicalization 1.1 implementation
@@ -2,16 +2,6 @@
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require "set"
5
- require_relative "../data_model"
6
- require_relative "../xml_backend"
7
- require_relative "../xml_parsing"
8
- require_relative "nodes/root_node"
9
- require_relative "nodes/element_node"
10
- require_relative "nodes/namespace_node"
11
- require_relative "nodes/attribute_node"
12
- require_relative "nodes/text_node"
13
- require_relative "nodes/comment_node"
14
- require_relative "nodes/processing_instruction_node"
15
5
 
16
6
  module Canon
17
7
  module Xml
@@ -139,7 +129,7 @@ module Canon
139
129
  def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
140
130
  root = Nodes::RootNode.new
141
131
 
142
- if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
132
+ if nokogiri_doc.is_a?(Nokogiri::XML::Document) && nokogiri_doc.root
143
133
  root.add_child(build_element_node(nokogiri_doc.root,
144
134
  preserve_whitespace: preserve_whitespace))
145
135
  nokogiri_doc.children.each do |child|
@@ -275,7 +265,7 @@ preserve_whitespace: false)
275
265
  def self.build_from_moxml(moxml_doc, preserve_whitespace: false)
276
266
  root = Nodes::RootNode.new
277
267
 
278
- if moxml_doc.respond_to?(:root) && moxml_doc.root
268
+ if moxml_doc.is_a?(Moxml::Document) && moxml_doc.root
279
269
  root.add_child(build_moxml_element_node(moxml_doc.root,
280
270
  preserve_whitespace: preserve_whitespace))
281
271
  end
@@ -327,7 +317,7 @@ preserve_whitespace: false)
327
317
  element.add_namespace(ns_node)
328
318
  end
329
319
 
330
- unless element.namespaces.any? do |n|
320
+ unless element.namespace_nodes.any? do |n|
331
321
  n.prefix == "xml"
332
322
  end
333
323
  element.add_namespace(Nodes::NamespaceNode.new(
@@ -348,7 +338,7 @@ preserve_whitespace: false)
348
338
  end
349
339
 
350
340
  def self.build_moxml_text_node(moxml_text, preserve_whitespace: false)
351
- content = moxml_text.text
341
+ content = moxml_text.content
352
342
 
353
343
  if !preserve_whitespace && content.strip.empty? && moxml_text.parent.is_a?(Moxml::Element)
354
344
  return nil
@@ -358,7 +348,7 @@ preserve_whitespace: false)
358
348
  end
359
349
 
360
350
  def self.build_moxml_comment_node(moxml_comment)
361
- Nodes::CommentNode.new(value: moxml_comment.text)
351
+ Nodes::CommentNode.new(value: moxml_comment.content)
362
352
  end
363
353
 
364
354
  def self.build_moxml_pi_node(moxml_pi)
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../pretty_printer/xml"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  # Maps DOM elements to line ranges in pretty-printed XML
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Xml
5
+ # XPath data model node types. All nodes inherit from
6
+ # {Canon::Xml::Node}. Children are autoloaded — never
7
+ # `require_relative` them.
8
+ module Nodes
9
+ autoload :AttributeNode, "canon/xml/nodes/attribute_node"
10
+ autoload :CommentNode, "canon/xml/nodes/comment_node"
11
+ autoload :ElementNode, "canon/xml/nodes/element_node"
12
+ autoload :NamespaceNode, "canon/xml/nodes/namespace_node"
13
+ autoload :ProcessingInstructionNode,
14
+ "canon/xml/nodes/processing_instruction_node"
15
+ autoload :RootNode, "canon/xml/nodes/root_node"
16
+ autoload :TextNode, "canon/xml/nodes/text_node"
17
+ end
18
+ end
19
+ end
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "character_encoder"
4
- require_relative "namespace_handler"
5
- require_relative "attribute_handler"
6
- require_relative "xml_base_handler"
7
-
8
3
  module Canon
9
4
  module Xml
10
5
  # C14N 1.1 processor
@@ -1,13 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "nodes/root_node"
5
- require_relative "nodes/element_node"
6
- require_relative "nodes/namespace_node"
7
- require_relative "nodes/attribute_node"
8
- require_relative "nodes/text_node"
9
- require_relative "nodes/comment_node"
10
- require_relative "nodes/processing_instruction_node"
11
4
 
12
5
  module Canon
13
6
  module Xml
@@ -28,7 +21,7 @@ module Canon
28
21
  # For C14N, use strip_doctype: true to avoid DTD default attribute expansion:
29
22
  # root = SaxBuilder.parse(xml_string, strip_doctype: true)
30
23
  #
31
- class SaxBuilder < Nokogiri::XML::SAX::Document
24
+ class SaxBuilder < (RUBY_ENGINE == "opal" ? Object : Nokogiri::XML::SAX::Document)
32
25
  # Parse XML string and return Canon::Xml::Node tree
33
26
  #
34
27
  # @param xml_string [String] XML content to parse
@@ -43,9 +43,9 @@ module Canon
43
43
  # @param node [Moxml::Node] Node to check
44
44
  # @return [Boolean] true if node is whitespace-only and should be ignored
45
45
  def inter_element_whitespace?(node)
46
- return false unless node.respond_to?(:text?) && node.text?
46
+ return false unless node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
47
47
 
48
- text = node.respond_to?(:content) ? node.content.to_s : node.text.to_s
48
+ text = node.is_a?(Moxml::Text) ? node.content.to_s : node.content.to_s
49
49
  text.strip.empty?
50
50
  end
51
51
 
data/lib/canon/xml.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Native XML data model, parsing, and DOM utilities.
5
+ #
6
+ # This namespace holds Canon's own XML representation (independent of
7
+ # Nokogiri/Moxml), including:
8
+ # * the XPath data model nodes (Canon::Xml::Node and Canon::Xml::Nodes::*)
9
+ # * the SAX builder that produces them
10
+ # * element matching, line-range mapping, xpath, C14N, processors
11
+ # * canonicalization (C14n) and serialization helpers
12
+ #
13
+ # All children are autoloaded from this file. The nested Nodes namespace
14
+ # is registered here alongside them and is autoloaded on first reference to
15
+ # Canon::Xml::Nodes.
16
+ module Xml
17
+ autoload :AttributeHandler, "canon/xml/attribute_handler"
18
+ autoload :C14n, "canon/xml/c14n"
19
+ autoload :CharacterEncoder, "canon/xml/character_encoder"
20
+ autoload :DataModel, "canon/xml/data_model"
21
+ autoload :ElementMatcher, "canon/xml/element_matcher"
22
+ autoload :LineRangeMapper, "canon/xml/line_range_mapper"
23
+ autoload :NamespaceHandler, "canon/xml/namespace_handler"
24
+ autoload :NamespaceHelper, "canon/xml/namespace_helper"
25
+ autoload :Node, "canon/xml/node"
26
+ autoload :Nodes, "canon/xml/nodes"
27
+ autoload :Processor, "canon/xml/processor"
28
+ autoload :SaxBuilder, "canon/xml/sax_builder"
29
+ autoload :WhitespaceNormalizer, "canon/xml/whitespace_normalizer"
30
+ autoload :XmlBaseHandler, "canon/xml/xml_base_handler"
31
+ autoload :XPathEngine, "canon/xml/xpath_engine"
32
+ end
33
+ end
@@ -1,20 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- # Centralized XML backend detection for Canon.
5
- #
6
- # Canon supports two XML backends:
7
- # - :nokogiri — MRI with Nokogiri installed (default, existing code path)
8
- # - :moxml — Opal runtime or MRI without Nokogiri (uses Oga via moxml)
9
- #
10
- # The active backend is determined once at load time and cached.
11
- # All XML-related code should check `Canon::XmlBackend.moxml?` or
12
- # `Canon::XmlBackend.nokogiri?` to select the appropriate code path.
13
- #
14
- # This module intentionally does NOT wrap Nokogiri through moxml.
15
- # Each backend path is independent — the Nokogiri path is the existing
16
- # battle-tested code; the moxml path is a parallel implementation for
17
- # environments where Nokogiri is unavailable.
18
4
  module XmlBackend
19
5
  class << self
20
6
  def active
@@ -33,6 +19,56 @@ module Canon
33
19
  @active = nil
34
20
  end
35
21
 
22
+ # Whether the node is a document fragment (any variant).
23
+ def document_fragment?(node)
24
+ if nokogiri?
25
+ node.is_a?(Nokogiri::XML::DocumentFragment) ||
26
+ node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
27
+ node.is_a?(Nokogiri::HTML5::DocumentFragment)
28
+ else
29
+ false
30
+ end
31
+ end
32
+
33
+ # Whether the node is an HTML document (any variant).
34
+ def html_document?(node)
35
+ if nokogiri?
36
+ node.is_a?(Nokogiri::HTML::Document) ||
37
+ node.is_a?(Nokogiri::HTML4::Document) ||
38
+ node.is_a?(Nokogiri::HTML5::Document)
39
+ else
40
+ false
41
+ end
42
+ end
43
+
44
+ # Detect HTML version from a Nokogiri node.
45
+ # Returns :html5 or :html4. Defaults to :html5 for non-Nokogiri nodes.
46
+ def html_version_from_node(node)
47
+ if nokogiri?
48
+ if node.is_a?(Nokogiri::HTML5::Document) ||
49
+ node.is_a?(Nokogiri::HTML5::DocumentFragment)
50
+ :html5
51
+ elsif node.is_a?(Nokogiri::HTML4::Document) ||
52
+ node.is_a?(Nokogiri::HTML4::DocumentFragment)
53
+ :html4
54
+ else
55
+ :html5
56
+ end
57
+ else
58
+ :html5
59
+ end
60
+ end
61
+
62
+ # Parse an HTML string into an XML fragment.
63
+ def xml_fragment(html_string)
64
+ if nokogiri?
65
+ Nokogiri::XML.fragment(html_string)
66
+ else
67
+ raise Canon::Error,
68
+ "HTML fragment parsing requires the Nokogiri backend"
69
+ end
70
+ end
71
+
36
72
  private
37
73
 
38
74
  def detect
@@ -14,7 +14,7 @@ module Canon
14
14
  module XmlParsing
15
15
  class << self
16
16
  def moxml_context
17
- @moxml_context ||= Moxml.new(:oga)
17
+ @moxml_context ||= Moxml.new(RUBY_ENGINE == "opal" ? :rexml : :oga)
18
18
  end
19
19
 
20
20
  # --- Parsing ---
@@ -47,10 +47,15 @@ module Canon
47
47
  end
48
48
 
49
49
  # --- Type checks (backend-safe) ---
50
+ #
51
+ # Both Nokogiri and Moxml are loaded as dependencies. XmlBackend
52
+ # determines which is used for *parsing*, but nodes from either
53
+ # library may flow through comparison code (e.g. tests, format
54
+ # detection). Under Nokogiri backend, both types are checked.
50
55
 
51
56
  def document?(obj)
52
57
  if XmlBackend.nokogiri?
53
- obj.is_a?(Nokogiri::XML::Document)
58
+ obj.is_a?(Nokogiri::XML::Document) || obj.is_a?(Moxml::Document)
54
59
  else
55
60
  obj.is_a?(Moxml::Document)
56
61
  end
@@ -58,7 +63,7 @@ module Canon
58
63
 
59
64
  def xml_node?(obj)
60
65
  if XmlBackend.nokogiri?
61
- obj.is_a?(Nokogiri::XML::Node)
66
+ obj.is_a?(Nokogiri::XML::Node) || obj.is_a?(Moxml::Node)
62
67
  else
63
68
  obj.is_a?(Moxml::Node)
64
69
  end
@@ -66,7 +71,7 @@ module Canon
66
71
 
67
72
  def element?(node)
68
73
  if XmlBackend.nokogiri?
69
- node.is_a?(Nokogiri::XML::Element)
74
+ node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
70
75
  else
71
76
  node.is_a?(Moxml::Element)
72
77
  end
@@ -74,7 +79,7 @@ module Canon
74
79
 
75
80
  def text_node?(node)
76
81
  if XmlBackend.nokogiri?
77
- node.is_a?(Nokogiri::XML::Text)
82
+ node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
78
83
  else
79
84
  node.is_a?(Moxml::Text)
80
85
  end
@@ -82,7 +87,7 @@ module Canon
82
87
 
83
88
  def comment?(node)
84
89
  if XmlBackend.nokogiri?
85
- node.is_a?(Nokogiri::XML::Comment)
90
+ node.is_a?(Nokogiri::XML::Comment) || node.is_a?(Moxml::Comment)
86
91
  else
87
92
  node.is_a?(Moxml::Comment)
88
93
  end
@@ -90,7 +95,7 @@ module Canon
90
95
 
91
96
  def cdata?(node)
92
97
  if XmlBackend.nokogiri?
93
- node.is_a?(Nokogiri::XML::CDATA)
98
+ node.is_a?(Nokogiri::XML::CDATA) || node.is_a?(Moxml::Cdata)
94
99
  else
95
100
  node.is_a?(Moxml::Cdata)
96
101
  end
@@ -98,7 +103,7 @@ module Canon
98
103
 
99
104
  def processing_instruction?(node)
100
105
  if XmlBackend.nokogiri?
101
- node.is_a?(Nokogiri::XML::ProcessingInstruction)
106
+ node.is_a?(Nokogiri::XML::ProcessingInstruction) || node.is_a?(Moxml::ProcessingInstruction)
102
107
  else
103
108
  node.is_a?(Moxml::ProcessingInstruction)
104
109
  end
@@ -108,7 +113,7 @@ module Canon
108
113
  if XmlBackend.nokogiri?
109
114
  obj.is_a?(Nokogiri::XML::DocumentFragment)
110
115
  else
111
- obj.is_a?(Moxml::DocumentFragment)
116
+ false
112
117
  end
113
118
  end
114
119
 
@@ -142,7 +147,14 @@ module Canon
142
147
  if XmlBackend.nokogiri?
143
148
  node.is_a?(Nokogiri::XML::Node) ? node.content : node.to_s
144
149
  else
145
- node.is_a?(Moxml::Node) ? node.text : node.to_s
150
+ case node
151
+ when Moxml::Text, Moxml::Cdata, Moxml::Comment
152
+ node.content.to_s
153
+ when Moxml::Node
154
+ node.text.to_s
155
+ else
156
+ node.to_s
157
+ end
146
158
  end
147
159
  end
148
160
 
@@ -252,17 +264,19 @@ module Canon
252
264
  node.to_xml
253
265
  end
254
266
 
255
- def moxml_canonicalize(node, _options)
256
- node.to_xml
267
+ def moxml_canonicalize(_node, _options)
268
+ raise Canon::Error,
269
+ "C14N canonicalization is not supported by the moxml backend. " \
270
+ "Use the Nokogiri backend or a different preprocessing option."
257
271
  end
258
272
 
259
273
  def moxml_node_type(node)
260
- return :element if node.is_a?(Moxml::Element)
261
- return :text if node.is_a?(Moxml::Text)
262
- return :comment if node.is_a?(Moxml::Comment)
263
- return :cdata if node.is_a?(Moxml::Cdata)
264
- return :document if node.is_a?(Moxml::Document)
265
- return :processing_instruction if node.is_a?(Moxml::ProcessingInstruction)
274
+ return :element if node.element?
275
+ return :text if node.text?
276
+ return :comment if node.comment?
277
+ return :cdata if node.cdata?
278
+ return :document if node.document?
279
+ return :processing_instruction if node.processing_instruction?
266
280
 
267
281
  nil
268
282
  end