canon 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +12 -22
  3. data/Rakefile +5 -2
  4. data/lib/canon/cache.rb +3 -1
  5. data/lib/canon/cli.rb +0 -3
  6. data/lib/canon/commands/diff_command.rb +0 -6
  7. data/lib/canon/commands/format_command.rb +0 -4
  8. data/lib/canon/commands.rb +9 -0
  9. data/lib/canon/comparison/child_realignment.rb +0 -2
  10. data/lib/canon/comparison/compare_profile.rb +30 -36
  11. data/lib/canon/comparison/comparison_result.rb +0 -2
  12. data/lib/canon/comparison/diff_node_builder.rb +353 -0
  13. data/lib/canon/comparison/dimensions/dimension.rb +51 -0
  14. data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
  15. data/lib/canon/comparison/dimensions/registry.rb +101 -60
  16. data/lib/canon/comparison/dimensions.rb +15 -46
  17. data/lib/canon/comparison/html_comparator.rb +18 -141
  18. data/lib/canon/comparison/html_compare_profile.rb +15 -18
  19. data/lib/canon/comparison/json_comparator.rb +4 -165
  20. data/lib/canon/comparison/json_parser.rb +0 -2
  21. data/lib/canon/comparison/markup_comparator.rb +14 -210
  22. data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
  23. data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
  24. data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
  25. data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
  26. data/lib/canon/comparison/match_options.rb +13 -88
  27. data/lib/canon/comparison/pipeline.rb +269 -0
  28. data/lib/canon/comparison/profile_definition.rb +0 -2
  29. data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
  30. data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
  31. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
  32. data/lib/canon/comparison/strategies.rb +16 -0
  33. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
  34. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
  35. data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
  36. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
  37. data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
  38. data/lib/canon/comparison/xml_comparator.rb +4 -492
  39. data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
  40. data/lib/canon/comparison/xml_node_comparison.rb +4 -119
  41. data/lib/canon/comparison/yaml_comparator.rb +0 -3
  42. data/lib/canon/comparison.rb +143 -266
  43. data/lib/canon/config/config_dsl.rb +159 -0
  44. data/lib/canon/config/env_provider.rb +0 -3
  45. data/lib/canon/config/env_schema.rb +48 -58
  46. data/lib/canon/config/profile_loader.rb +0 -1
  47. data/lib/canon/config.rb +116 -468
  48. data/lib/canon/diff/diff_block_builder.rb +0 -2
  49. data/lib/canon/diff/diff_classifier.rb +0 -5
  50. data/lib/canon/diff/diff_context.rb +0 -2
  51. data/lib/canon/diff/diff_context_builder.rb +0 -2
  52. data/lib/canon/diff/diff_line_builder.rb +0 -3
  53. data/lib/canon/diff/diff_node_enricher.rb +0 -4
  54. data/lib/canon/diff/diff_node_mapper.rb +0 -4
  55. data/lib/canon/diff/diff_report_builder.rb +0 -4
  56. data/lib/canon/diff/formatting_detector.rb +0 -1
  57. data/lib/canon/diff/node_serializer.rb +0 -7
  58. data/lib/canon/diff.rb +39 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
  61. data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
  62. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
  63. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
  64. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
  65. data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
  66. data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
  67. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
  68. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
  69. data/lib/canon/diff_formatter/debug_output.rb +0 -2
  70. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
  71. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
  72. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
  73. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
  74. data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
  75. data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
  76. data/lib/canon/diff_formatter.rb +11 -9
  77. data/lib/canon/formatters/html4_formatter.rb +0 -2
  78. data/lib/canon/formatters/html5_formatter.rb +0 -2
  79. data/lib/canon/formatters/html_formatter.rb +0 -3
  80. data/lib/canon/formatters/json_formatter.rb +0 -1
  81. data/lib/canon/formatters/xml_formatter.rb +0 -4
  82. data/lib/canon/formatters/yaml_formatter.rb +0 -1
  83. data/lib/canon/formatters.rb +16 -0
  84. data/lib/canon/html/data_model.rb +0 -10
  85. data/lib/canon/html.rb +4 -3
  86. data/lib/canon/options/cli_generator.rb +0 -2
  87. data/lib/canon/options/registry.rb +0 -2
  88. data/lib/canon/options.rb +9 -0
  89. data/lib/canon/pretty_printer/html.rb +0 -1
  90. data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
  91. data/lib/canon/pretty_printer.rb +12 -0
  92. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  93. data/lib/canon/tree_diff/adapters.rb +14 -0
  94. data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
  95. data/lib/canon/tree_diff/core/node_signature.rb +1 -1
  96. data/lib/canon/tree_diff/core/tree_node.rb +12 -5
  97. data/lib/canon/tree_diff/core.rb +17 -0
  98. data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
  99. data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
  100. data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
  101. data/lib/canon/tree_diff/matchers.rb +15 -0
  102. data/lib/canon/tree_diff/operation_converter.rb +0 -8
  103. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
  104. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
  105. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
  106. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
  107. data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
  108. data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
  109. data/lib/canon/tree_diff/operations.rb +13 -0
  110. data/lib/canon/tree_diff.rb +26 -27
  111. data/lib/canon/validators/base_validator.rb +0 -2
  112. data/lib/canon/validators/html_validator.rb +0 -1
  113. data/lib/canon/validators/json_validator.rb +0 -1
  114. data/lib/canon/validators/xml_validator.rb +0 -1
  115. data/lib/canon/validators/yaml_validator.rb +0 -1
  116. data/lib/canon/validators.rb +12 -0
  117. data/lib/canon/version.rb +1 -1
  118. data/lib/canon/xml/c14n.rb +0 -4
  119. data/lib/canon/xml/data_model.rb +0 -10
  120. data/lib/canon/xml/line_range_mapper.rb +0 -2
  121. data/lib/canon/xml/nodes/attribute_node.rb +0 -2
  122. data/lib/canon/xml/nodes/comment_node.rb +0 -2
  123. data/lib/canon/xml/nodes/element_node.rb +0 -2
  124. data/lib/canon/xml/nodes/namespace_node.rb +0 -2
  125. data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
  126. data/lib/canon/xml/nodes/root_node.rb +0 -2
  127. data/lib/canon/xml/nodes/text_node.rb +0 -2
  128. data/lib/canon/xml/nodes.rb +19 -0
  129. data/lib/canon/xml/processor.rb +0 -5
  130. data/lib/canon/xml/sax_builder.rb +0 -7
  131. data/lib/canon/xml.rb +33 -0
  132. data/lib/canon/xml_backend.rb +50 -14
  133. data/lib/canon/xml_parsing.rb +4 -2
  134. data/lib/canon.rb +25 -15
  135. data/lib/tasks/performance.rake +0 -58
  136. data/lib/tasks/performance_comparator.rb +132 -65
  137. data/lib/tasks/performance_helpers.rb +4 -249
  138. data/lib/tasks/performance_report.rb +309 -0
  139. metadata +24 -11
  140. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
  141. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
  142. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
  143. data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
  144. data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
  145. data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
  146. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
  147. data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
  148. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../core/xml_entity_decoder"
4
-
5
3
  module Canon
6
4
  module TreeDiff
7
5
  module Operations
@@ -619,13 +617,12 @@ module Canon
619
617
  # Check if this node or any ancestor is whitespace-sensitive
620
618
  current = node
621
619
  while current
622
- if current.respond_to?(:label)
620
+ if current.is_a?(Core::TreeNode)
623
621
  label = current.label.to_s.downcase
624
622
  return true if whitespace_sensitive_tags.include?(label)
625
623
  end
626
624
 
627
- # Check parent
628
- current = current.parent if current.respond_to?(:parent)
625
+ current = current.is_a?(Core::TreeNode) ? current.parent : nil
629
626
  break unless current
630
627
  end
631
628
 
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module TreeDiff
5
+ # Tree-diff operations: Operation (INSERT/DELETE/UPDATE/MOVE) and
6
+ # the OperationDetector that emits them.
7
+ module Operations
8
+ autoload :Operation, "canon/tree_diff/operations/operation"
9
+ autoload :OperationDetector,
10
+ "canon/tree_diff/operations/operation_detector"
11
+ end
12
+ end
13
+ end
@@ -1,33 +1,32 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
+ # Semantic tree-diff algorithm — distinct from the DOM positional
5
+ # diff in {Canon::Comparison}.
6
+ #
7
+ # This module computes signature-based tree matches and produces
8
+ # INSERT/DELETE/UPDATE/MOVE operations. Sub-namespaces:
9
+ #
10
+ # * Core — TreeNode, Matching, NodeSignature, NodeWeight,
11
+ # AttributeComparator, XmlEntityDecoder
12
+ # * Matchers — HashMatcher, SimilarityMatcher, StructuralPropagator,
13
+ # UniversalMatcher
14
+ # * Operations — Operation, OperationDetector
15
+ # * Adapters — format-specific tree adapters (XML, JSON, HTML, YAML)
16
+ # * OperationConverterHelpers — MetadataEnricher, ReasonBuilder,
17
+ # PostProcessor, UpdateChangeHandler
18
+ #
19
+ # Top-level entry points: OperationConverter and TreeDiffIntegrator.
20
+ #
21
+ # All children are autoloaded — never `require_relative` them.
4
22
  module TreeDiff
5
- # Tree diff module for semantic object tree diffing
23
+ autoload :Adapters, "canon/tree_diff/adapters"
24
+ autoload :Core, "canon/tree_diff/core"
25
+ autoload :Matchers, "canon/tree_diff/matchers"
26
+ autoload :OperationConverter, "canon/tree_diff/operation_converter"
27
+ autoload :OperationConverterHelpers,
28
+ "canon/tree_diff/operation_converter_helpers"
29
+ autoload :Operations, "canon/tree_diff/operations"
30
+ autoload :TreeDiffIntegrator, "canon/tree_diff/tree_diff_integrator"
6
31
  end
7
32
  end
8
-
9
- # Load core components
10
- require_relative "tree_diff/core/tree_node"
11
- require_relative "tree_diff/core/node_signature"
12
- require_relative "tree_diff/core/node_weight"
13
- require_relative "tree_diff/core/matching"
14
-
15
- # Load matchers
16
- require_relative "tree_diff/matchers/hash_matcher"
17
- require_relative "tree_diff/matchers/similarity_matcher"
18
- require_relative "tree_diff/matchers/structural_propagator"
19
- require_relative "tree_diff/matchers/universal_matcher"
20
-
21
- # Load operations
22
- require_relative "tree_diff/operations/operation"
23
- require_relative "tree_diff/operations/operation_detector"
24
- require_relative "tree_diff/operation_converter"
25
-
26
- # Load adapters
27
- require_relative "tree_diff/adapters/xml_adapter"
28
- require_relative "tree_diff/adapters/json_adapter"
29
- require_relative "tree_diff/adapters/html_adapter"
30
- require_relative "tree_diff/adapters/yaml_adapter"
31
-
32
- # Load integrator
33
- require_relative "tree_diff/tree_diff_integrator"
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../errors"
4
-
5
3
  module Canon
6
4
  module Validators
7
5
  # Base class for all input validators
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "json"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "base_validator"
5
4
 
6
5
  module Canon
7
6
  module Validators
@@ -3,7 +3,6 @@
3
3
  require "yaml"
4
4
  require "date"
5
5
  require "time"
6
- require_relative "base_validator"
7
6
 
8
7
  module Canon
9
8
  module Validators
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Format-specific validators that raise {Error} on invalid input.
5
+ module Validators
6
+ autoload :BaseValidator, "canon/validators/base_validator"
7
+ autoload :HtmlValidator, "canon/validators/html_validator"
8
+ autoload :JsonValidator, "canon/validators/json_validator"
9
+ autoload :XmlValidator, "canon/validators/xml_validator"
10
+ autoload :YamlValidator, "canon/validators/yaml_validator"
11
+ end
12
+ end
data/lib/canon/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- VERSION = "0.2.11"
4
+ VERSION = "0.2.12"
5
5
  end
@@ -1,9 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "data_model"
4
- require_relative "processor"
5
- require_relative "xpath_engine"
6
-
7
3
  module Canon
8
4
  module Xml
9
5
  # XML Canonicalization 1.1 implementation
@@ -2,16 +2,6 @@
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require "set"
5
- require_relative "../data_model"
6
- require_relative "../xml_backend"
7
- require_relative "../xml_parsing"
8
- require_relative "nodes/root_node"
9
- require_relative "nodes/element_node"
10
- require_relative "nodes/namespace_node"
11
- require_relative "nodes/attribute_node"
12
- require_relative "nodes/text_node"
13
- require_relative "nodes/comment_node"
14
- require_relative "nodes/processing_instruction_node"
15
5
 
16
6
  module Canon
17
7
  module Xml
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../pretty_printer/xml"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  # Maps DOM elements to line ranges in pretty-printed XML
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../node"
4
-
5
3
  module Canon
6
4
  module Xml
7
5
  module Nodes
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Xml
5
+ # XPath data model node types. All nodes inherit from
6
+ # {Canon::Xml::Node}. Children are autoloaded — never
7
+ # `require_relative` them.
8
+ module Nodes
9
+ autoload :AttributeNode, "canon/xml/nodes/attribute_node"
10
+ autoload :CommentNode, "canon/xml/nodes/comment_node"
11
+ autoload :ElementNode, "canon/xml/nodes/element_node"
12
+ autoload :NamespaceNode, "canon/xml/nodes/namespace_node"
13
+ autoload :ProcessingInstructionNode,
14
+ "canon/xml/nodes/processing_instruction_node"
15
+ autoload :RootNode, "canon/xml/nodes/root_node"
16
+ autoload :TextNode, "canon/xml/nodes/text_node"
17
+ end
18
+ end
19
+ end
@@ -1,10 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "character_encoder"
4
- require_relative "namespace_handler"
5
- require_relative "attribute_handler"
6
- require_relative "xml_base_handler"
7
-
8
3
  module Canon
9
4
  module Xml
10
5
  # C14N 1.1 processor
@@ -1,13 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "nokogiri" unless RUBY_ENGINE == "opal"
4
- require_relative "nodes/root_node"
5
- require_relative "nodes/element_node"
6
- require_relative "nodes/namespace_node"
7
- require_relative "nodes/attribute_node"
8
- require_relative "nodes/text_node"
9
- require_relative "nodes/comment_node"
10
- require_relative "nodes/processing_instruction_node"
11
4
 
12
5
  module Canon
13
6
  module Xml
data/lib/canon/xml.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Native XML data model, parsing, and DOM utilities.
5
+ #
6
+ # This namespace holds Canon's own XML representation (independent of
7
+ # Nokogiri/Moxml), including:
8
+ # * the XPath data model nodes (Canon::Xml::Node and Canon::Xml::Nodes::*)
9
+ # * the SAX builder that produces them
10
+ # * element matching, line-range mapping, xpath, C14N, processors
11
+ # * canonicalization (C14n) and serialization helpers
12
+ #
13
+ # All children are autoloaded from this file. The nested Nodes namespace
14
+ # is itself a sibling and is autoloaded on first reference to
15
+ # Canon::Xml::Nodes.
16
+ module Xml
17
+ autoload :AttributeHandler, "canon/xml/attribute_handler"
18
+ autoload :C14n, "canon/xml/c14n"
19
+ autoload :CharacterEncoder, "canon/xml/character_encoder"
20
+ autoload :DataModel, "canon/xml/data_model"
21
+ autoload :ElementMatcher, "canon/xml/element_matcher"
22
+ autoload :LineRangeMapper, "canon/xml/line_range_mapper"
23
+ autoload :NamespaceHandler, "canon/xml/namespace_handler"
24
+ autoload :NamespaceHelper, "canon/xml/namespace_helper"
25
+ autoload :Node, "canon/xml/node"
26
+ autoload :Nodes, "canon/xml/nodes"
27
+ autoload :Processor, "canon/xml/processor"
28
+ autoload :SaxBuilder, "canon/xml/sax_builder"
29
+ autoload :WhitespaceNormalizer, "canon/xml/whitespace_normalizer"
30
+ autoload :XmlBaseHandler, "canon/xml/xml_base_handler"
31
+ autoload :XPathEngine, "canon/xml/xpath_engine"
32
+ end
33
+ end
@@ -1,20 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Canon
4
- # Centralized XML backend detection for Canon.
5
- #
6
- # Canon supports two XML backends:
7
- # - :nokogiri — MRI with Nokogiri installed (default, existing code path)
8
- # - :moxml — Opal runtime or MRI without Nokogiri (uses Oga via moxml)
9
- #
10
- # The active backend is determined once at load time and cached.
11
- # All XML-related code should check `Canon::XmlBackend.moxml?` or
12
- # `Canon::XmlBackend.nokogiri?` to select the appropriate code path.
13
- #
14
- # This module intentionally does NOT wrap Nokogiri through moxml.
15
- # Each backend path is independent — the Nokogiri path is the existing
16
- # battle-tested code; the moxml path is a parallel implementation for
17
- # environments where Nokogiri is unavailable.
18
4
  module XmlBackend
19
5
  class << self
20
6
  def active
@@ -33,6 +19,56 @@ module Canon
33
19
  @active = nil
34
20
  end
35
21
 
22
+ # Whether the node is a document fragment (any variant).
23
+ def document_fragment?(node)
24
+ if nokogiri?
25
+ node.is_a?(Nokogiri::XML::DocumentFragment) ||
26
+ node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
27
+ node.is_a?(Nokogiri::HTML5::DocumentFragment)
28
+ else
29
+ false
30
+ end
31
+ end
32
+
33
+ # Whether the node is an HTML document (any variant).
34
+ def html_document?(node)
35
+ if nokogiri?
36
+ node.is_a?(Nokogiri::HTML::Document) ||
37
+ node.is_a?(Nokogiri::HTML4::Document) ||
38
+ node.is_a?(Nokogiri::HTML5::Document)
39
+ else
40
+ false
41
+ end
42
+ end
43
+
44
+ # Detect HTML version from a Nokogiri node.
45
+ # Returns :html5 or :html4. Defaults to :html5 for non-Nokogiri nodes.
46
+ def html_version_from_node(node)
47
+ if nokogiri?
48
+ if node.is_a?(Nokogiri::HTML5::Document) ||
49
+ node.is_a?(Nokogiri::HTML5::DocumentFragment)
50
+ :html5
51
+ elsif node.is_a?(Nokogiri::HTML4::Document) ||
52
+ node.is_a?(Nokogiri::HTML4::DocumentFragment)
53
+ :html4
54
+ else
55
+ :html5
56
+ end
57
+ else
58
+ :html5
59
+ end
60
+ end
61
+
62
+ # Parse an HTML string into an XML fragment.
63
+ def xml_fragment(html_string)
64
+ if nokogiri?
65
+ Nokogiri::XML.fragment(html_string)
66
+ else
67
+ raise Canon::Error,
68
+ "HTML fragment parsing requires the Nokogiri backend"
69
+ end
70
+ end
71
+
36
72
  private
37
73
 
38
74
  def detect
@@ -264,8 +264,10 @@ module Canon
264
264
  node.to_xml
265
265
  end
266
266
 
267
- def moxml_canonicalize(node, _options)
268
- node.to_xml
267
+ def moxml_canonicalize(_node, _options)
268
+ raise Canon::Error,
269
+ "C14N canonicalization is not supported by the moxml backend. " \
270
+ "Use the Nokogiri backend or a different preprocessing option."
269
271
  end
270
272
 
271
273
  def moxml_node_type(node)
data/lib/canon.rb CHANGED
@@ -1,23 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "canon/version"
4
- require_relative "canon/errors"
5
- require_relative "canon/xml_backend"
6
- require_relative "canon/xml_parsing"
7
- require_relative "canon/config"
8
- require_relative "canon/data_model"
9
- require_relative "canon/html"
10
- require_relative "canon/formatters/xml_formatter"
11
- require_relative "canon/formatters/yaml_formatter"
12
- require_relative "canon/formatters/json_formatter"
13
- require_relative "canon/formatters/html_formatter"
14
- require_relative "canon/formatters/html4_formatter"
15
- require_relative "canon/formatters/html5_formatter"
16
- require_relative "canon/comparison"
3
+ require "canon/version"
4
+ require "canon/errors"
5
+ require "nokogiri" unless RUBY_ENGINE == "opal"
6
+ require "canon/xml_backend"
7
+ require "canon/xml_parsing"
8
+ require "canon/config"
9
+ require "canon/data_model"
10
+ require "canon/xml"
11
+ require "canon/html"
12
+ require "canon/formatters"
13
+ require "canon/comparison"
14
+ require "canon/diff"
15
+ require "canon/tree_diff"
16
+ require "canon/validators"
17
+ require "canon/pretty_printer"
18
+ require "canon/options"
19
+ require "canon/commands"
17
20
 
18
- require_relative "canon/rspec_matchers" if defined?(RSpec.configure)
21
+ require "canon/rspec_matchers" if defined?(RSpec.configure)
19
22
 
20
23
  module Canon
24
+ autoload :Cache, "canon/cache"
25
+ autoload :Cli, "canon/cli"
26
+ autoload :ColorDetector, "canon/color_detector"
27
+ autoload :DiffFormatter, "canon/diff_formatter"
28
+
21
29
  SUPPORTED_FORMATS = %i[xml yaml json html html4 html5 string].freeze
22
30
 
23
31
  # Format content based on the specified format type
@@ -60,6 +68,8 @@ module Canon
60
68
  # Define shorthand methods for each supported format
61
69
  # Creates parse_{format} and format_{format} methods
62
70
  SUPPORTED_FORMATS.each do |format|
71
+ next if format == :string # comparison-only format, no formatter
72
+
63
73
  define_singleton_method("parse_#{format}") do |content|
64
74
  parse(content, format)
65
75
  end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "performance_comparator"
4
- require_relative "benchmark_runner"
5
4
 
6
5
  desc "Run performance benchmarks"
7
6
  namespace :performance do
@@ -16,66 +15,9 @@ namespace :performance do
16
15
  runner.run_benchmarks
17
16
  end
18
17
 
19
- desc "Run specific benchmark category (xml_parsing, html_parsing, xml_comparison, html_comparison, formatting)"
20
- task :category, [:name] do |_t, args|
21
- category = args[:name]
22
- unless PerformanceComparator::BENCHMARK_CATEGORIES.key?(category.to_sym)
23
- puts "Unknown category: #{category}"
24
- puts "Available: #{PerformanceComparator::BENCHMARK_CATEGORIES.keys.join(', ')}"
25
- exit(1)
26
- end
27
-
28
- runner = BenchmarkRunner.new(run_time: 10)
29
- runner.run_benchmarks
30
- end
31
-
32
18
  desc "Quick benchmark run (faster, less accurate)"
33
19
  task :quick do
34
20
  runner = BenchmarkRunner.new(run_time: 2, warmup: 1, items: 20)
35
21
  runner.run_benchmarks
36
22
  end
37
-
38
- desc "Run benchmarks and output as JSON"
39
- task :json do
40
- require "json"
41
- runner = BenchmarkRunner.new(run_time: 5)
42
-
43
- # Suppress pretty output, just get results
44
- results = runner.send(:run_all_benchmarks)
45
-
46
- output = results.each_with_object({}) do |(label, metrics), h|
47
- ips = (metrics[:lower] + metrics[:upper]) / 2.0
48
- deviation = ((metrics[:upper] - metrics[:lower]) / metrics[:upper] * 100).round(1)
49
- h[label] = {
50
- ips: ips.round(2),
51
- lower: metrics[:lower].round(2),
52
- upper: metrics[:upper].round(2),
53
- deviation: deviation,
54
- }
55
- end
56
-
57
- puts JSON.pretty_generate(output)
58
- end
59
-
60
- desc "Run benchmarks and output as YAML"
61
- task :yaml do
62
- require "yaml"
63
- runner = BenchmarkRunner.new(run_time: 5)
64
-
65
- # Suppress pretty output, just get results
66
- results = runner.send(:run_all_benchmarks)
67
-
68
- output = results.each_with_object({}) do |(label, metrics), h|
69
- ips = (metrics[:lower] + metrics[:upper]) / 2.0
70
- deviation = ((metrics[:upper] - metrics[:lower]) / metrics[:upper] * 100).round(1)
71
- h[label.to_sym] = {
72
- ips: ips.round(2),
73
- lower: metrics[:lower].round(2),
74
- upper: metrics[:upper].round(2),
75
- deviation: deviation,
76
- }
77
- end
78
-
79
- puts YAML.dump(output)
80
- end
81
23
  end