canon 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +12 -22
- data/Rakefile +5 -2
- data/lib/canon/cache.rb +3 -1
- data/lib/canon/cli.rb +0 -3
- data/lib/canon/commands/diff_command.rb +0 -6
- data/lib/canon/commands/format_command.rb +0 -4
- data/lib/canon/commands.rb +9 -0
- data/lib/canon/comparison/child_realignment.rb +0 -2
- data/lib/canon/comparison/compare_profile.rb +30 -36
- data/lib/canon/comparison/comparison_result.rb +0 -2
- data/lib/canon/comparison/diff_node_builder.rb +353 -0
- data/lib/canon/comparison/dimensions/dimension.rb +51 -0
- data/lib/canon/comparison/dimensions/dimension_set.rb +49 -0
- data/lib/canon/comparison/dimensions/registry.rb +101 -60
- data/lib/canon/comparison/dimensions.rb +15 -46
- data/lib/canon/comparison/html_comparator.rb +18 -141
- data/lib/canon/comparison/html_compare_profile.rb +15 -18
- data/lib/canon/comparison/json_comparator.rb +4 -165
- data/lib/canon/comparison/json_parser.rb +0 -2
- data/lib/canon/comparison/markup_comparator.rb +14 -210
- data/lib/canon/comparison/match_options/base_resolver.rb +18 -29
- data/lib/canon/comparison/match_options/json_resolver.rb +4 -28
- data/lib/canon/comparison/match_options/xml_resolver.rb +4 -45
- data/lib/canon/comparison/match_options/yaml_resolver.rb +4 -30
- data/lib/canon/comparison/match_options.rb +13 -88
- data/lib/canon/comparison/pipeline.rb +269 -0
- data/lib/canon/comparison/profile_definition.rb +0 -2
- data/lib/canon/comparison/ruby_object_comparator.rb +1 -1
- data/lib/canon/comparison/strategies/match_strategy_factory.rb +9 -58
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +4 -11
- data/lib/canon/comparison/strategies.rb +16 -0
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +0 -3
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +0 -3
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +0 -6
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +1 -6
- data/lib/canon/comparison/xml_comparator/node_parser.rb +0 -4
- data/lib/canon/comparison/xml_comparator.rb +4 -492
- data/lib/canon/comparison/xml_comparator_helpers.rb +21 -0
- data/lib/canon/comparison/xml_node_comparison.rb +4 -119
- data/lib/canon/comparison/yaml_comparator.rb +0 -3
- data/lib/canon/comparison.rb +143 -266
- data/lib/canon/config/config_dsl.rb +159 -0
- data/lib/canon/config/env_provider.rb +0 -3
- data/lib/canon/config/env_schema.rb +48 -58
- data/lib/canon/config/profile_loader.rb +0 -1
- data/lib/canon/config.rb +116 -468
- data/lib/canon/diff/diff_block_builder.rb +0 -2
- data/lib/canon/diff/diff_classifier.rb +0 -5
- data/lib/canon/diff/diff_context.rb +0 -2
- data/lib/canon/diff/diff_context_builder.rb +0 -2
- data/lib/canon/diff/diff_line_builder.rb +0 -3
- data/lib/canon/diff/diff_node_enricher.rb +0 -4
- data/lib/canon/diff/diff_node_mapper.rb +0 -4
- data/lib/canon/diff/diff_report_builder.rb +0 -4
- data/lib/canon/diff/formatting_detector.rb +0 -1
- data/lib/canon/diff/node_serializer.rb +0 -7
- data/lib/canon/diff.rb +39 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +4 -17
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +7 -19
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +7 -26
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +0 -3
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +8 -15
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +0 -2
- data/lib/canon/diff_formatter/debug_output.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +24 -58
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +0 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +1 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +1 -7
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +0 -7
- data/lib/canon/diff_formatter/diff_detail_formatter_helpers.rb +23 -0
- data/lib/canon/diff_formatter.rb +11 -9
- data/lib/canon/formatters/html4_formatter.rb +0 -2
- data/lib/canon/formatters/html5_formatter.rb +0 -2
- data/lib/canon/formatters/html_formatter.rb +0 -3
- data/lib/canon/formatters/json_formatter.rb +0 -1
- data/lib/canon/formatters/xml_formatter.rb +0 -4
- data/lib/canon/formatters/yaml_formatter.rb +0 -1
- data/lib/canon/formatters.rb +16 -0
- data/lib/canon/html/data_model.rb +0 -10
- data/lib/canon/html.rb +4 -3
- data/lib/canon/options/cli_generator.rb +0 -2
- data/lib/canon/options/registry.rb +0 -2
- data/lib/canon/options.rb +9 -0
- data/lib/canon/pretty_printer/html.rb +0 -1
- data/lib/canon/pretty_printer/xml_normalized.rb +0 -2
- data/lib/canon/pretty_printer.rb +12 -0
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters.rb +14 -0
- data/lib/canon/tree_diff/core/attribute_comparator.rb +0 -6
- data/lib/canon/tree_diff/core/node_signature.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +12 -5
- data/lib/canon/tree_diff/core.rb +17 -0
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +0 -7
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +1 -5
- data/lib/canon/tree_diff/matchers/structural_propagator.rb +1 -5
- data/lib/canon/tree_diff/matchers.rb +15 -0
- data/lib/canon/tree_diff/operation_converter.rb +0 -8
- data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +2 -12
- data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +13 -7
- data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +2 -2
- data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +4 -6
- data/lib/canon/tree_diff/operation_converter_helpers.rb +18 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +2 -5
- data/lib/canon/tree_diff/operations.rb +13 -0
- data/lib/canon/tree_diff.rb +26 -27
- data/lib/canon/validators/base_validator.rb +0 -2
- data/lib/canon/validators/html_validator.rb +0 -1
- data/lib/canon/validators/json_validator.rb +0 -1
- data/lib/canon/validators/xml_validator.rb +0 -1
- data/lib/canon/validators/yaml_validator.rb +0 -1
- data/lib/canon/validators.rb +12 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/c14n.rb +0 -4
- data/lib/canon/xml/data_model.rb +0 -10
- data/lib/canon/xml/line_range_mapper.rb +0 -2
- data/lib/canon/xml/nodes/attribute_node.rb +0 -2
- data/lib/canon/xml/nodes/comment_node.rb +0 -2
- data/lib/canon/xml/nodes/element_node.rb +0 -2
- data/lib/canon/xml/nodes/namespace_node.rb +0 -2
- data/lib/canon/xml/nodes/processing_instruction_node.rb +0 -2
- data/lib/canon/xml/nodes/root_node.rb +0 -2
- data/lib/canon/xml/nodes/text_node.rb +0 -2
- data/lib/canon/xml/nodes.rb +19 -0
- data/lib/canon/xml/processor.rb +0 -5
- data/lib/canon/xml/sax_builder.rb +0 -7
- data/lib/canon/xml.rb +33 -0
- data/lib/canon/xml_backend.rb +50 -14
- data/lib/canon/xml_parsing.rb +4 -2
- data/lib/canon.rb +25 -15
- data/lib/tasks/performance.rake +0 -58
- data/lib/tasks/performance_comparator.rb +132 -65
- data/lib/tasks/performance_helpers.rb +4 -249
- data/lib/tasks/performance_report.rb +309 -0
- metadata +24 -11
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +0 -64
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +0 -167
- data/lib/canon/comparison/dimensions/base_dimension.rb +0 -107
- data/lib/canon/comparison/dimensions/comments_dimension.rb +0 -117
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +0 -86
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +0 -115
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +0 -102
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +0 -300
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../core/xml_entity_decoder"
|
|
4
|
-
|
|
5
3
|
module Canon
|
|
6
4
|
module TreeDiff
|
|
7
5
|
module Operations
|
|
@@ -619,13 +617,12 @@ module Canon
|
|
|
619
617
|
# Check if this node or any ancestor is whitespace-sensitive
|
|
620
618
|
current = node
|
|
621
619
|
while current
|
|
622
|
-
if current.
|
|
620
|
+
if current.is_a?(Core::TreeNode)
|
|
623
621
|
label = current.label.to_s.downcase
|
|
624
622
|
return true if whitespace_sensitive_tags.include?(label)
|
|
625
623
|
end
|
|
626
624
|
|
|
627
|
-
|
|
628
|
-
current = current.parent if current.respond_to?(:parent)
|
|
625
|
+
current = current.is_a?(Core::TreeNode) ? current.parent : nil
|
|
629
626
|
break unless current
|
|
630
627
|
end
|
|
631
628
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module TreeDiff
|
|
5
|
+
# Tree-diff operations: Operation (INSERT/DELETE/UPDATE/MOVE) and
|
|
6
|
+
# the OperationDetector that emits them.
|
|
7
|
+
module Operations
|
|
8
|
+
autoload :Operation, "canon/tree_diff/operations/operation"
|
|
9
|
+
autoload :OperationDetector,
|
|
10
|
+
"canon/tree_diff/operations/operation_detector"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
data/lib/canon/tree_diff.rb
CHANGED
|
@@ -1,33 +1,32 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Canon
|
|
4
|
+
# Semantic tree-diff algorithm — distinct from the DOM positional
|
|
5
|
+
# diff in {Canon::Comparison}.
|
|
6
|
+
#
|
|
7
|
+
# This module computes signature-based tree matches and produces
|
|
8
|
+
# INSERT/DELETE/UPDATE/MOVE operations. Sub-namespaces:
|
|
9
|
+
#
|
|
10
|
+
# * Core — TreeNode, Matching, NodeSignature, NodeWeight,
|
|
11
|
+
# AttributeComparator, XmlEntityDecoder
|
|
12
|
+
# * Matchers — HashMatcher, SimilarityMatcher, StructuralPropagator,
|
|
13
|
+
# UniversalMatcher
|
|
14
|
+
# * Operations — Operation, OperationDetector
|
|
15
|
+
# * Adapters — format-specific tree adapters (XML, JSON, HTML, YAML)
|
|
16
|
+
# * OperationConverterHelpers — MetadataEnricher, ReasonBuilder,
|
|
17
|
+
# PostProcessor, UpdateChangeHandler
|
|
18
|
+
#
|
|
19
|
+
# Top-level entry points: OperationConverter and TreeDiffIntegrator.
|
|
20
|
+
#
|
|
21
|
+
# All children are autoloaded — never `require_relative` them.
|
|
4
22
|
module TreeDiff
|
|
5
|
-
|
|
23
|
+
autoload :Adapters, "canon/tree_diff/adapters"
|
|
24
|
+
autoload :Core, "canon/tree_diff/core"
|
|
25
|
+
autoload :Matchers, "canon/tree_diff/matchers"
|
|
26
|
+
autoload :OperationConverter, "canon/tree_diff/operation_converter"
|
|
27
|
+
autoload :OperationConverterHelpers,
|
|
28
|
+
"canon/tree_diff/operation_converter_helpers"
|
|
29
|
+
autoload :Operations, "canon/tree_diff/operations"
|
|
30
|
+
autoload :TreeDiffIntegrator, "canon/tree_diff/tree_diff_integrator"
|
|
6
31
|
end
|
|
7
32
|
end
|
|
8
|
-
|
|
9
|
-
# Load core components
|
|
10
|
-
require_relative "tree_diff/core/tree_node"
|
|
11
|
-
require_relative "tree_diff/core/node_signature"
|
|
12
|
-
require_relative "tree_diff/core/node_weight"
|
|
13
|
-
require_relative "tree_diff/core/matching"
|
|
14
|
-
|
|
15
|
-
# Load matchers
|
|
16
|
-
require_relative "tree_diff/matchers/hash_matcher"
|
|
17
|
-
require_relative "tree_diff/matchers/similarity_matcher"
|
|
18
|
-
require_relative "tree_diff/matchers/structural_propagator"
|
|
19
|
-
require_relative "tree_diff/matchers/universal_matcher"
|
|
20
|
-
|
|
21
|
-
# Load operations
|
|
22
|
-
require_relative "tree_diff/operations/operation"
|
|
23
|
-
require_relative "tree_diff/operations/operation_detector"
|
|
24
|
-
require_relative "tree_diff/operation_converter"
|
|
25
|
-
|
|
26
|
-
# Load adapters
|
|
27
|
-
require_relative "tree_diff/adapters/xml_adapter"
|
|
28
|
-
require_relative "tree_diff/adapters/json_adapter"
|
|
29
|
-
require_relative "tree_diff/adapters/html_adapter"
|
|
30
|
-
require_relative "tree_diff/adapters/yaml_adapter"
|
|
31
|
-
|
|
32
|
-
# Load integrator
|
|
33
|
-
require_relative "tree_diff/tree_diff_integrator"
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Format-specific validators that raise {Error} on invalid input.
|
|
5
|
+
module Validators
|
|
6
|
+
autoload :BaseValidator, "canon/validators/base_validator"
|
|
7
|
+
autoload :HtmlValidator, "canon/validators/html_validator"
|
|
8
|
+
autoload :JsonValidator, "canon/validators/json_validator"
|
|
9
|
+
autoload :XmlValidator, "canon/validators/xml_validator"
|
|
10
|
+
autoload :YamlValidator, "canon/validators/yaml_validator"
|
|
11
|
+
end
|
|
12
|
+
end
|
data/lib/canon/version.rb
CHANGED
data/lib/canon/xml/c14n.rb
CHANGED
data/lib/canon/xml/data_model.rb
CHANGED
|
@@ -2,16 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
4
|
require "set"
|
|
5
|
-
require_relative "../data_model"
|
|
6
|
-
require_relative "../xml_backend"
|
|
7
|
-
require_relative "../xml_parsing"
|
|
8
|
-
require_relative "nodes/root_node"
|
|
9
|
-
require_relative "nodes/element_node"
|
|
10
|
-
require_relative "nodes/namespace_node"
|
|
11
|
-
require_relative "nodes/attribute_node"
|
|
12
|
-
require_relative "nodes/text_node"
|
|
13
|
-
require_relative "nodes/comment_node"
|
|
14
|
-
require_relative "nodes/processing_instruction_node"
|
|
15
5
|
|
|
16
6
|
module Canon
|
|
17
7
|
module Xml
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
module Xml
|
|
5
|
+
# XPath data model node types. All nodes inherit from
|
|
6
|
+
# {Canon::Xml::Node}. Children are autoloaded — never
|
|
7
|
+
# `require_relative` them.
|
|
8
|
+
module Nodes
|
|
9
|
+
autoload :AttributeNode, "canon/xml/nodes/attribute_node"
|
|
10
|
+
autoload :CommentNode, "canon/xml/nodes/comment_node"
|
|
11
|
+
autoload :ElementNode, "canon/xml/nodes/element_node"
|
|
12
|
+
autoload :NamespaceNode, "canon/xml/nodes/namespace_node"
|
|
13
|
+
autoload :ProcessingInstructionNode,
|
|
14
|
+
"canon/xml/nodes/processing_instruction_node"
|
|
15
|
+
autoload :RootNode, "canon/xml/nodes/root_node"
|
|
16
|
+
autoload :TextNode, "canon/xml/nodes/text_node"
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/canon/xml/processor.rb
CHANGED
|
@@ -1,13 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
|
-
require_relative "nodes/root_node"
|
|
5
|
-
require_relative "nodes/element_node"
|
|
6
|
-
require_relative "nodes/namespace_node"
|
|
7
|
-
require_relative "nodes/attribute_node"
|
|
8
|
-
require_relative "nodes/text_node"
|
|
9
|
-
require_relative "nodes/comment_node"
|
|
10
|
-
require_relative "nodes/processing_instruction_node"
|
|
11
4
|
|
|
12
5
|
module Canon
|
|
13
6
|
module Xml
|
data/lib/canon/xml.rb
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Canon
|
|
4
|
+
# Native XML data model, parsing, and DOM utilities.
|
|
5
|
+
#
|
|
6
|
+
# This namespace holds Canon's own XML representation (independent of
|
|
7
|
+
# Nokogiri/Moxml), including:
|
|
8
|
+
# * the XPath data model nodes (Canon::Xml::Node and Canon::Xml::Nodes::*)
|
|
9
|
+
# * the SAX builder that produces them
|
|
10
|
+
# * element matching, line-range mapping, xpath, C14N, processors
|
|
11
|
+
# * canonicalization (C14n) and serialization helpers
|
|
12
|
+
#
|
|
13
|
+
# All children are autoloaded from this file. The nested Nodes namespace
|
|
14
|
+
# is itself a sibling and is autoloaded on first reference to
|
|
15
|
+
# Canon::Xml::Nodes.
|
|
16
|
+
module Xml
|
|
17
|
+
autoload :AttributeHandler, "canon/xml/attribute_handler"
|
|
18
|
+
autoload :C14n, "canon/xml/c14n"
|
|
19
|
+
autoload :CharacterEncoder, "canon/xml/character_encoder"
|
|
20
|
+
autoload :DataModel, "canon/xml/data_model"
|
|
21
|
+
autoload :ElementMatcher, "canon/xml/element_matcher"
|
|
22
|
+
autoload :LineRangeMapper, "canon/xml/line_range_mapper"
|
|
23
|
+
autoload :NamespaceHandler, "canon/xml/namespace_handler"
|
|
24
|
+
autoload :NamespaceHelper, "canon/xml/namespace_helper"
|
|
25
|
+
autoload :Node, "canon/xml/node"
|
|
26
|
+
autoload :Nodes, "canon/xml/nodes"
|
|
27
|
+
autoload :Processor, "canon/xml/processor"
|
|
28
|
+
autoload :SaxBuilder, "canon/xml/sax_builder"
|
|
29
|
+
autoload :WhitespaceNormalizer, "canon/xml/whitespace_normalizer"
|
|
30
|
+
autoload :XmlBaseHandler, "canon/xml/xml_base_handler"
|
|
31
|
+
autoload :XPathEngine, "canon/xml/xpath_engine"
|
|
32
|
+
end
|
|
33
|
+
end
|
data/lib/canon/xml_backend.rb
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Canon
|
|
4
|
-
# Centralized XML backend detection for Canon.
|
|
5
|
-
#
|
|
6
|
-
# Canon supports two XML backends:
|
|
7
|
-
# - :nokogiri — MRI with Nokogiri installed (default, existing code path)
|
|
8
|
-
# - :moxml — Opal runtime or MRI without Nokogiri (uses Oga via moxml)
|
|
9
|
-
#
|
|
10
|
-
# The active backend is determined once at load time and cached.
|
|
11
|
-
# All XML-related code should check `Canon::XmlBackend.moxml?` or
|
|
12
|
-
# `Canon::XmlBackend.nokogiri?` to select the appropriate code path.
|
|
13
|
-
#
|
|
14
|
-
# This module intentionally does NOT wrap Nokogiri through moxml.
|
|
15
|
-
# Each backend path is independent — the Nokogiri path is the existing
|
|
16
|
-
# battle-tested code; the moxml path is a parallel implementation for
|
|
17
|
-
# environments where Nokogiri is unavailable.
|
|
18
4
|
module XmlBackend
|
|
19
5
|
class << self
|
|
20
6
|
def active
|
|
@@ -33,6 +19,56 @@ module Canon
|
|
|
33
19
|
@active = nil
|
|
34
20
|
end
|
|
35
21
|
|
|
22
|
+
# Whether the node is a document fragment (any variant).
|
|
23
|
+
def document_fragment?(node)
|
|
24
|
+
if nokogiri?
|
|
25
|
+
node.is_a?(Nokogiri::XML::DocumentFragment) ||
|
|
26
|
+
node.is_a?(Nokogiri::HTML4::DocumentFragment) ||
|
|
27
|
+
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
28
|
+
else
|
|
29
|
+
false
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Whether the node is an HTML document (any variant).
|
|
34
|
+
def html_document?(node)
|
|
35
|
+
if nokogiri?
|
|
36
|
+
node.is_a?(Nokogiri::HTML::Document) ||
|
|
37
|
+
node.is_a?(Nokogiri::HTML4::Document) ||
|
|
38
|
+
node.is_a?(Nokogiri::HTML5::Document)
|
|
39
|
+
else
|
|
40
|
+
false
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Detect HTML version from a Nokogiri node.
|
|
45
|
+
# Returns :html5 or :html4. Defaults to :html5 for non-Nokogiri nodes.
|
|
46
|
+
def html_version_from_node(node)
|
|
47
|
+
if nokogiri?
|
|
48
|
+
if node.is_a?(Nokogiri::HTML5::Document) ||
|
|
49
|
+
node.is_a?(Nokogiri::HTML5::DocumentFragment)
|
|
50
|
+
:html5
|
|
51
|
+
elsif node.is_a?(Nokogiri::HTML4::Document) ||
|
|
52
|
+
node.is_a?(Nokogiri::HTML4::DocumentFragment)
|
|
53
|
+
:html4
|
|
54
|
+
else
|
|
55
|
+
:html5
|
|
56
|
+
end
|
|
57
|
+
else
|
|
58
|
+
:html5
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Parse an HTML string into an XML fragment.
|
|
63
|
+
def xml_fragment(html_string)
|
|
64
|
+
if nokogiri?
|
|
65
|
+
Nokogiri::XML.fragment(html_string)
|
|
66
|
+
else
|
|
67
|
+
raise Canon::Error,
|
|
68
|
+
"HTML fragment parsing requires the Nokogiri backend"
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
36
72
|
private
|
|
37
73
|
|
|
38
74
|
def detect
|
data/lib/canon/xml_parsing.rb
CHANGED
|
@@ -264,8 +264,10 @@ module Canon
|
|
|
264
264
|
node.to_xml
|
|
265
265
|
end
|
|
266
266
|
|
|
267
|
-
def moxml_canonicalize(
|
|
268
|
-
|
|
267
|
+
def moxml_canonicalize(_node, _options)
|
|
268
|
+
raise Canon::Error,
|
|
269
|
+
"C14N canonicalization is not supported by the moxml backend. " \
|
|
270
|
+
"Use the Nokogiri backend or a different preprocessing option."
|
|
269
271
|
end
|
|
270
272
|
|
|
271
273
|
def moxml_node_type(node)
|
data/lib/canon.rb
CHANGED
|
@@ -1,23 +1,31 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
3
|
+
require "canon/version"
|
|
4
|
+
require "canon/errors"
|
|
5
|
+
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
6
|
+
require "canon/xml_backend"
|
|
7
|
+
require "canon/xml_parsing"
|
|
8
|
+
require "canon/config"
|
|
9
|
+
require "canon/data_model"
|
|
10
|
+
require "canon/xml"
|
|
11
|
+
require "canon/html"
|
|
12
|
+
require "canon/formatters"
|
|
13
|
+
require "canon/comparison"
|
|
14
|
+
require "canon/diff"
|
|
15
|
+
require "canon/tree_diff"
|
|
16
|
+
require "canon/validators"
|
|
17
|
+
require "canon/pretty_printer"
|
|
18
|
+
require "canon/options"
|
|
19
|
+
require "canon/commands"
|
|
17
20
|
|
|
18
|
-
|
|
21
|
+
require "canon/rspec_matchers" if defined?(RSpec.configure)
|
|
19
22
|
|
|
20
23
|
module Canon
|
|
24
|
+
autoload :Cache, "canon/cache"
|
|
25
|
+
autoload :Cli, "canon/cli"
|
|
26
|
+
autoload :ColorDetector, "canon/color_detector"
|
|
27
|
+
autoload :DiffFormatter, "canon/diff_formatter"
|
|
28
|
+
|
|
21
29
|
SUPPORTED_FORMATS = %i[xml yaml json html html4 html5 string].freeze
|
|
22
30
|
|
|
23
31
|
# Format content based on the specified format type
|
|
@@ -60,6 +68,8 @@ module Canon
|
|
|
60
68
|
# Define shorthand methods for each supported format
|
|
61
69
|
# Creates parse_{format} and format_{format} methods
|
|
62
70
|
SUPPORTED_FORMATS.each do |format|
|
|
71
|
+
next if format == :string # comparison-only format, no formatter
|
|
72
|
+
|
|
63
73
|
define_singleton_method("parse_#{format}") do |content|
|
|
64
74
|
parse(content, format)
|
|
65
75
|
end
|
data/lib/tasks/performance.rake
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "performance_comparator"
|
|
4
|
-
require_relative "benchmark_runner"
|
|
5
4
|
|
|
6
5
|
desc "Run performance benchmarks"
|
|
7
6
|
namespace :performance do
|
|
@@ -16,66 +15,9 @@ namespace :performance do
|
|
|
16
15
|
runner.run_benchmarks
|
|
17
16
|
end
|
|
18
17
|
|
|
19
|
-
desc "Run specific benchmark category (xml_parsing, html_parsing, xml_comparison, html_comparison, formatting)"
|
|
20
|
-
task :category, [:name] do |_t, args|
|
|
21
|
-
category = args[:name]
|
|
22
|
-
unless PerformanceComparator::BENCHMARK_CATEGORIES.key?(category.to_sym)
|
|
23
|
-
puts "Unknown category: #{category}"
|
|
24
|
-
puts "Available: #{PerformanceComparator::BENCHMARK_CATEGORIES.keys.join(', ')}"
|
|
25
|
-
exit(1)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
runner = BenchmarkRunner.new(run_time: 10)
|
|
29
|
-
runner.run_benchmarks
|
|
30
|
-
end
|
|
31
|
-
|
|
32
18
|
desc "Quick benchmark run (faster, less accurate)"
|
|
33
19
|
task :quick do
|
|
34
20
|
runner = BenchmarkRunner.new(run_time: 2, warmup: 1, items: 20)
|
|
35
21
|
runner.run_benchmarks
|
|
36
22
|
end
|
|
37
|
-
|
|
38
|
-
desc "Run benchmarks and output as JSON"
|
|
39
|
-
task :json do
|
|
40
|
-
require "json"
|
|
41
|
-
runner = BenchmarkRunner.new(run_time: 5)
|
|
42
|
-
|
|
43
|
-
# Suppress pretty output, just get results
|
|
44
|
-
results = runner.send(:run_all_benchmarks)
|
|
45
|
-
|
|
46
|
-
output = results.each_with_object({}) do |(label, metrics), h|
|
|
47
|
-
ips = (metrics[:lower] + metrics[:upper]) / 2.0
|
|
48
|
-
deviation = ((metrics[:upper] - metrics[:lower]) / metrics[:upper] * 100).round(1)
|
|
49
|
-
h[label] = {
|
|
50
|
-
ips: ips.round(2),
|
|
51
|
-
lower: metrics[:lower].round(2),
|
|
52
|
-
upper: metrics[:upper].round(2),
|
|
53
|
-
deviation: deviation,
|
|
54
|
-
}
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
puts JSON.pretty_generate(output)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
desc "Run benchmarks and output as YAML"
|
|
61
|
-
task :yaml do
|
|
62
|
-
require "yaml"
|
|
63
|
-
runner = BenchmarkRunner.new(run_time: 5)
|
|
64
|
-
|
|
65
|
-
# Suppress pretty output, just get results
|
|
66
|
-
results = runner.send(:run_all_benchmarks)
|
|
67
|
-
|
|
68
|
-
output = results.each_with_object({}) do |(label, metrics), h|
|
|
69
|
-
ips = (metrics[:lower] + metrics[:upper]) / 2.0
|
|
70
|
-
deviation = ((metrics[:upper] - metrics[:lower]) / metrics[:upper] * 100).round(1)
|
|
71
|
-
h[label.to_sym] = {
|
|
72
|
-
ips: ips.round(2),
|
|
73
|
-
lower: metrics[:lower].round(2),
|
|
74
|
-
upper: metrics[:upper].round(2),
|
|
75
|
-
deviation: deviation,
|
|
76
|
-
}
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
puts YAML.dump(output)
|
|
80
|
-
end
|
|
81
23
|
end
|