canon 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.rubocop.yml +9 -1
 - data/.rubocop_todo.yml +276 -7
 - data/README.adoc +203 -138
 - data/_config.yml +116 -0
 - data/docs/ADVANCED_TOPICS.adoc +20 -0
 - data/docs/BASIC_USAGE.adoc +16 -0
 - data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
 - data/docs/CLI.adoc +493 -0
 - data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
 - data/docs/DIFF_ARCHITECTURE.adoc +435 -0
 - data/docs/DIFF_FORMATTING.adoc +540 -0
 - data/docs/FORMATS.adoc +447 -0
 - data/docs/INDEX.adoc +222 -0
 - data/docs/INPUT_VALIDATION.adoc +477 -0
 - data/docs/MATCH_ARCHITECTURE.adoc +463 -0
 - data/docs/MATCH_OPTIONS.adoc +719 -0
 - data/docs/MODES.adoc +432 -0
 - data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
 - data/docs/OPTIONS.adoc +1387 -0
 - data/docs/PREPROCESSING.adoc +491 -0
 - data/docs/RSPEC.adoc +605 -0
 - data/docs/RUBY_API.adoc +478 -0
 - data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
 - data/docs/UNDERSTANDING_CANON.adoc +17 -0
 - data/docs/VERBOSE.adoc +482 -0
 - data/exe/canon +7 -0
 - data/lib/canon/cli.rb +179 -0
 - data/lib/canon/commands/diff_command.rb +195 -0
 - data/lib/canon/commands/format_command.rb +113 -0
 - data/lib/canon/comparison/base_comparator.rb +39 -0
 - data/lib/canon/comparison/comparison_result.rb +79 -0
 - data/lib/canon/comparison/html_comparator.rb +410 -0
 - data/lib/canon/comparison/json_comparator.rb +212 -0
 - data/lib/canon/comparison/match_options.rb +616 -0
 - data/lib/canon/comparison/xml_comparator.rb +566 -0
 - data/lib/canon/comparison/yaml_comparator.rb +93 -0
 - data/lib/canon/comparison.rb +239 -0
 - data/lib/canon/config.rb +172 -0
 - data/lib/canon/diff/diff_block.rb +71 -0
 - data/lib/canon/diff/diff_block_builder.rb +105 -0
 - data/lib/canon/diff/diff_classifier.rb +46 -0
 - data/lib/canon/diff/diff_context.rb +85 -0
 - data/lib/canon/diff/diff_context_builder.rb +107 -0
 - data/lib/canon/diff/diff_line.rb +77 -0
 - data/lib/canon/diff/diff_node.rb +56 -0
 - data/lib/canon/diff/diff_node_mapper.rb +148 -0
 - data/lib/canon/diff/diff_report.rb +133 -0
 - data/lib/canon/diff/diff_report_builder.rb +62 -0
 - data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
 - data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
 - data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
 - data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
 - data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
 - data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
 - data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
 - data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
 - data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
 - data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
 - data/lib/canon/diff_formatter/character_map.yml +197 -0
 - data/lib/canon/diff_formatter/debug_output.rb +431 -0
 - data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
 - data/lib/canon/diff_formatter/legend.rb +141 -0
 - data/lib/canon/diff_formatter.rb +520 -0
 - data/lib/canon/errors.rb +56 -0
 - data/lib/canon/formatters/html4_formatter.rb +17 -0
 - data/lib/canon/formatters/html5_formatter.rb +17 -0
 - data/lib/canon/formatters/html_formatter.rb +37 -0
 - data/lib/canon/formatters/html_formatter_base.rb +163 -0
 - data/lib/canon/formatters/json_formatter.rb +3 -0
 - data/lib/canon/formatters/xml_formatter.rb +20 -55
 - data/lib/canon/formatters/yaml_formatter.rb +4 -1
 - data/lib/canon/pretty_printer/html.rb +57 -0
 - data/lib/canon/pretty_printer/json.rb +25 -0
 - data/lib/canon/pretty_printer/xml.rb +29 -0
 - data/lib/canon/rspec_matchers.rb +222 -80
 - data/lib/canon/validators/base_validator.rb +49 -0
 - data/lib/canon/validators/html_validator.rb +138 -0
 - data/lib/canon/validators/json_validator.rb +89 -0
 - data/lib/canon/validators/xml_validator.rb +53 -0
 - data/lib/canon/validators/yaml_validator.rb +73 -0
 - data/lib/canon/version.rb +1 -1
 - data/lib/canon/xml/attribute_handler.rb +80 -0
 - data/lib/canon/xml/c14n.rb +36 -0
 - data/lib/canon/xml/character_encoder.rb +38 -0
 - data/lib/canon/xml/data_model.rb +225 -0
 - data/lib/canon/xml/element_matcher.rb +196 -0
 - data/lib/canon/xml/line_range_mapper.rb +158 -0
 - data/lib/canon/xml/namespace_handler.rb +86 -0
 - data/lib/canon/xml/node.rb +32 -0
 - data/lib/canon/xml/nodes/attribute_node.rb +54 -0
 - data/lib/canon/xml/nodes/comment_node.rb +23 -0
 - data/lib/canon/xml/nodes/element_node.rb +56 -0
 - data/lib/canon/xml/nodes/namespace_node.rb +38 -0
 - data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
 - data/lib/canon/xml/nodes/root_node.rb +16 -0
 - data/lib/canon/xml/nodes/text_node.rb +23 -0
 - data/lib/canon/xml/processor.rb +151 -0
 - data/lib/canon/xml/whitespace_normalizer.rb +72 -0
 - data/lib/canon/xml/xml_base_handler.rb +188 -0
 - data/lib/canon.rb +14 -3
 - metadata +116 -21
 
    
        data/lib/canon/rspec_matchers.rb
    CHANGED
    
    | 
         @@ -1,8 +1,9 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            require "canon" unless defined?(::Canon)
         
     | 
| 
       4 
     | 
    
         
            -
            require " 
     | 
| 
       5 
     | 
    
         
            -
            require " 
     | 
| 
      
 4 
     | 
    
         
            +
            require "canon/comparison"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "canon/diff_formatter"
         
     | 
| 
      
 6 
     | 
    
         
            +
            require "canon/config"
         
     | 
| 
       6 
7 
     | 
    
         | 
| 
       7 
8 
     | 
    
         
             
            begin
         
     | 
| 
       8 
9 
     | 
    
         
             
              require "rspec/expectations"
         
     | 
| 
         @@ -11,117 +12,226 @@ end 
     | 
|
| 
       11 
12 
     | 
    
         | 
| 
       12 
13 
     | 
    
         
             
            module Canon
         
     | 
| 
       13 
14 
     | 
    
         
             
              module RSpecMatchers
         
     | 
| 
      
 15 
     | 
    
         
            +
                # Configuration for RSpec matchers - delegates to Canon::Config
         
     | 
| 
      
 16 
     | 
    
         
            +
                class << self
         
     | 
| 
      
 17 
     | 
    
         
            +
                  def configure
         
     | 
| 
      
 18 
     | 
    
         
            +
                    yield Canon::Config.configure
         
     | 
| 
      
 19 
     | 
    
         
            +
                  end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                  def reset_config
         
     | 
| 
      
 22 
     | 
    
         
            +
                    Canon::Config.reset!
         
     | 
| 
      
 23 
     | 
    
         
            +
                  end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                  # Delegate configuration getters to Canon::Config
         
     | 
| 
      
 26 
     | 
    
         
            +
                  def xml
         
     | 
| 
      
 27 
     | 
    
         
            +
                    Canon::Config.instance.xml
         
     | 
| 
      
 28 
     | 
    
         
            +
                  end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                  def html
         
     | 
| 
      
 31 
     | 
    
         
            +
                    Canon::Config.instance.html
         
     | 
| 
      
 32 
     | 
    
         
            +
                  end
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                  def json
         
     | 
| 
      
 35 
     | 
    
         
            +
                    Canon::Config.instance.json
         
     | 
| 
      
 36 
     | 
    
         
            +
                  end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                  def yaml
         
     | 
| 
      
 39 
     | 
    
         
            +
                    Canon::Config.instance.yaml
         
     | 
| 
      
 40 
     | 
    
         
            +
                  end
         
     | 
| 
      
 41 
     | 
    
         
            +
                end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
       14 
43 
     | 
    
         
             
                # Base matcher class for serialization equivalence
         
     | 
| 
      
 44 
     | 
    
         
            +
                # This is a THIN WRAPPER around Canon::Comparison API
         
     | 
| 
       15 
45 
     | 
    
         
             
                class SerializationMatcher
         
     | 
| 
       16 
     | 
    
         
            -
                  def initialize(expected, format = : 
     | 
| 
      
 46 
     | 
    
         
            +
                  def initialize(expected, format = nil, match_profile: nil,
         
     | 
| 
      
 47 
     | 
    
         
            +
                                 match: nil, preprocessing: nil)
         
     | 
| 
       17 
48 
     | 
    
         
             
                    @expected = expected
         
     | 
| 
       18 
     | 
    
         
            -
                     
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
                     
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                    @format = format.to_sym
         
     | 
| 
       23 
     | 
    
         
            -
                    @result = nil
         
     | 
| 
      
 49 
     | 
    
         
            +
                    @format = format&.to_sym
         
     | 
| 
      
 50 
     | 
    
         
            +
                    @match_profile = match_profile
         
     | 
| 
      
 51 
     | 
    
         
            +
                    @match = match
         
     | 
| 
      
 52 
     | 
    
         
            +
                    @preprocessing = preprocessing
         
     | 
| 
       24 
53 
     | 
    
         
             
                  end
         
     | 
| 
       25 
54 
     | 
    
         | 
| 
       26 
55 
     | 
    
         
             
                  def matches?(target)
         
     | 
| 
       27 
56 
     | 
    
         
             
                    @target = target
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
                     
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
                      },
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
                    # Build comparison options from config and matcher params
         
     | 
| 
      
 59 
     | 
    
         
            +
                    opts = build_comparison_options
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
                    # Add format hint if explicitly provided
         
     | 
| 
      
 62 
     | 
    
         
            +
                    opts[:format] = @format if @format
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                    # Delegate to Canon::Comparison.equivalent? - the SINGLE source of truth
         
     | 
| 
      
 65 
     | 
    
         
            +
                    # Comparison handles format detection, HTML parsing, and all business logic
         
     | 
| 
      
 66 
     | 
    
         
            +
                    @comparison_result = Canon::Comparison.equivalent?(
         
     | 
| 
      
 67 
     | 
    
         
            +
                      @expected,
         
     | 
| 
      
 68 
     | 
    
         
            +
                      @target,
         
     | 
| 
      
 69 
     | 
    
         
            +
                      opts,
         
     | 
| 
       42 
70 
     | 
    
         
             
                    )
         
     | 
| 
       43 
71 
     | 
    
         | 
| 
       44 
     | 
    
         
            -
                     
     | 
| 
      
 72 
     | 
    
         
            +
                    # When verbose: true, result is a ComparisonResult object
         
     | 
| 
      
 73 
     | 
    
         
            +
                    # Use the equivalent? method to check for normative differences
         
     | 
| 
      
 74 
     | 
    
         
            +
                    case @comparison_result
         
     | 
| 
      
 75 
     | 
    
         
            +
                    when Canon::Comparison::ComparisonResult
         
     | 
| 
      
 76 
     | 
    
         
            +
                      @comparison_result.equivalent?
         
     | 
| 
      
 77 
     | 
    
         
            +
                    when Hash
         
     | 
| 
      
 78 
     | 
    
         
            +
                      # Legacy format - Hash with :differences array and :preprocessed strings
         
     | 
| 
      
 79 
     | 
    
         
            +
                      @comparison_result[:differences].empty?
         
     | 
| 
      
 80 
     | 
    
         
            +
                    when Array
         
     | 
| 
      
 81 
     | 
    
         
            +
                      # Legacy format - XML/JSON/YAML returns []
         
     | 
| 
      
 82 
     | 
    
         
            +
                      @comparison_result.empty?
         
     | 
| 
      
 83 
     | 
    
         
            +
                    else
         
     | 
| 
      
 84 
     | 
    
         
            +
                      # Boolean result
         
     | 
| 
      
 85 
     | 
    
         
            +
                      @comparison_result
         
     | 
| 
      
 86 
     | 
    
         
            +
                    end
         
     | 
| 
       45 
87 
     | 
    
         
             
                  end
         
     | 
| 
       46 
88 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
                   
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                    canonicalize_and_compare(:yaml)
         
     | 
| 
      
 89 
     | 
    
         
            +
                  def failure_message
         
     | 
| 
      
 90 
     | 
    
         
            +
                    "expected #{format_name} to be equivalent\n\n#{diff_output}"
         
     | 
| 
       50 
91 
     | 
    
         
             
                  end
         
     | 
| 
       51 
92 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
                  def  
     | 
| 
       53 
     | 
    
         
            -
                     
     | 
| 
      
 93 
     | 
    
         
            +
                  def failure_message_when_negated
         
     | 
| 
      
 94 
     | 
    
         
            +
                    "expected #{format_name} not to be equivalent"
         
     | 
| 
       54 
95 
     | 
    
         
             
                  end
         
     | 
| 
       55 
96 
     | 
    
         | 
| 
       56 
     | 
    
         
            -
                   
     | 
| 
      
 97 
     | 
    
         
            +
                  def expected
         
     | 
| 
      
 98 
     | 
    
         
            +
                    @expected
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
       57 
100 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
                  def  
     | 
| 
       59 
     | 
    
         
            -
                    @ 
     | 
| 
       60 
     | 
    
         
            -
                    @expected_sorted = Canon.format(@expected, format)
         
     | 
| 
       61 
     | 
    
         
            -
                    @actual_sorted == @expected_sorted
         
     | 
| 
      
 101 
     | 
    
         
            +
                  def actual
         
     | 
| 
      
 102 
     | 
    
         
            +
                    @target
         
     | 
| 
       62 
103 
     | 
    
         
             
                  end
         
     | 
| 
       63 
104 
     | 
    
         | 
| 
       64 
     | 
    
         
            -
                  def  
     | 
| 
       65 
     | 
    
         
            -
                     
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
      
 105 
     | 
    
         
            +
                  def diffable
         
     | 
| 
      
 106 
     | 
    
         
            +
                    false
         
     | 
| 
      
 107 
     | 
    
         
            +
                  end
         
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
      
 109 
     | 
    
         
            +
                  private
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
                  def format_name
         
     | 
| 
      
 112 
     | 
    
         
            +
                    # Use explicitly provided format if available
         
     | 
| 
      
 113 
     | 
    
         
            +
                    if @format
         
     | 
| 
      
 114 
     | 
    
         
            +
                      case @format
         
     | 
| 
      
 115 
     | 
    
         
            +
                      when :html4, :html5 then "HTML"
         
     | 
| 
      
 116 
     | 
    
         
            +
                      when :string then "STRING"
         
     | 
| 
      
 117 
     | 
    
         
            +
                      else @format.to_s.upcase
         
     | 
| 
      
 118 
     | 
    
         
            +
                      end
         
     | 
| 
      
 119 
     | 
    
         
            +
                    else
         
     | 
| 
      
 120 
     | 
    
         
            +
                      # Fall back to detection only if format not provided
         
     | 
| 
      
 121 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 122 
     | 
    
         
            +
                        detected_format = Canon::Comparison.send(:detect_format, @expected)
         
     | 
| 
      
 123 
     | 
    
         
            +
                        detected_format.to_s.upcase
         
     | 
| 
      
 124 
     | 
    
         
            +
                      rescue StandardError
         
     | 
| 
      
 125 
     | 
    
         
            +
                        "CONTENT"
         
     | 
| 
      
 126 
     | 
    
         
            +
                      end
         
     | 
| 
       70 
127 
     | 
    
         
             
                    end
         
     | 
| 
       71 
128 
     | 
    
         
             
                  end
         
     | 
| 
       72 
129 
     | 
    
         | 
| 
       73 
     | 
    
         
            -
                  def  
     | 
| 
       74 
     | 
    
         
            -
                     
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
      
 130 
     | 
    
         
            +
                  def build_comparison_options
         
     | 
| 
      
 131 
     | 
    
         
            +
                    opts = { verbose: true } # Always use verbose for diff generation
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                    # Add per-test parameters (highest priority)
         
     | 
| 
      
 134 
     | 
    
         
            +
                    opts[:match_profile] = @match_profile if @match_profile
         
     | 
| 
      
 135 
     | 
    
         
            +
                    opts[:match] = @match if @match
         
     | 
| 
      
 136 
     | 
    
         
            +
                    opts[:preprocessing] = @preprocessing if @preprocessing
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
                    # Add global configuration from Canon::Config (lower priority)
         
     | 
| 
      
 139 
     | 
    
         
            +
                    if @format
         
     | 
| 
      
 140 
     | 
    
         
            +
                      config_format = normalize_format_for_config(@format)
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
                      # Only access config if format is supported
         
     | 
| 
      
 143 
     | 
    
         
            +
                      if Canon::Config.instance.respond_to?(config_format)
         
     | 
| 
      
 144 
     | 
    
         
            +
                        format_config = Canon::Config.instance.public_send(config_format)
         
     | 
| 
      
 145 
     | 
    
         
            +
                        if format_config.match.profile
         
     | 
| 
      
 146 
     | 
    
         
            +
                          opts[:global_profile] =
         
     | 
| 
      
 147 
     | 
    
         
            +
                            format_config.match.profile
         
     | 
| 
      
 148 
     | 
    
         
            +
                        end
         
     | 
| 
      
 149 
     | 
    
         
            +
                        unless format_config.match.options.empty?
         
     | 
| 
      
 150 
     | 
    
         
            +
                          opts[:global_options] =
         
     | 
| 
      
 151 
     | 
    
         
            +
                            format_config.match.options
         
     | 
| 
      
 152 
     | 
    
         
            +
                        end
         
     | 
| 
      
 153 
     | 
    
         
            +
                        opts[:preprocessing] ||= format_config.preprocessing
         
     | 
| 
      
 154 
     | 
    
         
            +
                      elsif !%i[xml html html4 html5 json yaml
         
     | 
| 
      
 155 
     | 
    
         
            +
                                string].include?(@format)
         
     | 
| 
      
 156 
     | 
    
         
            +
                        # Unsupported format - raise error early
         
     | 
| 
      
 157 
     | 
    
         
            +
                        raise Canon::Error, "Unsupported format: #{@format}"
         
     | 
| 
      
 158 
     | 
    
         
            +
                      end
         
     | 
| 
      
 159 
     | 
    
         
            +
                    end
         
     | 
| 
       94 
160 
     | 
    
         | 
| 
       95 
     | 
    
         
            -
                     
     | 
| 
       96 
     | 
    
         
            -
                      "Diff:\n" +
         
     | 
| 
       97 
     | 
    
         
            -
                      diff.to_s(:color)
         
     | 
| 
      
 161 
     | 
    
         
            +
                    opts
         
     | 
| 
       98 
162 
     | 
    
         
             
                  end
         
     | 
| 
       99 
163 
     | 
    
         | 
| 
       100 
     | 
    
         
            -
                  def  
     | 
| 
       101 
     | 
    
         
            -
                     
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
       105 
     | 
    
         
            -
                      @expected.to_s,
         
     | 
| 
       106 
     | 
    
         
            -
                    ].join("\n")
         
     | 
| 
      
 164 
     | 
    
         
            +
                  def normalize_format_for_config(format)
         
     | 
| 
      
 165 
     | 
    
         
            +
                    case format
         
     | 
| 
      
 166 
     | 
    
         
            +
                    when :html4, :html5 then :html
         
     | 
| 
      
 167 
     | 
    
         
            +
                    else format
         
     | 
| 
      
 168 
     | 
    
         
            +
                    end
         
     | 
| 
       107 
169 
     | 
    
         
             
                  end
         
     | 
| 
       108 
170 
     | 
    
         | 
| 
       109 
     | 
    
         
            -
                  def  
     | 
| 
       110 
     | 
    
         
            -
                     
     | 
| 
      
 171 
     | 
    
         
            +
                  def diff_output
         
     | 
| 
      
 172 
     | 
    
         
            +
                    # For string format, use simple diff since there's no comparison_result
         
     | 
| 
      
 173 
     | 
    
         
            +
                    if @format == :string
         
     | 
| 
      
 174 
     | 
    
         
            +
                      config_format = :xml # Use XML config as fallback for string
         
     | 
| 
      
 175 
     | 
    
         
            +
                      diff_config = Canon::Config.instance.public_send(config_format).diff
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                      formatter = Canon::DiffFormatter.new(
         
     | 
| 
      
 178 
     | 
    
         
            +
                        use_color: diff_config.use_color,
         
     | 
| 
      
 179 
     | 
    
         
            +
                        mode: :by_line, # Always use by_line for strings
         
     | 
| 
      
 180 
     | 
    
         
            +
                        context_lines: diff_config.context_lines,
         
     | 
| 
      
 181 
     | 
    
         
            +
                        diff_grouping_lines: diff_config.grouping_lines,
         
     | 
| 
      
 182 
     | 
    
         
            +
                        show_diffs: diff_config.show_diffs,
         
     | 
| 
      
 183 
     | 
    
         
            +
                      )
         
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
      
 185 
     | 
    
         
            +
                      return formatter.format([], :string, doc1: @expected.to_s,
         
     | 
| 
      
 186 
     | 
    
         
            +
                                                           doc2: @target.to_s)
         
     | 
| 
      
 187 
     | 
    
         
            +
                    end
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
                    # Get diff configuration
         
     | 
| 
      
 190 
     | 
    
         
            +
                    config_format = normalize_format_for_config(@format || :xml)
         
     | 
| 
      
 191 
     | 
    
         
            +
                    diff_config = Canon::Config.instance.public_send(config_format).diff
         
     | 
| 
      
 192 
     | 
    
         
            +
             
     | 
| 
      
 193 
     | 
    
         
            +
                    # Delegate to Canon::DiffFormatter - the SINGLE source of diff generation
         
     | 
| 
      
 194 
     | 
    
         
            +
                    formatter = Canon::DiffFormatter.new(
         
     | 
| 
      
 195 
     | 
    
         
            +
                      use_color: diff_config.use_color,
         
     | 
| 
      
 196 
     | 
    
         
            +
                      mode: diff_config.mode,
         
     | 
| 
      
 197 
     | 
    
         
            +
                      context_lines: diff_config.context_lines,
         
     | 
| 
      
 198 
     | 
    
         
            +
                      diff_grouping_lines: diff_config.grouping_lines,
         
     | 
| 
      
 199 
     | 
    
         
            +
                      show_diffs: diff_config.show_diffs,
         
     | 
| 
      
 200 
     | 
    
         
            +
                      verbose_diff: diff_config.verbose_diff,
         
     | 
| 
      
 201 
     | 
    
         
            +
                    )
         
     | 
| 
      
 202 
     | 
    
         
            +
             
     | 
| 
      
 203 
     | 
    
         
            +
                    # Format the diff using the comparison result
         
     | 
| 
      
 204 
     | 
    
         
            +
                    formatter.format_comparison_result(@comparison_result, @expected,
         
     | 
| 
      
 205 
     | 
    
         
            +
                                                       @target)
         
     | 
| 
      
 206 
     | 
    
         
            +
                  rescue StandardError => e
         
     | 
| 
      
 207 
     | 
    
         
            +
                    "\nError generating diff: #{e.message}"
         
     | 
| 
       111 
208 
     | 
    
         
             
                  end
         
     | 
| 
       112 
209 
     | 
    
         
             
                end
         
     | 
| 
       113 
210 
     | 
    
         | 
| 
       114 
211 
     | 
    
         
             
                # Matcher methods
         
     | 
| 
       115 
     | 
    
         
            -
                def be_serialization_equivalent_to(expected, format: :xml 
     | 
| 
       116 
     | 
    
         
            -
             
     | 
| 
      
 212 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ in the requested format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param format [Symbol] format passed as the second argument
                #   (defaults to :xml)
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_serialization_equivalent_to(expected, format: :xml,
                                                   match_profile: nil, match: nil,
                                                   preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, format, **matcher_options)
                end
         
     | 
| 
       118 
220 
     | 
    
         | 
| 
       119 
     | 
    
         
            -
                def be_analogous_with(expected 
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
      
 221 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :xml format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_analogous_with(expected, match_profile: nil, match: nil,
                                      preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, :xml, **matcher_options)
                end
         
     | 
| 
       122 
228 
     | 
    
         | 
| 
       123 
     | 
    
         
            -
                def be_xml_equivalent_to(expected 
     | 
| 
       124 
     | 
    
         
            -
             
     | 
| 
      
 229 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :xml format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_xml_equivalent_to(expected, match_profile: nil, match: nil,
                                         preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, :xml, **matcher_options)
                end
         
     | 
| 
       126 
236 
     | 
    
         | 
| 
       127 
237 
     | 
    
         
             
                def be_yaml_equivalent_to(expected)
         
     | 
| 
         @@ -132,7 +242,39 @@ module Canon 
     | 
|
| 
       132 
242 
     | 
    
         
             
                  SerializationMatcher.new(expected, :json)
         
     | 
| 
       133 
243 
     | 
    
         
             
                end
         
     | 
| 
       134 
244 
     | 
    
         | 
| 
       135 
     | 
    
         
            -
                 
     | 
| 
      
 245 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :html format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_html_equivalent_to(expected, match_profile: nil, match: nil,
                                          preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, :html, **matcher_options)
                end
         
     | 
| 
      
 252 
     | 
    
         
            +
             
     | 
| 
      
 253 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :html4 format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_html4_equivalent_to(expected, match_profile: nil, match: nil,
                                           preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, :html4, **matcher_options)
                end
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :html5 format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @param match_profile [Object, nil] forwarded unchanged
                # @param match [Object, nil] forwarded unchanged
                # @param preprocessing [Object, nil] forwarded unchanged
                # @return [SerializationMatcher]
                def be_html5_equivalent_to(expected, match_profile: nil, match: nil,
                                           preprocessing: nil)
                  matcher_options = {
                    match_profile: match_profile,
                    match: match,
                    preprocessing: preprocessing,
                  }
                  SerializationMatcher.new(expected, :html5, **matcher_options)
                end
         
     | 
| 
      
 268 
     | 
    
         
            +
             
     | 
| 
      
 269 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ without naming a
                # format: nil is passed as the format argument.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @return [SerializationMatcher]
                def be_equivalent_to(expected)
                  unspecified_format = nil
                  SerializationMatcher.new(expected, unspecified_format)
                end
         
     | 
| 
      
 272 
     | 
    
         
            +
             
     | 
| 
      
 273 
     | 
    
         
            +
                # Build a SerializationMatcher for +expected+ using the :string format.
                #
                # @param expected [Object] value passed to SerializationMatcher as-is
                # @return [SerializationMatcher]
                def be_string_equivalent_to(expected)
                  string_format = :string
                  SerializationMatcher.new(expected, string_format)
                end
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
                if defined?(::RSpec) && ::RSpec.respond_to?(:configure)
         
     | 
| 
       136 
278 
     | 
    
         
             
                  RSpec.configure do |config|
         
     | 
| 
       137 
279 
     | 
    
         
             
                    config.include(Canon::RSpecMatchers)
         
     | 
| 
       138 
280 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -0,0 +1,49 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative "../errors"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Canon
              module Validators
                # Base class for all input validators
                #
                # This abstract base class defines the interface that all format-specific
                # validators must implement. Each validator is responsible for validating
                # input in a specific format and raising detailed ValidationError when
                # issues are found.
                class BaseValidator
                  # Validate input and raise ValidationError if invalid
                  #
                  # Subclasses must override this method; the base implementation
                  # always raises.
                  #
                  # @param input [String] The input to validate
                  # @raise [Canon::ValidationError] If input is invalid
                  # @raise [NotImplementedError] When called on a class that does not
                  #   override it
                  # @return [void]
                  def self.validate!(input)
                    raise NotImplementedError,
                          "#{name} must implement validate! method"
                  end

                  # Extract line and column information from an error
                  #
                  # For each of :line and :column the error's own accessor is used when
                  # it exists and returns a non-nil value. Otherwise the value is parsed
                  # from the error message ("line 12", "line: 12", "column 4", ...).
                  #
                  # @param error [Exception] The error containing location information
                  # @return [Hash] Hash with :line and :column keys (values may be nil)
                  def self.extract_location(error)
                    {
                      line: location_component(error, :line, /line[:\s]+(\d+)/i),
                      column: location_component(error, :column, /column[:\s]+(\d+)/i),
                    }
                  end

                  # Resolve one location component (line or column) for an error
                  #
                  # @param error [Exception] The error to inspect
                  # @param attribute [Symbol] Accessor to try first (:line or :column)
                  # @param pattern [Regexp] Pattern with one capture group used against
                  #   the error message when the accessor yields nothing
                  # @return [Integer, nil] The resolved value, or nil if unavailable
                  def self.location_component(error, attribute, pattern)
                    value = error.public_send(attribute) if error.respond_to?(attribute)
                    return value unless value.nil?

                    # The accessor is missing or returned nil: fall back to the message
                    # text so location information embedded there is not lost.
                    match = pattern.match(error.message.to_s)
                    match && match[1].to_i
                  end

                  private_class_method :location_component
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,138 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "nokogiri"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative "base_validator"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Canon
              module Validators
                # Validator for HTML input
                #
                # Validates HTML input (HTML4, HTML5, or XHTML) using Nokogiri.
                # Input that looks like XHTML is parsed with Nokogiri's strict XML
                # parser; everything else is parsed with Nokogiri::HTML5.
                # Raises detailed ValidationError with line and column information
                # when malformed HTML is detected.
                class HtmlValidator < BaseValidator
                  # Validate HTML input
                  #
                  # nil, empty and whitespace-only input is accepted without parsing.
                  #
                  # @param input [String] The HTML string to validate
                  # @raise [Canon::ValidationError] If HTML is malformed
                  # @return [void]
                  def self.validate!(input)
                    return if input.nil? || input.strip.empty?

                    cleaned_input = strip_xml_declaration(input)

                    if xhtml?(cleaned_input)
                      validate_xhtml!(cleaned_input)
                    else
                      validate_html5!(cleaned_input)
                    end
                  end

                  # Remove a leading XML declaration (it's not critical for parsing)
                  #
                  # The removed prolog is replaced by as many newlines as it contained,
                  # so line numbers reported by the parser still refer to lines of the
                  # original input.
                  #
                  # @param input [String] The raw input
                  # @return [String] The input without its leading XML declaration
                  def self.strip_xml_declaration(input)
                    input.sub(/\A\s*<\?xml[^?]*\?>\s*/) do |prolog|
                      "\n" * prolog.count("\n")
                    end
                  end

                  # Check if HTML is XHTML
                  #
                  # This is a textual heuristic: the input is treated as XHTML when it
                  # contains the string "XHTML", the XHTML namespace declaration, or any
                  # prefixed namespace declaration (xmlns:prefix).
                  #
                  # @param html [String] The HTML string to check
                  # @return [Boolean] true if XHTML, false otherwise
                  def self.xhtml?(html)
                    html.include?("XHTML") ||
                      html.include?('xmlns="http://www.w3.org/1999/xhtml"') ||
                      html.match?(/xmlns:\w+/)
                  end

                  # Validate XHTML input using XML strict parsing
                  #
                  # @param input [String] The XHTML string to validate
                  # @raise [Canon::ValidationError] If XHTML is malformed
                  # @return [void]
                  def self.validate_xhtml!(input)
                    Nokogiri::XML(input) do |config|
                      config.strict.nonet
                    end
                  rescue Nokogiri::XML::SyntaxError => e
                    location = extract_location(e)

                    # Only append a suffix when there is something to append, so the
                    # details never end in a dangling ": ".
                    extra = extract_details(e)
                    details = "XHTML validation failed"
                    details = "#{details}: #{extra}" if extra

                    raise Canon::ValidationError.new(
                      e.message.split("\n").first,
                      format: :html,
                      line: location[:line],
                      column: location[:column],
                      details: details,
                    )
                  end

                  # Validate HTML5 input
                  #
                  # @param input [String] The HTML5 string to validate
                  # @raise [Canon::ValidationError] If HTML5 is malformed
                  # @return [void]
                  def self.validate_html5!(input)
                    doc = Nokogiri::HTML5(input, max_errors: 100)

                    # Check for parse errors
                    return unless doc.errors.any?

                    # Find first significant error (level 2 = error, level 1 = warning)
                    # Filter out doctype warnings and other non-critical issues
                    significant_errors = doc.errors.select do |e|
                      e.level >= 2 && !doctype_or_warning?(e)
                    end

                    return if significant_errors.empty?

                    error = significant_errors.first
                    location = extract_location(error)
                    raise Canon::ValidationError.new(
                      error.message,
                      format: :html,
                      line: location[:line],
                      column: location[:column],
                      details: build_error_details(significant_errors),
                    )
                  end

                  # Extract additional error details
                  #
                  # @param error [Nokogiri::XML::SyntaxError] The syntax error
                  # @return [String, nil] Additional details about the error, or nil
                  #   when the error exposes no nested errors or none differ from its
                  #   own message
                  def self.extract_details(error)
                    return nil unless error.respond_to?(:errors)

                    details = error.errors.map(&:message).reject do |msg|
                      msg == error.message
                    end
                    details.join("; ") unless details.empty?
                  end

                  # Build error details from multiple errors
                  #
                  # The first significant error is skipped (the caller reports it as
                  # the main message); up to three following ones are summarised.
                  #
                  # @param errors [Array<Nokogiri::XML::SyntaxError>] Array of errors
                  # @return [String, nil] Combined error details, or nil when there is
                  #   nothing beyond the first significant error
                  def self.build_error_details(errors)
                    return nil if errors.size <= 1

                    additional = errors.select { |e| e.level >= 2 }.drop(1).first(3)
                    # Return nil rather than an empty string when nothing is left.
                    return nil if additional.empty?

                    summaries = additional.map do |e|
                      line = extract_location(e)[:line]
                      line ? "#{e.message} (line #{line})" : e.message
                    end
                    summaries.join("; ")
                  end

                  # Check if error is a doctype or other non-critical warning
                  #
                  # @param error [Nokogiri::XML::SyntaxError] The error to check
                  # @return [Boolean] true if error is non-critical
                  def self.doctype_or_warning?(error)
                    error.message.match?(/doctype|Expected a doctype token/i)
                  end

                  private_class_method :xhtml?, :validate_xhtml!, :validate_html5!,
                                       :extract_details, :build_error_details,
                                       :doctype_or_warning?, :strip_xml_declaration
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,89 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "json"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative "base_validator"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Canon
         
     | 
| 
      
 7 
     | 
    
         
            +
              module Validators
         
     | 
| 
      
 8 
     | 
    
         
            +
                # Validator for JSON input
         
     | 
| 
      
 9 
     | 
    
         
            +
                #
         
     | 
| 
      
 10 
     | 
    
         
            +
                # Validates JSON input using Ruby's JSON parser.
         
     | 
| 
      
 11 
     | 
    
         
            +
                # Raises detailed ValidationError with position information
         
     | 
| 
      
 12 
     | 
    
         
            +
                # when malformed JSON is detected.
         
     | 
| 
      
 13 
     | 
    
         
            +
                class JsonValidator < BaseValidator
         
     | 
| 
      
 14 
     | 
    
         
            +
                  # Validate JSON input
         
     | 
| 
      
 15 
     | 
    
         
            +
                  #
         
     | 
| 
      
 16 
     | 
    
         
            +
                  # @param input [String] The JSON string to validate
         
     | 
| 
      
 17 
     | 
    
         
            +
                  # @raise [Canon::ValidationError] If JSON is malformed
         
     | 
| 
      
 18 
     | 
    
         
            +
                  # @return [void]
         
     | 
| 
      
 19 
     | 
    
         
            +
                  def self.validate!(input)
                    # Nil or whitespace-only input is skipped without parsing.
                    return if input.nil?
                    return if input.strip.empty?

                    begin
                      JSON.parse(input)
                    rescue JSON::ParserError => e
                      # Translate the parser failure into Canon's own error type,
                      # attaching whatever position and context can be recovered
                      # from the parser message.
                      raw_message = e.message
                      position = extract_position(raw_message)

                      raise Canon::ValidationError.new(
                        clean_error_message(raw_message),
                        format: :json,
                        line: position[:line],
                        column: position[:column],
                        details: extract_context(input, position),
                      )
                    end
                  end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                  # Extract line and column from JSON error message
         
     | 
| 
      
 37 
     | 
    
         
            +
                  #
         
     | 
| 
      
 38 
     | 
    
         
            +
                  # @param message [String] The error message
         
     | 
| 
      
 39 
     | 
    
         
            +
                  # @return [Hash] Hash with :line and :column keys
         
     | 
| 
      
 40 
     | 
    
         
            +
                  def self.extract_position(message)
                    # Accepts both "at line N, column M" and "at line N column M".
                    # The comma is optional because parser messages differ in
                    # whether they include it (recent json gem releases appear
                    # to omit it).
                    #
                    # Messages that carry no line/column pair (for example ones
                    # that only report a character offset) yield nil for both
                    # keys: the offset cannot be converted here because the
                    # input text is not available to this method.
                    found = /at line (\d+),? column (\d+)/i.match(message)
                    return { line: nil, column: nil } unless found

                    { line: found[1].to_i, column: found[2].to_i }
                  end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                  # Clean error message by removing technical details
         
     | 
| 
      
 57 
     | 
    
         
            +
                  #
         
     | 
| 
      
 58 
     | 
    
         
            +
                  # @param message [String] The raw error message
         
     | 
| 
      
 59 
     | 
    
         
            +
                  # @return [String] Cleaned error message
         
     | 
| 
      
 60 
     | 
    
         
            +
                  def self.clean_error_message(message)
                    # Keep only the text before the first " at " separator, which
                    # is where the parser appends the offending token / position.
                    #
                    # String#partition always returns three strings, so an empty
                    # message (or one consisting solely of " at ") produces ""
                    # rather than raising NoMethodError on nil while the original
                    # parse error is being handled.
                    headline, = message.partition(" at ")
                    headline.strip
                  end
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                  # Extract context around the error position
         
     | 
| 
      
 66 
     | 
    
         
            +
                  #
         
     | 
| 
      
 67 
     | 
    
         
            +
                  # @param input [String] The input JSON string
         
     | 
| 
      
 68 
     | 
    
         
            +
                  # @param position [Hash] Position hash with :line key
         
     | 
| 
      
 69 
     | 
    
         
            +
                  # @return [String, nil] Context snippet around the error
         
     | 
| 
      
 70 
     | 
    
         
            +
                  def self.extract_context(input, position)
                    line_number = position[:line]
                    return nil unless line_number

                    rows = input.split("\n")
                    target = line_number - 1
                    # The reported line must exist in the input.
                    return nil unless (0...rows.size).cover?(target)

                    # One line of context on each side, clamped to the input bounds.
                    first = [target - 1, 0].max
                    last = [target + 1, rows.size - 1].min

                    "Near: #{rows[first..last].join(" ")}"
                  end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                  private_class_method :extract_position, :clean_error_message,
         
     | 
| 
      
 86 
     | 
    
         
            +
                                       :extract_context
         
     | 
| 
      
 87 
     | 
    
         
            +
                end
         
     | 
| 
      
 88 
     | 
    
         
            +
              end
         
     | 
| 
      
 89 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,53 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "nokogiri"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative "base_validator"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Canon
         
     | 
| 
      
 7 
     | 
    
         
            +
              module Validators
         
     | 
| 
      
 8 
     | 
    
         
            +
                # Validator for XML input
         
     | 
| 
      
 9 
     | 
    
         
            +
                #
         
     | 
| 
      
 10 
     | 
    
         
            +
                # Validates XML input using Nokogiri's strict parsing mode.
         
     | 
| 
      
 11 
     | 
    
         
            +
                # Raises detailed ValidationError with line and column information
         
     | 
| 
      
 12 
     | 
    
         
            +
                # when malformed XML is detected.
         
     | 
| 
      
 13 
     | 
    
         
            +
                class XmlValidator < BaseValidator
         
     | 
| 
      
 14 
     | 
    
         
            +
                  # Validate XML input
         
     | 
| 
      
 15 
     | 
    
         
            +
                  #
         
     | 
| 
      
 16 
     | 
    
         
            +
                  # @param input [String] The XML string to validate
         
     | 
| 
      
 17 
     | 
    
         
            +
                  # @raise [Canon::ValidationError] If XML is malformed
         
     | 
| 
      
 18 
     | 
    
         
            +
                  # @return [void]
         
     | 
| 
      
 19 
     | 
    
         
            +
                  def self.validate!(input)
                    # Nil or whitespace-only input is skipped without parsing.
                    return if input.nil?
                    return if input.strip.empty?

                    begin
                      # Parse with the strict and nonet options set on the config.
                      Nokogiri::XML(input) { |config| config.strict.nonet }
                    rescue Nokogiri::XML::SyntaxError => e
                      # extract_location is not defined in this class body;
                      # presumably inherited from BaseValidator.
                      location = extract_location(e)
                      # Only the first line of the parser message is reported.
                      headline = e.message.split("\n").first

                      raise Canon::ValidationError.new(
                        headline,
                        format: :xml,
                        line: location[:line],
                        column: location[:column],
                        details: extract_details(e),
                      )
                    end
                  end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
                  # Extract additional error details
         
     | 
| 
      
 38 
     | 
    
         
            +
                  #
         
     | 
| 
      
 39 
     | 
    
         
            +
                  # @param error [Nokogiri::XML::SyntaxError] The syntax error
         
     | 
| 
      
 40 
     | 
    
         
            +
                  # @return [String, nil] Additional details about the error
         
     | 
| 
      
 41 
     | 
    
         
            +
                  def self.extract_details(error)
                    # Errors that expose no nested error list have no extra details.
                    return nil unless error.respond_to?(:errors)

                    primary = error.message
                    # Collect nested messages, skipping any that merely repeat the
                    # primary message.
                    extras = error.errors.each_with_object([]) do |nested, collected|
                      text = nested.message
                      collected << text unless text == primary
                    end

                    extras.empty? ? nil : extras.join("; ")
                  end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                  private_class_method :extract_details
         
     | 
| 
      
 51 
     | 
    
         
            +
                end
         
     | 
| 
      
 52 
     | 
    
         
            +
              end
         
     | 
| 
      
 53 
     | 
    
         
            +
            end
         
     |