canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
data/lib/canon/rspec_matchers.rb
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "canon" unless defined?(::Canon)
|
|
4
|
-
require "
|
|
5
|
-
require "
|
|
4
|
+
require "canon/comparison"
|
|
5
|
+
require "canon/diff_formatter"
|
|
6
|
+
require "canon/config"
|
|
6
7
|
|
|
7
8
|
begin
|
|
8
9
|
require "rspec/expectations"
|
|
@@ -11,117 +12,226 @@ end
|
|
|
11
12
|
|
|
12
13
|
module Canon
|
|
13
14
|
module RSpecMatchers
|
|
15
|
+
# Configuration entry points for the RSpec matchers. Nothing is stored
# here; every call is forwarded to Canon::Config.
class << self
  # Yields whatever Canon::Config.configure returns to the caller's block.
  #
  # @yield [config] the value returned by Canon::Config.configure
  def configure
    yield Canon::Config.configure
  end

  # Resets the shared configuration through Canon::Config.reset!.
  def reset_config
    Canon::Config.reset!
  end

  # Per-format readers (xml, html, json, yaml). Each one returns the
  # section of the same name from Canon::Config.instance.
  %i[xml html json yaml].each do |section|
    define_method(section) { Canon::Config.instance.public_send(section) }
  end
end
|
|
42
|
+
|
|
14
43
|
# Base matcher class for serialization equivalence.
#
# This is a THIN WRAPPER around the Canon::Comparison API: the comparison
# itself is delegated to Canon::Comparison.equivalent? and failure diffs are
# rendered by Canon::DiffFormatter. The matcher only assembles options and
# interprets the result.
class SerializationMatcher
  # Formats that may be requested explicitly. Anything else is rejected
  # before the global configuration is consulted.
  SUPPORTED_FORMATS = %i[xml html html4 html5 json yaml string].freeze

  # @return [Object] the expected document given to the matcher
  attr_reader :expected

  # @param expected [Object] the reference document
  # @param format [Symbol, String, nil] explicit format; nil leaves format
  #   detection to Canon::Comparison
  # @param match_profile [Object, nil] per-test match profile
  # @param match [Object, nil] per-test match options
  # @param preprocessing [Object, nil] per-test preprocessing setting
  def initialize(expected, format = nil, match_profile: nil,
                 match: nil, preprocessing: nil)
    @expected = expected
    @format = format&.to_sym
    @match_profile = match_profile
    @match = match
    @preprocessing = preprocessing
  end

  # Compares the expected document with +target+.
  #
  # @param target [Object] the actual document
  # @return [Object] truthy when the documents are considered equivalent
  # @raise [Canon::Error] if an explicit format is not supported
  def matches?(target)
    @target = target

    opts = build_comparison_options
    # Only pass a format hint when the caller gave one explicitly.
    opts[:format] = @format if @format

    # Canon::Comparison.equivalent? is the single source of truth for
    # format detection, parsing and comparison rules.
    @comparison_result = Canon::Comparison.equivalent?(
      @expected,
      @target,
      opts,
    )

    interpret_result(@comparison_result)
  end

  # @return [String] failure text including the rendered diff
  def failure_message
    "expected #{format_name} to be equivalent\n\n#{diff_output}"
  end

  # @return [String] failure text for the negated expectation
  def failure_message_when_negated
    "expected #{format_name} not to be equivalent"
  end

  # @return [Object] the document passed to #matches?
  def actual
    @target
  end

  # The matcher renders its own diff in #failure_message, so it reports
  # itself as not diffable.
  #
  # @return [false]
  def diffable
    false
  end

  private

  # Turns the value returned by Canon::Comparison.equivalent? into a
  # pass/fail answer. Several result shapes are accepted.
  def interpret_result(result)
    case result
    when Canon::Comparison::ComparisonResult
      # Verbose result object: ask it directly.
      result.equivalent?
    when Hash
      # Legacy format - Hash with a :differences array
      result[:differences].empty?
    when Array
      # Legacy format - an empty array means no differences
      result.empty?
    else
      # Plain boolean result
      result
    end
  end

  # Human-readable format label used in failure messages.
  def format_name
    return explicit_format_name if @format

    # No explicit format: ask Canon::Comparison what it would detect and
    # fall back to a neutral label if detection raises.
    begin
      Canon::Comparison.send(:detect_format, @expected).to_s.upcase
    rescue StandardError
      "CONTENT"
    end
  end

  # Label for an explicitly requested format.
  def explicit_format_name
    case @format
    when :html4, :html5 then "HTML"
    when :string then "STRING"
    else @format.to_s.upcase
    end
  end

  # Assembles the options hash passed to Canon::Comparison.equivalent?.
  # Per-test parameters are added first; global configuration is only
  # consulted when a format was given explicitly.
  #
  # @raise [Canon::Error] if the explicit format is not supported
  def build_comparison_options
    opts = { verbose: true } # Always use verbose for diff generation

    # Per-test parameters (highest priority)
    opts[:match_profile] = @match_profile if @match_profile
    opts[:match] = @match if @match
    opts[:preprocessing] = @preprocessing if @preprocessing
    return opts unless @format

    # Validate before reflecting on the config object: a symbol such as
    # :inspect or :hash is a method on every Ruby object, so checking
    # respond_to? first would treat it as a configuration section.
    unless SUPPORTED_FORMATS.include?(@format)
      raise Canon::Error, "Unsupported format: #{@format}"
    end

    apply_global_config(opts, normalize_format_for_config(@format))
    opts
  end

  # Merges global settings from Canon::Config into +opts+ (lower priority
  # than the per-test parameters). Does nothing when the config object has
  # no section named +config_format+.
  def apply_global_config(opts, config_format)
    config = Canon::Config.instance
    return unless config.respond_to?(config_format)

    format_config = config.public_send(config_format)
    if format_config.match.profile
      opts[:global_profile] = format_config.match.profile
    end
    unless format_config.match.options.empty?
      opts[:global_options] = format_config.match.options
    end
    opts[:preprocessing] ||= format_config.preprocessing
  end

  # Maps the HTML variants onto the single :html configuration section.
  def normalize_format_for_config(format)
    case format
    when :html4, :html5 then :html
    else format
    end
  end

  # Renders the diff shown in #failure_message. Any error raised while
  # rendering is reported inline instead of masking the test failure.
  def diff_output
    return string_diff_output if @format == :string

    # Without an explicit format the XML diff settings are used.
    config_format = normalize_format_for_config(@format || :xml)
    diff_config = Canon::Config.instance.public_send(config_format).diff

    formatter = build_formatter(diff_config, mode: diff_config.mode,
                                             verbose: true)
    formatter.format_comparison_result(@comparison_result, @expected,
                                       @target)
  rescue StandardError => e
    "\nError generating diff: #{e.message}"
  end

  # Plain-string diff: always line based, using the XML diff settings as a
  # fallback, and built from the two documents' string forms.
  def string_diff_output
    diff_config = Canon::Config.instance.public_send(:xml).diff
    formatter = build_formatter(diff_config, mode: :by_line, verbose: false)
    formatter.format([], :string, doc1: @expected.to_s, doc2: @target.to_s)
  end

  # Builds a Canon::DiffFormatter from a diff configuration section.
  # :verbose_diff is only passed when +verbose+ is true.
  def build_formatter(diff_config, mode:, verbose:)
    settings = {
      use_color: diff_config.use_color,
      mode: mode,
      context_lines: diff_config.context_lines,
      diff_grouping_lines: diff_config.grouping_lines,
      show_diffs: diff_config.show_diffs,
    }
    settings[:verbose_diff] = diff_config.verbose_diff if verbose
    Canon::DiffFormatter.new(**settings)
  end
end
|
|
113
210
|
|
|
114
211
|
# Matcher methods
|
|
115
|
-
def be_serialization_equivalent_to(expected, format: :xml
|
|
116
|
-
|
|
212
|
+
# Builds a matcher for the given serialization format.
#
# @param expected [Object] the reference document
# @param format [Symbol, String] format passed to the matcher (default :xml)
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_serialization_equivalent_to(expected, format: :xml,
                                   match_profile: nil, match: nil,
                                   preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, format, **matcher_options)
end
|
|
118
220
|
|
|
119
|
-
def be_analogous_with(expected
|
|
120
|
-
|
|
221
|
+
# Builds an XML matcher; the body is the same as be_xml_equivalent_to.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_analogous_with(expected, match_profile: nil, match: nil,
                      preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, :xml, **matcher_options)
end
|
|
122
228
|
|
|
123
|
-
def be_xml_equivalent_to(expected
|
|
124
|
-
|
|
229
|
+
# Builds a matcher that compares documents as XML.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_xml_equivalent_to(expected, match_profile: nil, match: nil,
                         preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, :xml, **matcher_options)
end
|
|
126
236
|
|
|
127
237
|
def be_yaml_equivalent_to(expected)
|
|
@@ -132,7 +242,39 @@ module Canon
|
|
|
132
242
|
SerializationMatcher.new(expected, :json)
|
|
133
243
|
end
|
|
134
244
|
|
|
135
|
-
|
|
245
|
+
# Builds a matcher that compares documents as HTML.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_html_equivalent_to(expected, match_profile: nil, match: nil,
                          preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, :html, **matcher_options)
end
|
|
252
|
+
|
|
253
|
+
# Builds a matcher with the :html4 format.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_html4_equivalent_to(expected, match_profile: nil, match: nil,
                           preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, :html4, **matcher_options)
end
|
|
260
|
+
|
|
261
|
+
# Builds a matcher with the :html5 format.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_html5_equivalent_to(expected, match_profile: nil, match: nil,
                           preprocessing: nil)
  matcher_options = {
    match_profile: match_profile,
    match: match,
    preprocessing: preprocessing,
  }
  SerializationMatcher.new(expected, :html5, **matcher_options)
end
|
|
268
|
+
|
|
269
|
+
# Builds a matcher with no explicit format (nil is passed as the format).
#
# The keyword arguments mirror the other matcher helpers and default to
# nil, so existing one-argument calls behave as before.
#
# @param expected [Object] the reference document
# @param match_profile [Object, nil] forwarded unchanged to the matcher
# @param match [Object, nil] forwarded unchanged to the matcher
# @param preprocessing [Object, nil] forwarded unchanged to the matcher
# @return [SerializationMatcher]
def be_equivalent_to(expected, match_profile: nil, match: nil,
                     preprocessing: nil)
  SerializationMatcher.new(expected, nil,
                           match_profile: match_profile,
                           match: match,
                           preprocessing: preprocessing)
end
|
|
272
|
+
|
|
273
|
+
# Builds a matcher with the :string format.
#
# @param expected [Object] the reference value
# @return [SerializationMatcher]
def be_string_equivalent_to(expected)
  string_format = :string
  SerializationMatcher.new(expected, string_format)
end
|
|
276
|
+
|
|
277
|
+
if defined?(::RSpec) && ::RSpec.respond_to?(:configure)
|
|
136
278
|
RSpec.configure do |config|
|
|
137
279
|
config.include(Canon::RSpecMatchers)
|
|
138
280
|
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../errors"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
module Validators
|
|
7
|
+
# Base class for all input validators
#
# This abstract base class defines the interface that all format-specific
# validators must implement. Each validator is responsible for validating
# input in a specific format and raising detailed ValidationError when
# issues are found.
class BaseValidator
  # Message patterns used when an error object carries no usable
  # line/column attribute of its own.
  LOCATION_PATTERNS = {
    line: /line[:\s]+(\d+)/i,
    column: /column[:\s]+(\d+)/i,
  }.freeze

  # Validate input and raise ValidationError if invalid
  #
  # Subclasses must override this; the base implementation always raises.
  #
  # @param input [String] The input to validate
  # @raise [NotImplementedError] Always, in the base class
  # @return [void]
  def self.validate!(input)
    raise NotImplementedError,
          "#{name} must implement validate! method"
  end

  # Extract line and column information from an error
  #
  # Each value comes from the error's own +line+ / +column+ reader when it
  # has one and it returns a non-nil value. Otherwise the error message is
  # searched for "line N" / "column N".
  #
  # @param error [Exception] The error containing location information
  # @return [Hash] Hash with :line and :column keys (values may be nil)
  def self.extract_location(error)
    {
      line: location_component(error, :line),
      column: location_component(error, :column),
    }
  end

  # Resolve one location field (:line or :column) for +error+.
  #
  # @param error [Exception] The error to inspect
  # @param field [Symbol] :line or :column
  # @return [Integer, Object, nil] The attribute value, the number parsed
  #   from the message, or nil when neither is available
  def self.location_component(error, field)
    value = error.public_send(field) if error.respond_to?(field)
    # A reader that exists but returns nil should not block the fallback.
    return value unless value.nil?

    found = LOCATION_PATTERNS.fetch(field).match(error.message.to_s)
    found && found[1].to_i
  end

  private_class_method :location_component
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require_relative "base_validator"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
module Validators
|
|
8
|
+
# Validator for HTML input
#
# Checks HTML documents with Nokogiri. Input that looks like XHTML is
# parsed with the strict XML parser; everything else goes through the
# HTML5 parser. Problems are reported as Canon::ValidationError carrying
# the line and column of the first significant error.
class HtmlValidator < BaseValidator
  # Validate HTML input
  #
  # nil and whitespace-only input is accepted without parsing.
  #
  # @param input [String] The HTML string to validate
  # @raise [Canon::ValidationError] If HTML is malformed
  # @return [void]
  def self.validate!(input)
    return if input.nil? || input.strip.empty?

    # A leading XML declaration is dropped before parsing.
    body = input.sub(/\A\s*<\?xml[^?]*\?>\s*/, "")
    xhtml?(body) ? validate_xhtml!(body) : validate_html5!(body)
  end

  # Decide whether the document should be treated as XHTML
  #
  # True when the text contains "XHTML", the XHTML namespace declaration,
  # or any prefixed xmlns attribute.
  #
  # @param html [String] The HTML string to check
  # @return [Boolean] true if XHTML, false otherwise
  def self.xhtml?(html)
    return true if html.include?("XHTML")
    return true if html.include?('xmlns="http://www.w3.org/1999/xhtml"')

    html.match?(/xmlns:\w+/)
  end

  # Validate XHTML input using XML strict parsing
  #
  # @param input [String] The XHTML string to validate
  # @raise [Canon::ValidationError] If XHTML is malformed
  # @return [void]
  def self.validate_xhtml!(input)
    Nokogiri::XML(input) { |options| options.strict.nonet }
  rescue Nokogiri::XML::SyntaxError => e
    where = extract_location(e)
    summary = e.message.split("\n").first
    raise Canon::ValidationError.new(
      summary,
      format: :html,
      line: where[:line],
      column: where[:column],
      details: "XHTML validation failed: #{extract_details(e)}",
    )
  end

  # Validate HTML5 input
  #
  # Parses with up to 100 recorded errors and raises for the first error
  # of level 2 or higher that is not a doctype complaint.
  #
  # @param input [String] The HTML5 string to validate
  # @raise [Canon::ValidationError] If HTML5 is malformed
  # @return [void]
  def self.validate_html5!(input)
    parse_errors = Nokogiri::HTML5(input, max_errors: 100).errors
    return unless parse_errors.any?

    # Keep level >= 2 errors and skip doctype-related messages.
    blocking = parse_errors.reject do |err|
      err.level < 2 || doctype_or_warning?(err)
    end
    return if blocking.empty?

    primary = blocking.first
    where = extract_location(primary)
    raise Canon::ValidationError.new(
      primary.message,
      format: :html,
      line: where[:line],
      column: where[:column],
      details: build_error_details(blocking),
    )
  end

  # Extract additional error details
  #
  # Collects the messages of nested errors, when the error exposes an
  # +errors+ list, leaving out any that repeat the error's own message.
  #
  # @param error [Nokogiri::XML::SyntaxError] The syntax error
  # @return [String, nil] Additional details about the error
  def self.extract_details(error)
    return nil unless error.respond_to?(:errors)

    extras = error.errors.map(&:message).reject do |text|
      text == error.message
    end
    extras.empty? ? nil : extras.join("; ")
  end

  # Build error details from multiple errors
  #
  # Summarises up to three errors following the first one, adding the line
  # number to each message when it is known.
  #
  # @param errors [Array<Nokogiri::XML::SyntaxError>] Array of errors
  # @return [String, nil] Combined error details
  def self.build_error_details(errors)
    return nil if errors.size <= 1

    serious = errors.select { |err| err.level >= 2 }
    return nil if serious.empty?

    summaries = serious.drop(1).first(3).map do |err|
      line = extract_location(err)[:line]
      line ? "#{err.message} (line #{line})" : err.message
    end
    summaries.join("; ")
  end

  # Check whether an error message mentions a doctype
  #
  # @param error [Nokogiri::XML::SyntaxError] The error to check
  # @return [Boolean] true if error is non-critical
  def self.doctype_or_warning?(error)
    error.message.match?(/doctype|Expected a doctype token/i)
  end

  private_class_method :xhtml?, :validate_xhtml!, :validate_html5!,
                       :extract_details, :build_error_details,
                       :doctype_or_warning?
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "base_validator"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
module Validators
|
|
8
|
+
# Validator for JSON input
#
# Validates JSON input using Ruby's JSON parser.
# Raises detailed ValidationError with position information
# when malformed JSON is detected.
class JsonValidator < BaseValidator
  # Validate JSON input
  #
  # nil and whitespace-only input is accepted without parsing.
  #
  # @param input [String] The JSON string to validate
  # @raise [Canon::ValidationError] If JSON is malformed
  # @return [void]
  def self.validate!(input)
    return if input.nil? || input.strip.empty?

    JSON.parse(input)
  rescue JSON::ParserError => e
    position = extract_position(e.message)

    raise Canon::ValidationError.new(
      clean_error_message(e.message),
      format: :json,
      line: position[:line],
      column: position[:column],
      details: extract_context(input, position),
    )
  end

  # Extract line and column from JSON error message
  #
  # Accepts both "at line N, column M" and "at line N column M". The comma
  # is optional because newer json releases appear to omit it
  # (NOTE(review): confirm against the json version in use). Messages that
  # only give a character offset yield nil for both values, since the
  # offset cannot be mapped to a line without the input text.
  #
  # @param message [String] The error message
  # @return [Hash] Hash with :line and :column keys (values may be nil)
  def self.extract_position(message)
    found = message.to_s.match(/at line (\d+),? column (\d+)/i)
    return { line: nil, column: nil } unless found

    { line: found[1].to_i, column: found[2].to_i }
  end

  # Clean error message by removing technical details
  #
  # Keeps the text before the first " at " and strips surrounding
  # whitespace. An empty message yields an empty string.
  #
  # @param message [String] The raw error message
  # @return [String] Cleaned error message
  def self.clean_error_message(message)
    # split on "" returns [], so guard the nil from #first.
    message.to_s.split(" at ", 2).first.to_s.strip
  end

  # Extract context around the error position
  #
  # Returns the reported line together with the line before and after it
  # (where they exist), joined by single spaces.
  #
  # @param input [String] The input JSON string
  # @param position [Hash] Position hash with :line key
  # @return [String, nil] Context snippet around the error
  def self.extract_context(input, position)
    return nil unless position[:line]

    lines = input.split("\n")
    line_idx = position[:line] - 1
    return nil if line_idx.negative? || line_idx >= lines.size

    # Clamp the one-line window to the bounds of the input.
    start_idx = [0, line_idx - 1].max
    end_idx = [lines.size - 1, line_idx + 1].min

    "Near: #{lines[start_idx..end_idx].join(' ')}"
  end

  private_class_method :extract_position, :clean_error_message,
                       :extract_context
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
require_relative "base_validator"
|
|
5
|
+
|
|
6
|
+
module Canon
|
|
7
|
+
module Validators
|
|
8
|
+
# Validator for XML input
#
# Parses the input with Nokogiri in strict mode and converts a syntax
# error into a Canon::ValidationError carrying line and column details.
class XmlValidator < BaseValidator
  # Validate XML input
  #
  # nil and whitespace-only input is accepted without parsing.
  #
  # @param input [String] The XML string to validate
  # @raise [Canon::ValidationError] If XML is malformed
  # @return [void]
  def self.validate!(input)
    return if input.nil? || input.strip.empty?

    # Strict parsing raises on malformed input; nonet is also set.
    Nokogiri::XML(input) { |options| options.strict.nonet }
  rescue Nokogiri::XML::SyntaxError => e
    where = extract_location(e)
    summary = e.message.split("\n").first
    raise Canon::ValidationError.new(
      summary,
      format: :xml,
      line: where[:line],
      column: where[:column],
      details: extract_details(e),
    )
  end

  # Extract additional error details
  #
  # Collects the messages of nested errors, when the error exposes an
  # +errors+ list, leaving out any that repeat the error's own message.
  #
  # @param error [Nokogiri::XML::SyntaxError] The syntax error
  # @return [String, nil] Additional details about the error
  def self.extract_details(error)
    return nil unless error.respond_to?(:errors)

    extras = error.errors.map(&:message).reject do |text|
      text == error.message
    end
    extras.empty? ? nil : extras.join("; ")
  end

  private_class_method :extract_details
end
|
|
52
|
+
end
|
|
53
|
+
end
|