senko 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module Senko
  # Resolves which JSON Schema dialect a schema should be handled as.
  #
  # A dialect is always reported as one of the canonical symbols listed in
  # SUPPORTED. Callers may name a dialect explicitly (canonical name, short
  # alias, or meta-schema URI); otherwise the schema's own `$schema` value is
  # consulted, and DEFAULT is used as the last resort.
  module Dialect
    # Dialect used when neither the caller nor the schema names one.
    DEFAULT = :'2020-12'

    # Known `$schema` URIs (with and without the trailing `#`) mapped to the
    # canonical dialect symbol.
    DIALECT_MAP = {
      'https://json-schema.org/draft/2020-12/schema' => :'2020-12',
      'https://json-schema.org/draft/2020-12/schema#' => :'2020-12',
      'https://json-schema.org/draft/2019-09/schema' => :'2019-09',
      'https://json-schema.org/draft/2019-09/schema#' => :'2019-09',
      'http://json-schema.org/draft-07/schema#' => :'draft-07',
      'http://json-schema.org/draft-07/schema' => :'draft-07',
      'http://json-schema.org/draft-06/schema#' => :'draft-06',
      'http://json-schema.org/draft-06/schema' => :'draft-06',
      'http://json-schema.org/draft-04/schema#' => :'draft-04',
      'http://json-schema.org/draft-04/schema' => :'draft-04'
    }.freeze

    # Canonical dialect symbols accepted by #detect.
    SUPPORTED = %i[2020-12 2019-09 draft-07 draft-06 draft-04].freeze

    # Accepted spellings of each dialect, keyed by their String form. Symbols
    # are looked up through #to_s, so `:draft07` and `'draft07'` are equivalent.
    ALIASES = {
      '2020-12' => :'2020-12',
      'draft202012' => :'2020-12',
      '2019-09' => :'2019-09',
      'draft201909' => :'2019-09',
      'draft-07' => :'draft-07',
      'draft07' => :'draft-07',
      'draft-06' => :'draft-06',
      'draft06' => :'draft-06',
      'draft-04' => :'draft-04',
      'draft04' => :'draft-04'
    }.freeze

    module_function

    # Determines the dialect for +schema+.
    #
    # @param schema [Object] the schema document; only Hashes are inspected
    #   for a `$schema` entry
    # @param explicit_draft [Symbol, String, nil] caller-chosen dialect; takes
    #   precedence over the schema's own `$schema`
    # @return [Symbol] one of SUPPORTED
    # @raise [UnsupportedDialectError] when the chosen name is not recognised
    def detect(schema, explicit_draft = nil)
      draft = explicit_draft || schema_draft(schema) || DEFAULT
      normalized = normalize_name(draft)
      return normalized if SUPPORTED.include?(normalized)

      raise UnsupportedDialectError,
            "unsupported JSON Schema dialect: #{draft.inspect}"
    end

    # Reads the dialect announced by the schema itself.
    #
    # @param schema [Object]
    # @return [Symbol, nil] the mapped dialect, or nil when +schema+ is not a
    #   Hash, has no `$schema` entry, or names a URI missing from DIALECT_MAP
    def schema_draft(schema)
      return nil unless schema.is_a?(Hash)

      # Accept both string-keyed and symbol-keyed schema hashes.
      value = schema['$schema'] || schema[:$schema]
      DIALECT_MAP[value]
    end

    # Converts an alias or meta-schema URI into the canonical dialect symbol.
    #
    # @param name [Object]
    # @return [Symbol, Object] the canonical symbol, or +name+ itself when it
    #   is not a recognised spelling (the caller decides whether to reject it)
    def normalize_name(name)
      return name unless name.is_a?(Symbol) || name.is_a?(String)

      key = name.to_s
      # The URI fallback lets an explicit draft be given exactly as it would
      # appear in a schema's `$schema` keyword.
      ALIASES[key] || DIALECT_MAP[key] || name
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Senko
  # Plain value object describing one validation failure. It is not an
  # exception; the raisable classes are defined further down.
  class Error
    attr_reader :message, :instance_location, :keyword_location, :keyword, :schema, :data

    # @param message [String] human-readable description of the failure
    # @param instance_location [String] location within the validated data
    # @param keyword_location [String] location within the schema
    # @param keyword [String] name of the keyword that produced the failure
    # @param schema [Object, nil] optional schema value attached to the failure
    # @param data [Object, nil] optional instance value attached to the failure
    def initialize(message:, instance_location:, keyword_location:, keyword:, schema: nil, data: nil)
      @message = message
      @keyword = keyword
      @instance_location = instance_location
      @keyword_location = keyword_location
      @schema = schema
      @data = data
    end

    # Shorter names for the two location readers.
    alias_method :path, :instance_location
    alias_method :schema_path, :keyword_location

    # @return [Hash{String => Object}] the failure as a hash with the keys
    #   `keywordLocation`, `instanceLocation` and `error`, in that order
    def to_h
      summary = {}
      summary['keywordLocation'] = keyword_location
      summary['instanceLocation'] = instance_location
      summary['error'] = message
      summary
    end
  end

  # Base class for failures raised while compiling a schema.
  class CompileError < StandardError
  end

  class CircularReferenceError < CompileError
  end

  class UnresolvableRefError < CompileError
  end

  class UnsupportedDialectError < CompileError
  end

  # Raised to signal failed validation; carries the result object that was
  # passed to the constructor.
  class ValidationError < StandardError
    attr_reader :result

    # @param result [Object] the validation result to expose via #result
    def initialize(result)
      super('validation failed')
      @result = result
    end
  end
end
@@ -0,0 +1,327 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'ipaddr'
5
+ require 'simpleidn'
6
+ require 'time'
7
+ require 'uri'
8
+ require 'addressable/uri'
9
+
10
module Senko
  # Validators for values of the JSON Schema `format` keyword.
  #
  # Every helper is exposed as a module function. #valid? is the entry point:
  # it looks up a validator by format name and reports exceptions raised by
  # the underlying parsers as "invalid".
  module Format
    DOT_ATOM = %r{\A[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*\z}
    HOST_LABEL = /\A[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\z/
    UUID = /\A[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\z/
    JSON_POINTER = %r{\A(?:/(?:[^~/]|~0|~1)*)*\z}
    RELATIVE_JSON_POINTER = %r{\A(?:0|[1-9][0-9]*)(?:#|(?:/(?:[^~/]|~0|~1)*)*)\z}
    RFC3339_DATE = /\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/
    RFC3339_DATE_TIME = /\A([0-9]{4}-[0-9]{2}-[0-9]{2})[Tt]([0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2}))\z/
    RFC3339_TIME = /\A([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?([Zz]|[+-]([0-9]{2}):([0-9]{2}))\z/
    # Dotted quad with each octet in 0..255 and no leading zeros.
    IPV4_DECIMAL = /\A(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}\z/
    # Characters treated as label separators when splitting an IDN hostname.
    IDN_SEPARATORS = /[.\u{3002}\u{ff0e}\u{ff61}]/
    # Code points that make an IDN label invalid wherever they appear.
    DISALLOWED_IDNA = /[\u{0640}\u{07fa}\u{302e}\u{302f}\u{3031}-\u{3035}\u{303b}]/

    module_function

    # Checks +value+ against the format called +name+.
    #
    # @param name [String] format name, e.g. "date-time"
    # @param value [Object] the instance value
    # @param custom_formats [Hash{String => #call}] caller-supplied validators;
    #   these take precedence over the built-in ones
    # @return [Boolean] true when the format is unknown or +value+ is not a
    #   String (nothing to check); otherwise the validator's verdict. The
    #   parser errors listed in the rescue clause count as "invalid".
    def valid?(name, value, custom_formats = {})
      validator = custom_formats[name] || validators[name]
      return true unless validator
      return true unless value.is_a?(String)

      validator.call(value)
    rescue Addressable::URI::InvalidURIError, ArgumentError, IPAddr::InvalidAddressError,
           RegexpError, SimpleIDN::ConversionError, URI::InvalidURIError
      false
    end

    # @return [Hash{String => Proc}] built-in validators keyed by format name
    #   (built once and memoized)
    def validators
      @validators ||= {
        'date-time' => ->(value) { rfc3339_date_time?(value) },
        'date' => ->(value) { rfc3339_date?(value) },
        'time' => ->(value) { rfc3339_time?(value) },
        'duration' => ->(value) { duration?(value) },
        'email' => ->(value) { email?(value, idn: false) },
        'idn-email' => ->(value) { email?(value, idn: true) },
        'hostname' => ->(value) { hostname?(value) },
        'idn-hostname' => ->(value) { hostname?(value, ascii_only: false) },
        'ipv4' => ->(value) { ipv4?(value) },
        'ipv6' => ->(value) { ipv6?(value) },
        'uri' => ->(value) { absolute_uri?(value) },
        'uri-reference' => ->(value) { uri_reference?(value) },
        'iri' => ->(value) { iri?(value, absolute: true) },
        'iri-reference' => ->(value) { iri?(value, absolute: false) },
        'uuid' => ->(value) { value.match?(UUID) },
        'uri-template' => ->(value) { uri_template?(value) },
        'json-pointer' => ->(value) { value.match?(JSON_POINTER) },
        'relative-json-pointer' => ->(value) { value.match?(RELATIVE_JSON_POINTER) },
        'regex' => ->(value) { ecma_regex?(value) }
      }
    end

    # Validates a hostname: 1..253 characters, split into non-empty labels
    # that each pass #hostname_label?.
    #
    # @param ascii_only [Boolean] when true, non-ASCII input is rejected and
    #   only "." separates labels; when false, the IDN separators apply
    def hostname?(value, ascii_only: true)
      return false if value.empty? || value.length > 253
      return false if ascii_only && !value.ascii_only?

      # limit -1 keeps trailing empty labels so "example." is rejected below.
      labels = value.split(ascii_only ? '.' : IDN_SEPARATORS, -1)
      return false if labels.empty? || labels.any?(&:empty?)

      labels.all? { |label| hostname_label?(label, ascii_only: ascii_only) }
    end

    # Validates an ASCII URI that carries a scheme. The parse call is made so
    # that malformed input raises (reported as invalid by #valid?).
    def absolute_uri?(value)
      return false if invalid_uri_ascii?(value)

      uri = Addressable::URI.parse(value)
      uri.scheme && value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
    end

    # Validates an ASCII URI reference (a scheme is optional).
    def uri_reference?(value)
      return false if invalid_uri_ascii?(value, allow_relative: true)

      Addressable::URI.parse(value)
      true
    end

    # Validates an IRI (+absolute+ true) or IRI reference (+absolute+ false).
    # References only get the character and percent-encoding checks.
    def iri?(value, absolute:)
      return false if value.match?(/[\s<>\\]/)
      return false if bad_percent_encoding?(value)
      return true unless absolute
      return false if unbracketed_ipv6_authority?(value)

      Addressable::URI.parse(value)
      value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
    end

    # Checks only that braces are balanced and never nested; expression
    # contents are not inspected.
    def uri_template?(value)
      depth = 0
      value.each_char do |char|
        depth += 1 if char == '{'
        depth -= 1 if char == '}'
        return false if depth.negative? || depth > 1
      end
      depth.zero?
    end

    # Validates a duration of the form PnW or PnYnMnDTnHnMnS.
    def duration?(value)
      return true if value.match?(/\AP[0-9]+W\z/)
      return false unless value.ascii_only?

      match = value.match(/\AP(?:(?<years>[0-9]+)Y)?(?:(?<months>[0-9]+)M)?(?:(?<days>[0-9]+)D)?(?:T(?:(?<hours>[0-9]+)H)?(?:(?<minutes>[0-9]+)M)?(?:(?<seconds>[0-9]+)S)?)?\z/)
      return false unless match

      date_units = %w[years months days].select { |name| match[name] }
      time_units = %w[hours minutes seconds].select { |name| match[name] }
      # "P" and "PT" on their own carry no component.
      return false if date_units.empty? && time_units.empty?
      # A "T" designator must be followed by at least one time component.
      return false if value.include?('T') && time_units.empty?
      # Components may not skip a unit: no years+days without months, and no
      # hours+seconds without minutes.
      return false if match['years'] && match['days'] && !match['months']
      return false if match['hours'] && match['seconds'] && !match['minutes']

      true
    end

    # Validates "<date>T<time>" by checking both halves separately.
    def rfc3339_date_time?(value)
      match = value.match(RFC3339_DATE_TIME)
      return false unless match

      rfc3339_date?(match[1]) && rfc3339_time?(match[2])
    end

    # Validates YYYY-MM-DD, including the day count of the given month.
    def rfc3339_date?(value)
      match = value.match(RFC3339_DATE)
      return false unless match

      year = match[1].to_i
      month = match[2].to_i
      day = match[3].to_i
      return false unless month.between?(1, 12)

      day.between?(1, days_in_month(year, month))
    end

    # Validates HH:MM:SS[.frac] followed by "Z"/"z" or a numeric offset.
    # Second 60 is accepted only when the time falls on 23:59 UTC.
    def rfc3339_time?(value)
      match = value.match(RFC3339_TIME)
      return false unless match

      hour = match[1].to_i
      minute = match[2].to_i
      second = match[3].to_i
      offset = match[4]
      return false unless hour.between?(0, 23) && minute.between?(0, 59)
      return false unless second.between?(0, 60)

      # Captures 5 and 6 (offset hours/minutes) only exist for numeric offsets.
      unless %w[Z z].include?(offset)
        return false unless match[5].to_i.between?(0, 23) && match[6].to_i.between?(0, 59)
      end
      return true if second < 60

      leap_second_utc_minute?(hour, minute, offset, match[5], match[6])
    end

    # @return [Boolean] whether the local +hour+:+minute+, shifted by the
    #   offset, is 23:59 UTC
    def leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute)
      offset_minutes = if %w[Z z].include?(offset)
                         0
                       else
                         sign = offset.start_with?('+') ? 1 : -1
                         sign * ((offset_hour.to_i * 60) + offset_minute.to_i)
                       end

      # Modulo 1440 wraps the shifted minute-of-day across midnight.
      (((hour * 60) + minute - offset_minutes) % 1440) == ((23 * 60) + 59)
    end

    # @return [Integer] number of days in +month+ (1..12) of +year+
    def days_in_month(year, month)
      return 29 if month == 2 && leap_year?(year)
      return 28 if month == 2
      return 30 if [4, 6, 9, 11].include?(month)

      31
    end

    # Gregorian leap-year rule.
    def leap_year?(year)
      (year % 4).zero? && (!(year % 100).zero? || (year % 400).zero?)
    end

    # Validates an e-mail address: a local part, "@", then either a bracketed
    # address literal or a hostname.
    #
    # @param idn [Boolean] allow non-ASCII local parts and IDN hostnames
    def email?(value, idn:)
      local, domain = split_email(value)
      return false unless local && domain
      return false unless local_part?(local, idn: idn)

      if domain.start_with?('[') && domain.end_with?(']')
        address_literal?(domain[1...-1])
      else
        hostname?(domain, ascii_only: !idn)
      end
    end

    # Splits an address into [local, domain].
    #
    # @return [Array(String, String), nil] nil when the address cannot be
    #   split unambiguously
    def split_email(value)
      if value.start_with?('"')
        # Quoted local part: it ends at the first `"@` after the opening quote.
        index = value.index('"@', 1)
        return nil unless index

        [value[0..index], value[(index + 2)..]]
      else
        parts = value.split('@', -1)
        return nil unless parts.length == 2

        parts
      end
    end

    # Validates the local part: a quoted string, a dot-separated run of
    # non-space characters (IDN), or an ASCII dot-atom.
    def local_part?(value, idn:)
      return value.match?(/\A"(?:[^"\\]|\\.)+"\z/) if value.start_with?('"')
      return value.match?(/\A[^\s@.]+(?:\.[^\s@.]+)*\z/) if idn

      value.match?(DOT_ATOM)
    end

    # Validates the inside of a bracketed e-mail domain: "IPv6:<address>" or a
    # dotted-quad IPv4 address.
    def address_literal?(value)
      if value.start_with?('IPv6:')
        ipv6?(value.delete_prefix('IPv6:'))
      else
        ipv4?(value)
      end
    end

    # Validates one hostname label.
    def hostname_label?(label, ascii_only:)
      return false if label.empty?
      return false if label.length > 63 && label.ascii_only?
      return false if label.start_with?('-') || label.end_with?('-')
      # Plain ASCII labels need no IDNA processing.
      return ascii_label?(label) if ascii_only && !label.downcase.start_with?('xn--')

      # Decode Punycode labels so the remaining checks see the Unicode form.
      unicode = label.downcase.start_with?('xn--') ? SimpleIDN.to_unicode(label.downcase) : label
      # Hyphens in the third and fourth positions are rejected.
      return false if unicode[2, 2] == '--'
      return false unless canonical_idn_label?(label, unicode)
      return false if unicode.match?(DISALLOWED_IDNA)
      # A label may not begin with a combining mark.
      return false if unicode.match?(/\A\p{Mark}/)

      contextual_idn_label?(unicode)
    end

    # Validates a label made only of ASCII letters, digits and inner hyphens.
    def ascii_label?(label)
      label.ascii_only? && label.match?(HOST_LABEL) && !label.include?('_')
    end

    # For an "xn--" label, requires that re-encoding the decoded form yields
    # the same label; for any other label, requires that its ASCII encoding
    # fits in 63 characters.
    def canonical_idn_label?(label, unicode)
      ascii = SimpleIDN.to_ascii(unicode).downcase
      return label.downcase == ascii if label.downcase.start_with?('xn--')

      ascii.length <= 63
    end

    # Applies context rules for characters that are valid only next to
    # particular neighbours or scripts.
    def contextual_idn_label?(label)
      chars = label.each_char.to_a
      chars.each_with_index do |char, index|
        # U+00B7 (middle dot) only between two "l" characters.
        return false if char == "\u{00b7}" && !(index.positive? && chars[index - 1] == 'l' && chars[index + 1] == 'l')
        # U+0375 must be followed by a Greek character.
        return false if char == "\u{0375}" && !greek?(chars[index + 1])
        # U+05F3 / U+05F4 must follow a Hebrew character.
        return false if char == "\u{05f3}" && !(index.positive? && hebrew?(chars[index - 1]))
        return false if char == "\u{05f4}" && !(index.positive? && hebrew?(chars[index - 1]))
        # U+30FB needs a Hiragana, Katakana or Han character somewhere in the label.
        return false if char == "\u{30fb}" && !label.match?(/[\p{Hiragana}\p{Katakana}\p{Han}]/)
        # U+200D (zero width joiner) must follow U+094D.
        return false if char == "\u{200d}" && !(index.positive? && virama?(chars[index - 1]))
      end
      # Digits from U+0660..U+0669 and U+06F0..U+06F9 may not be mixed.
      return false if label.match?(/[\u{0660}-\u{0669}]/) && label.match?(/[\u{06f0}-\u{06f9}]/)

      true
    end

    # @return [Boolean, nil] nil when +char+ is nil
    def greek?(char)
      char&.match?(/\p{Greek}/)
    end

    # @return [Boolean, nil] nil when +char+ is nil
    def hebrew?(char)
      char&.match?(/\p{Hebrew}/)
    end

    # Only U+094D is recognised.
    def virama?(char)
      char == "\u{094d}"
    end

    # Validates a dotted-quad IPv4 address. The regexp rejects the alternative
    # notations (leading zeros, missing octets) before IPAddr sees the value.
    def ipv4?(value)
      value.match?(IPV4_DECIMAL) && IPAddr.new(value).ipv4?
    end

    # Validates an IPv6 address; prefix lengths ("/"), zone ids ("%") and
    # whitespace are rejected before the value is handed to IPAddr.
    def ipv6?(value)
      return false unless value.ascii_only?
      return false if value.match?(/\s/) || value.include?('/') || value.include?('%') || !value.include?(':')

      IPAddr.new(value).ipv6?
    end

    # Cheap pre-checks shared by the URI validators.
    #
    # @param allow_relative [Boolean] skip the "must start with a scheme" check
    # @return [Boolean] true when +value+ is definitely not a valid URI
    def invalid_uri_ascii?(value, allow_relative: false)
      return true unless value.ascii_only?
      return true if value.match?(/[\s<>\\]/)
      return true if value.match?(/[{}|^`"]/)
      return true if bad_percent_encoding?(value)
      return true if !allow_relative && !value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)

      false
    end

    # @return [Boolean] whether some "%" is not followed by two hex digits
    def bad_percent_encoding?(value)
      value.match?(/%(?![0-9A-Fa-f]{2})/)
    end

    # @return [Boolean] whether the authority's host part (after any
    #   userinfo) contains ":" without starting with "["
    def unbracketed_ipv6_authority?(value)
      match = value.match(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://([^/?#]*)})
      return false unless match

      host = match[1].split('@', 2).last
      host.include?(':') && !host.start_with?('[')
    end

    # Checks that +value+ compiles as a regular expression after ECMA-style
    # escapes are translated for Ruby.
    #
    # @return [Boolean] false when the pattern uses `\a`, true when it compiles
    # @raise [RegexpError] when the translated pattern does not compile
    def ecma_regex?(value)
      return false if value.match?(/(?:^|[^\\])\\a/)

      # Silence compile-time regexp warnings. $VERBOSE is saved and restored
      # around the compile step only, so the early return above can never
      # reach the restore with an unset local and leave $VERBOSE stuck at nil.
      previous_verbose = $VERBOSE
      begin
        $VERBOSE = nil
        Regexp.new(ecma_pattern_source(value))
      ensure
        $VERBOSE = previous_verbose
      end
      true
    end

    # Rewrites ECMA-style escapes into Ruby regexp source: `\cX` control
    # escapes, the `\d \w \s` classes and their negations as explicit
    # character sets, and every unescaped `$` as `\z`.
    def ecma_pattern_source(value)
      value
        .gsub(/\\c([A-Za-z])/) { Regexp.escape((::Regexp.last_match(1).upcase.ord - 64).chr) }
        .gsub('\\d', '[0-9]')
        .gsub('\\D', '[^0-9]')
        .gsub('\\w', '[A-Za-z0-9_]')
        .gsub('\\W', '[^A-Za-z0-9_]')
        .gsub('\\s', '[\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]')
        .gsub('\\S', '[^\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]')
        .gsub(/(?<!\\)\$/, '\\z')
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
# Try the compiled extension: first as an installed feature, then from the
# in-tree ext/ build directory. The first loader that does not raise LoadError
# wins; if both fail the library carries on without the extension.
[
  -> { require 'senko/senko' },
  -> { require_relative '../../ext/senko/senko' }
].each do |load_native|
  load_native.call
  break
rescue LoadError
  # Pure Ruby fallback. The native extension is a Phase 2 optimization.
  next
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'errors'
4
+
5
module Senko
  # Collects the errors and annotations produced by one validation run and
  # renders them as basic, detailed or verbose hashes.
  class Result
    attr_reader :errors, :annotations

    # @param fail_fast [Boolean] flag reported back by #fail_fast?
    def initialize(fail_fast: false)
      @errors = []
      @annotations = []
      @fail_fast = fail_fast
    end

    # @return [Boolean] true while no error has been recorded
    def valid?
      @errors.size.zero?
    end

    # @return [Boolean] the flag given to the constructor
    def fail_fast?
      @fail_fast
    end

    # Records +error+.
    #
    # @return [Array] the error list
    def add_error(error)
      @errors.push(error)
    end

    # Records +annotation+.
    #
    # @return [Array] the annotation list
    def add_annotation(annotation)
      @annotations.push(annotation)
    end

    # @return [Hash] `{'valid' => true}`, or `'valid' => false` plus an
    #   `'errors'` list built from each error's #to_h
    def to_basic
      return { 'valid' => true } if valid?

      { 'valid' => false, 'errors' => @errors.map(&:to_h) }
    end

    # @return [Hash] like #to_basic, with empty root `keywordLocation` and
    #   `instanceLocation` entries placed before `errors`
    def to_detailed
      report = {
        'valid' => valid?,
        'keywordLocation' => '',
        'instanceLocation' => ''
      }
      report['errors'] = @errors.map(&:to_h) unless report['valid']
      report
    end

    # @return [Hash] the detailed report plus `annotations` (when any were
    #   recorded), with each error extended by its keyword, schema and data
    def to_verbose
      report = to_detailed
      report['annotations'] = @annotations unless @annotations.empty?
      # 'errors' already exists in an invalid report, so assigning here keeps
      # its position ahead of 'annotations'.
      report['errors'] = @errors.map(&method(:verbose_error)) unless valid?
      report
    end

    private

    # Extends an error's hash form with the fields only the verbose report shows.
    def verbose_error(error)
      summary = error.to_h
      summary.merge(
        'keyword' => error.keyword,
        'schema' => error.schema,
        'data' => error.data
      )
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ require_relative 'errors'
6
+ require_relative 'validator'
7
+
8
module Senko
  # A compiled schema: the original source, the instructions derived from it,
  # and the validator used to run those instructions against data.
  class Schema
    attr_reader :source, :instructions, :options

    # @param source [Object] the schema as supplied by the caller
    # @param instructions [Object] instructions handed to the validator
    # @param options [Hash] options; `:fail_fast` is read as the default for
    #   #validate and #validate_json
    # @param generated_validator [#call, nil] optional callable used by
    #   #valid? in place of the validator
    def initialize(source:, instructions:, options:, generated_validator: nil)
      @source = source
      @instructions = instructions
      @options = options
      @generated_validator = generated_validator
      @validator = Validator.new(options)
      # Decided once here so #valid? does not re-inspect the instructions.
      @track_evaluation = Validator.requires_evaluation_tracking?(instructions)
    end

    # @return [Object] the verdict of the generated validator when one was
    #   supplied, otherwise of the validator's `valid?`
    def valid?(data)
      if @generated_validator
        @generated_validator.call(data)
      else
        @validator.valid?(@instructions, data, track_evaluation: @track_evaluation)
      end
    end

    # Runs a full validation and returns whatever the validator returns.
    def validate(data, fail_fast: @options[:fail_fast])
      @validator.validate(@instructions, data, fail_fast: fail_fast)
    end

    # Parses +json+ and reports whether the document validates; the run is
    # always fail-fast.
    def valid_json?(json)
      outcome = validate_json(json, fail_fast: true)
      outcome.valid?
    end

    # Parses +json+ and validates the document. A JSON::ParserError is turned
    # into a result holding a single "parse" error instead of being raised.
    def validate_json(json, fail_fast: @options[:fail_fast])
      document = JSON.parse(json)
      validate(document, fail_fast: fail_fast)
    rescue JSON::ParserError => e
      json_parse_failure(json, e, fail_fast)
    end

    # Validates +data+ (fail-fast) and returns it unchanged on success.
    #
    # @raise [ValidationError] carrying the result when validation fails
    def validate!(data)
      outcome = validate(data, fail_fast: true)
      return data if outcome.valid?

      raise ValidationError, outcome
    end

    private

    # Builds the result returned when the input text is not parseable JSON.
    def json_parse_failure(json, exception, fail_fast)
      Result.new(fail_fast: fail_fast).tap do |failure|
        failure.add_error(
          Error.new(
            message: "invalid JSON: #{exception.message}",
            instance_location: '',
            keyword_location: '',
            keyword: 'parse',
            data: json
          )
        )
      end
    end
  end
end
+ end