json_schemer 0.2.15 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 14785b9586c0f152587eedbdea3da0a37f8a766b21db28821ffa374fef6d6235
4
- data.tar.gz: fe50adb42c6b0da48fc43b7c8ebee40e61a42f4ccfa3d87de6723551388b223c
3
+ metadata.gz: 460e70448c0b37c65fd4e51f046036336b17121cc716c197fe80f2e2517f45e7
4
+ data.tar.gz: f06d11eab9bb88c45a018c8913d6a5d2e33162064c9f995e5772beb811e68ed6
5
5
  SHA512:
6
- metadata.gz: e23c122d31d6a9c38326c90c1ac9af35148ce0b13ff1a576f2fb9bfff7cfdd289d2122b73d5b8b20330d8323e8ec98b11452ec0b70ccd339a26e6dba4d7f33b5
7
- data.tar.gz: a4be1485304ae45f125e6b73494c5a1a5cf98d092c66b25dd2ff81fed0a74833165d8655abc46799faa97af0372d9d3f06e4a5e30554b0c31260b01699aa76be
6
+ metadata.gz: 83549b328ba3067ed206a8bdea448e34ba274c7c5a7857d38793d74f0691339f845b0e39fdbb0ff8c3869dfbaf0dcd13380c458c80397b100a848f96443bf691
7
+ data.tar.gz: 106314824f1805e7d2306c10c696d4503b54b4d9203321e37816242c30598c3870ec40ade53dc08692ec3b2a35f18360c96211f0b8af44fd4414931da4e5b1e5
@@ -1,16 +1,27 @@
1
1
  name: ci
2
2
  on: [push, pull_request]
3
3
  jobs:
4
- ruby:
4
+ test:
5
5
  strategy:
6
+ fail-fast: false
6
7
  matrix:
7
- ruby: [2.4, 2.5, 2.6, 2.7, truffleruby-head]
8
- runs-on: ubuntu-latest
8
+ os: [ubuntu-latest, windows-latest, macos-latest]
9
+ ruby: [2.5, 2.6, 2.7, 3.0, 3.1, 3.2, head, jruby, jruby-head, truffleruby, truffleruby-head]
10
+ exclude:
11
+ - os: windows-latest
12
+ ruby: truffleruby
13
+ - os: windows-latest
14
+ ruby: truffleruby-head
15
+ runs-on: ${{ matrix.os }}
9
16
  steps:
10
17
  - uses: actions/checkout@v2
11
18
  - uses: ruby/setup-ruby@v1
12
19
  with:
13
20
  ruby-version: ${{ matrix.ruby }}
21
+ bundler-cache: true
14
22
  - run: |
15
- bundle install
16
- bundle exec rake test
23
+ mkdir -p tmp/gems
24
+ gem build json_schemer.gemspec
25
+ gem install --local --ignore-dependencies --no-document --install-dir tmp/gems json_schemer-*.gem
26
+ rm json_schemer-*.gem
27
+ bin/rake test
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ ## [1.0.0] - 2023-05-26
4
+
5
+ ### Breaking Changes
6
+
7
+ - Ruby 2.4 is no longer supported.
8
+ - The default `regexp_resolver` is now `ruby`, which passes patterns directly to `Regexp`. The previous default, `ecma`, rewrites patterns to behave more like JavaScript (ECMA-262) regular expressions:
9
+ - Beginning of string: `^` -> `\A`
10
+ - End of string: `$` -> `\z`
11
+ - Space: `\s` -> `[\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
12
+ - Non-space: `\S` -> `[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
13
+ - Invalid ECMA-262 regular expressions raise `JSONSchemer::InvalidEcmaRegexp` when `regexp_resolver` is set to `ecma`.
14
+ - Embedded subschemas (ie, subschemas referenced by `$id`) can only be found under "known" keywords (eg, `definitions`). Previously, the entire schema object was scanned for `$id`.
15
+ - Empty fragments are now removed from `$ref` URIs before calling `ref_resolver`.
16
+ - Refs that are fragment-only JSON pointers with special characters must use the proper encoding (eg, `"$ref": "#/definitions/some-%7Bid%7D"`).
17
+
18
+ [1.0.0]: https://github.com/davishmcclurg/json_schemer/releases/tag/v1.0.0
data/Gemfile.lock CHANGED
@@ -1,24 +1,34 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- json_schemer (0.2.15)
5
- ecma-re-validator (~> 0.2)
4
+ json_schemer (1.0.1)
6
5
  hana (~> 1.3)
7
- regexp_parser (~> 1.5)
8
- uri_template (~> 0.7)
6
+ regexp_parser (~> 2.0)
7
+ simpleidn (~> 0.2)
9
8
 
10
9
  GEM
11
10
  remote: https://rubygems.org/
12
11
  specs:
13
- ecma-re-validator (0.2.1)
14
- regexp_parser (~> 1.2)
15
- hana (1.3.6)
16
- minitest (5.11.3)
17
- rake (13.0.1)
18
- regexp_parser (1.8.1)
19
- uri_template (0.7.0)
12
+ docile (1.4.0)
13
+ hana (1.3.7)
14
+ minitest (5.15.0)
15
+ rake (13.0.6)
16
+ regexp_parser (2.6.1)
17
+ simplecov (0.22.0)
18
+ docile (~> 1.1)
19
+ simplecov-html (~> 0.11)
20
+ simplecov_json_formatter (~> 0.1)
21
+ simplecov-html (0.12.3)
22
+ simplecov_json_formatter (0.1.4)
23
+ simpleidn (0.2.1)
24
+ unf (~> 0.1.4)
25
+ unf (0.1.4)
26
+ unf_ext
27
+ unf (0.1.4-java)
28
+ unf_ext (0.0.8.2)
20
29
 
21
30
  PLATFORMS
31
+ java
22
32
  ruby
23
33
 
24
34
  DEPENDENCIES
@@ -26,6 +36,7 @@ DEPENDENCIES
26
36
  json_schemer!
27
37
  minitest (~> 5.0)
28
38
  rake (~> 13.0)
39
+ simplecov (~> 0.22)
29
40
 
30
41
  BUNDLED WITH
31
- 2.1.4
42
+ 2.3.25
data/README.md CHANGED
@@ -45,7 +45,12 @@ schemer.valid?({ 'abc' => 10 })
45
45
  # error validation (`validate` returns an enumerator)
46
46
 
47
47
  schemer.validate({ 'abc' => 10 }).to_a
48
- # => [{"data"=>10, "schema"=>{"type"=>"integer", "minimum"=>11}, "pointer"=>"#/abc", "type"=>"minimum"}]
48
+ # => [{"data"=>10,
49
+ # "data_pointer"=>"/abc",
50
+ # "schema"=>{"type"=>"integer", "minimum"=>11},
51
+ # "schema_pointer"=>"/properties/abc",
52
+ # "root_schema"=>{"type"=>"object", "properties"=>{"abc"=>{"type"=>"integer", "minimum"=>11}}},
53
+ # "type"=>"minimum"}]
49
54
 
50
55
  # default property values
51
56
 
@@ -74,6 +79,30 @@ schemer = JSONSchemer.schema(schema)
74
79
 
75
80
  schema = '{ "type": "integer" }'
76
81
  schemer = JSONSchemer.schema(schema)
82
+
83
+ # schema validation
84
+
85
+ JSONSchemer.valid_schema?({ '$id' => '#valid' })
86
+ # => true
87
+
88
+ JSONSchemer.validate_schema({ '$id' => nil }).to_a
89
+ # => [{"data"=>nil,
90
+ # "data_pointer"=>"/$id",
91
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
92
+ # "schema_pointer"=>"/properties/$id",
93
+ # "root_schema"=>{...meta schema},
94
+ # "type"=>"string"}]
95
+
96
+ JSONSchemer.schema({ '$id' => '#valid' }).valid_schema?
97
+ # => true
98
+
99
+ JSONSchemer.schema({ '$id' => nil }).validate_schema.to_a
100
+ # => [{"data"=>nil,
101
+ # "data_pointer"=>"/$id",
102
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
103
+ # "schema_pointer"=>"/properties/$id",
104
+ # "root_schema"=>{...meta schema},
105
+ # "type"=>"string"}]
77
106
  ```
78
107
 
79
108
  ## Options
@@ -92,14 +121,60 @@ JSONSchemer.schema(
92
121
  # default: false
93
122
  insert_property_defaults: true,
94
123
 
124
+ # modify properties during validation. You can pass one Proc or a list of Procs to modify data.
125
+ # Proc/[Proc]
126
+ # default: nil
127
+ before_property_validation: proc do |data, property, property_schema, _parent|
128
+ data[property] ||= 42
129
+ end,
130
+
131
+ # modify properties after validation. You can pass one Proc or a list of Procs to modify data.
132
+ # Proc/[Proc]
133
+ # default: nil
134
+ after_property_validation: proc do |data, property, property_schema, _parent|
135
+ data[property] = Date.iso8601(data[property]) if property_schema.is_a?(Hash) && property_schema['format'] == 'date'
136
+ end,
137
+
95
138
  # resolve external references
96
139
  # 'net/http'/proc/lambda/respond_to?(:call)
97
140
  # 'net/http': proc { |uri| JSON.parse(Net::HTTP.get(uri)) }
98
141
  # default: proc { |uri| raise UnknownRef, uri.to_s }
99
- ref_resolver: 'net/http'
142
+ ref_resolver: 'net/http',
143
+
144
+ # use different method to match regexes
145
+ # 'ruby'/'ecma'/proc/lambda/respond_to?(:call)
146
+ # 'ruby': proc { |pattern| Regexp.new(pattern) }
147
+ # default: 'ruby'
148
+ regexp_resolver: proc do |pattern|
149
+ RE2::Regexp.new(pattern)
150
+ end
100
151
  )
101
152
  ```
102
153
 
154
+ ## CLI
155
+
156
+ The `json_schemer` executable takes a JSON schema file as the first argument followed by one or more JSON data files to validate. If there are any validation errors, it outputs them and returns an error code.
157
+
158
+ Validation errors are output as single-line JSON objects. The `--errors` option can be used to limit the number of errors returned or prevent output entirely (and fail fast).
159
+
160
+ The schema or data can also be read from stdin using `-`.
161
+
162
+ ```
163
+ % json_schemer --help
164
+ Usage:
165
+ json_schemer [options] <schema> <data>...
166
+ json_schemer [options] <schema> -
167
+ json_schemer [options] - <data>...
168
+ json_schemer -h | --help
169
+ json_schemer --version
170
+
171
+ Options:
172
+ -e, --errors MAX Maximum number of errors to output
173
+ Use "0" to validate with no output
174
+ -h, --help Show help
175
+ -v, --version Show version
176
+ ```
177
+
103
178
  ## Development
104
179
 
105
180
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,42 @@
#!/usr/bin/env ruby

# Downloads Unicode Character Database files and derives, from them, the code
# point lists needed for the hostname contextual rules referenced below.
# The results are held in `virama_codes` and `codes_by_joining_type`.

require 'open-uri'
require 'csv'

# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2

# Both data files are parsed as `;`-separated records; blank lines and lines
# starting with `#` are skipped.
csv_options = { :col_sep => ';', :skip_blanks => true, :skip_lines => /\A#/ }

unicode_data = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
derived_joining_type = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')

# https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
virama_canonical_combining_class = '9'

# Keep the first field (the code point) of every record whose fourth field
# equals the virama canonical combining class.
virama_codes = CSV.new(unicode_data.read, **csv_options).select do |code, _name, _category, canonical_combining_class|
  canonical_combining_class == virama_canonical_combining_class
end.map(&:first)

# https://www.unicode.org/reports/tr44/#Default_Values
# https://www.unicode.org/reports/tr44/#Derived_Extracted
# Group records by their second field (with any trailing `# ...` comment and
# surrounding whitespace removed), then reduce each group to its stripped
# first field (a code point or a `XXXX..YYYY` range).
codes_by_joining_type = CSV.new(derived_joining_type.read, **csv_options).group_by do |_code, joining_type|
  joining_type.gsub(/#.+/, '').strip
end.transform_values do |rows|
  rows.map do |code, _joining_type|
    code.strip
  end
end
30
+
# Builds the source text of a regexp character class from Unicode code points.
#
# @param codes [Array<String>] hexadecimal code points (`"094D"`) or
#   inclusive ranges written as `"062A..062E"`
# @return [String] a bracketed class in which every run of hex digits is
#   wrapped as `\u{...}` and every `..` is turned into `-`,
#   e.g. `[\u{094D}\u{062A}-\u{062E}]`
def codes_to_character_class(codes)
  characters = codes.map do |code|
    # Block form keeps the literal backslash out of gsub's replacement syntax.
    code.gsub(/\h+/) { |hex| "\\u{#{hex}}" }.gsub('..', '-')
  end
  "[#{characters.join}]"
end
37
+
# Print Ruby constant assignments for the virama class and for the
# L, D, T and R joining types, one per line, to standard output.
puts "VIRAMA_CHARACTER_CLASS = '#{codes_to_character_class(virama_codes)}'"

codes_by_joining_type.slice('L', 'D', 'T', 'R').each do |joining_type, codes|
  puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{codes_to_character_class(codes)}'"
end
data/bin/rake ADDED
@@ -0,0 +1,29 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rake' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
# Point Bundler at the Gemfile two levels above this file's real path,
# unless the caller already chose one.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
  Pathname.new(__FILE__).realpath)

bundle_binstub = File.expand_path("../bundle", __FILE__)

# If a sibling `bundle` binstub exists, load it only when its first 300 bytes
# carry the Bundler marker; otherwise abort with instructions.
if File.file?(bundle_binstub)
  if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
    load(bundle_binstub)
  else
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end
end

require "rubygems"
require "bundler/setup"

load Gem.bin_path("rake", "rake")
data/exe/json_schemer ADDED
@@ -0,0 +1,62 @@
#!/usr/bin/env ruby

# Command-line validator: the first argument is a JSON schema, every following
# argument is a JSON document to validate against it. Each validation error is
# printed to stdout as one JSON object per line. The process exits successfully
# only when no error was found.

require 'json'
require 'optparse'
require 'pathname'
require 'json_schemer'

parser = OptionParser.new('Usage:', 32, ' ')
parser.separator(" #{parser.program_name} [options] <schema> <data>...")
parser.separator(" #{parser.program_name} [options] <schema> -")
parser.separator(" #{parser.program_name} [options] - <data>...")
parser.separator(" #{parser.program_name} -h | --help")
parser.separator(" #{parser.program_name} --version")
parser.separator('')
parser.separator('Options:')
parser.on('-e', '--errors MAX', Integer, 'Maximum number of errors to output', 'Use "0" to validate with no output')
parser.on_tail('-h', '--help', 'Show help')
parser.on_tail('-v', '--version', 'Show version')

# Parsed switches are collected into this hash; parse! also removes them from
# ARGV, leaving only the positional arguments.
options = {}
parser.parse!(:into => options)

if options[:help]
  $stdout.puts(parser)
  exit
end

if options[:version]
  $stdout.puts("#{parser.program_name} #{JSONSchemer::VERSION}")
  exit
end

# Argument sanity checks: a schema and at least one data source are required,
# and `-` may appear at most once.
if ARGV.size == 0
  $stderr.puts("#{parser.program_name}: no schema or data")
  exit(false)
end

if ARGV.size == 1
  $stderr.puts("#{parser.program_name}: no data")
  exit(false)
end

if ARGV.count('-') > 1
  $stderr.puts("#{parser.program_name}: multiple stdin")
  exit(false)
end

errors = 0
# When the first argument is a regular file its path is handed over as a
# Pathname; otherwise the stream's content is read and passed as a string.
schema = ARGF.file.is_a?(File) ? Pathname.new(ARGF.file.path) : ARGF.file.read
schemer = JSONSchemer.schema(schema)

# NOTE: the statement order below is significant. ARGF consumes ARGV as it
# advances, so the loop ends once every data argument has been read.
while ARGV.any?
  data = JSON.parse(ARGF.skip.file.read)
  schemer.validate(data).each do |error|
    # --errors 0: fail on the first error without printing anything
    exit(false) if options[:errors] == 0
    errors += 1
    $stdout.puts(JSON.generate(error))
    # stop as soon as the requested maximum number of errors was printed
    exit(false) if options[:errors] == errors
  end
end

exit(errors.zero?)
data/json_schemer.gemspec CHANGED
@@ -20,22 +20,14 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.required_ruby_version = '~> 2.4'
23
+ spec.required_ruby_version = '>= 2.5'
24
24
 
25
25
  spec.add_development_dependency "bundler", "~> 2.0"
26
26
  spec.add_development_dependency "rake", "~> 13.0"
27
27
  spec.add_development_dependency "minitest", "~> 5.0"
28
+ spec.add_development_dependency "simplecov", "~> 0.22"
28
29
 
29
- # spec.add_development_dependency "benchmark-ips", "~> 2.7.2"
30
- # spec.add_development_dependency "jschema", "~> 0.2.1"
31
- # spec.add_development_dependency "json-schema", "~> 2.8.0"
32
- # spec.add_development_dependency "json_schema", "~> 0.17.0"
33
- # spec.add_development_dependency "json_validation", "~> 0.1.0"
34
- # spec.add_development_dependency "jsonschema", "~> 2.0.2"
35
- # spec.add_development_dependency "rj_schema", "~> 0.2.0"
36
-
37
- spec.add_runtime_dependency "ecma-re-validator", "~> 0.2"
38
30
  spec.add_runtime_dependency "hana", "~> 1.3"
39
- spec.add_runtime_dependency "uri_template", "~> 0.7"
40
- spec.add_runtime_dependency "regexp_parser", "~> 1.5"
31
+ spec.add_runtime_dependency "regexp_parser", "~> 2.0"
32
+ spec.add_runtime_dependency "simpleidn", "~> 0.2"
41
33
  end
@@ -0,0 +1,16 @@
# frozen_string_literal: true
module JSONSchemer
  # Wraps a resolver block and memoizes its results per argument list, so the
  # block runs at most once for any given set of arguments. Results of `nil`
  # or `false` are cached too, because presence of the key (not truthiness of
  # the value) decides whether the block is invoked.
  class CachedResolver
    # @yield [*args] the resolver to memoize
    def initialize(&resolver)
      @resolver = resolver
      @cache = {}
    end

    # @param args [Array] arguments forwarded to the wrapped resolver
    # @return [Object] the cached result, computing and storing it on first use
    def call(*args)
      @cache.fetch(args) do
        @cache[args] = @resolver.call(*args)
      end
    end
  end

  class CachedRefResolver < CachedResolver; end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true
module JSONSchemer
  # Translates ECMA-262 style regular expression source into a pattern string
  # with Ruby-equivalent anchors and whitespace classes, rejecting tokens that
  # the ECMA syntax declared below does not implement.
  class EcmaRegexp
    # Token whitelist describing which regexp constructs are accepted.
    class Syntax < Regexp::Syntax::Base
      # regexp_parser >= 2.3.0 uses syntax classes directly instead of instances
      # :nocov:
      SYNTAX = respond_to?(:implements) ? self : new
      # :nocov:
      SYNTAX.implements :anchor, Anchor::Extended
      SYNTAX.implements :assertion, Assertion::All
      # literal %i[number] to support regexp_parser < 2.2.0 (Backreference::Plain)
      SYNTAX.implements :backref, %i[number] + Backreference::Name
      # :meta_sequence, :bell, and :escape are not supported in ecma
      SYNTAX.implements :escape, Escape::Basic + (Escape::Control - %i[meta_sequence]) + (Escape::ASCII - %i[bell escape]) + Escape::Unicode + Escape::Meta + Escape::Hex + Escape::Octal
      SYNTAX.implements :property, UnicodeProperty::All
      SYNTAX.implements :nonproperty, UnicodeProperty::All
      # :comment is not supported in ecma
      SYNTAX.implements :free_space, (FreeSpace::All - %i[comment])
      SYNTAX.implements :group, Group::Basic + Group::Named + Group::Passive
      SYNTAX.implements :literal, Literal::All
      SYNTAX.implements :meta, Meta::Extended
      SYNTAX.implements :quantifier, Quantifier::Greedy + Quantifier::Reluctant + Quantifier::Interval + Quantifier::IntervalReluctant
      SYNTAX.implements :set, CharacterSet::Basic
      SYNTAX.implements :type, CharacterType::Extended
    end

    # Replacement text for scanned tokens, keyed by token type and then token
    # name; tokens not listed here are emitted unchanged.
    RUBY_EQUIVALENTS = {
      :anchor => {
        :bol => '\A',
        :eol => '\z'
      },
      :type => {
        :space => '[\t\r\n\f\v\uFEFF\u2029\p{Zs}]',
        :nonspace => '[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]'
      }
    }.freeze

    class << self
      # Rewrites +pattern+ token by token.
      #
      # @param pattern [String] regular expression source to translate
      # @return [String] the pattern with listed tokens swapped for their
      #   RUBY_EQUIVALENTS entry and all other token text kept verbatim
      # @raise [InvalidEcmaRegexp] when the scanner cannot tokenize the pattern
      #   or a token is not implemented by Syntax::SYNTAX
      def ruby_equivalent(pattern)
        Regexp::Scanner.scan(pattern).map do |type, token, text|
          Syntax::SYNTAX.check!(*Syntax::SYNTAX.normalize(type, token))
          RUBY_EQUIVALENTS.dig(type, token) || text
        rescue Regexp::Syntax::NotImplementedError
          raise InvalidEcmaRegexp, "invalid token #{text.inspect} (#{type}:#{token}) in #{pattern.inspect}"
        end.join
      rescue Regexp::Scanner::ScannerError
        raise InvalidEcmaRegexp, "invalid pattern #{pattern.inspect}"
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+ module JSONSchemer
3
+ module Format
4
+ module Hostname
5
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.1
6
+ MARKS = '\p{Mn}\p{Mc}'
7
+ LETTER_DIGITS = "\\p{Ll}\\p{Lu}\\p{Lo}\\p{Nd}\\p{Lm}#{MARKS}"
8
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.6
9
+ EXCEPTIONS_PVALID = '\u{06FD}\u{06FE}\u{0F0B}\u{3007}' # \u{00DF}\u{03C2} covered by \p{Ll}
10
+ EXCEPTIONS_DISALLOWED = '\u{0640}\u{07FA}\u{302E}\u{302F}\u{3031}\u{3032}\u{3033}\u{3034}\u{3035}\u{303B}'
11
+ LABEL_CHARACTER_CLASS = "[#{LETTER_DIGITS}#{EXCEPTIONS_PVALID}&&[^#{EXCEPTIONS_DISALLOWED}]]"
12
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
13
+ LEADING_CHARACTER_CLASS = "[#{LABEL_CHARACTER_CLASS}&&[^#{MARKS}]]"
14
+ LABEL_REGEX_STRING = "#{LEADING_CHARACTER_CLASS}([#{LABEL_CHARACTER_CLASS}\-]*#{LABEL_CHARACTER_CLASS})?"
15
+ HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
16
+ # bin/hostname_character_classes
17
+ VIRAMA_CHARACTER_CLASS = '[\u{094D}\u{09CD}\u{0A4D}\u{0ACD}\u{0B4D}\u{0BCD}\u{0C4D}\u{0CCD}\u{0D3B}\u{0D3C}\u{0D4D}\u{0DCA}\u{0E3A}\u{0EBA}\u{0F84}\u{1039}\u{103A}\u{1714}\u{1715}\u{1734}\u{17D2}\u{1A60}\u{1B44}\u{1BAA}\u{1BAB}\u{1BF2}\u{1BF3}\u{2D7F}\u{A806}\u{A82C}\u{A8C4}\u{A953}\u{A9C0}\u{AAF6}\u{ABED}\u{10A3F}\u{11046}\u{11070}\u{1107F}\u{110B9}\u{11133}\u{11134}\u{111C0}\u{11235}\u{112EA}\u{1134D}\u{11442}\u{114C2}\u{115BF}\u{1163F}\u{116B6}\u{1172B}\u{11839}\u{1193D}\u{1193E}\u{119E0}\u{11A34}\u{11A47}\u{11A99}\u{11C3F}\u{11D44}\u{11D45}\u{11D97}\u{11F41}\u{11F42}]'
18
+ JOINING_TYPE_L_CHARACTER_CLASS = '[\u{A872}\u{10ACD}\u{10AD7}\u{10D00}\u{10FCB}]'
19
+ JOINING_TYPE_D_CHARACTER_CLASS = '[\u{0620}\u{0626}\u{0628}\u{062A}-\u{062E}\u{0633}-\u{063F}\u{0641}-\u{0647}\u{0649}-\u{064A}\u{066E}-\u{066F}\u{0678}-\u{0687}\u{069A}-\u{06BF}\u{06C1}-\u{06C2}\u{06CC}\u{06CE}\u{06D0}-\u{06D1}\u{06FA}-\u{06FC}\u{06FF}\u{0712}-\u{0714}\u{071A}-\u{071D}\u{071F}-\u{0727}\u{0729}\u{072B}\u{072D}-\u{072E}\u{074E}-\u{0758}\u{075C}-\u{076A}\u{076D}-\u{0770}\u{0772}\u{0775}-\u{0777}\u{077A}-\u{077F}\u{07CA}-\u{07EA}\u{0841}-\u{0845}\u{0848}\u{084A}-\u{0853}\u{0855}\u{0860}\u{0862}-\u{0865}\u{0868}\u{0886}\u{0889}-\u{088D}\u{08A0}-\u{08A9}\u{08AF}-\u{08B0}\u{08B3}-\u{08B8}\u{08BA}-\u{08C8}\u{1807}\u{1820}-\u{1842}\u{1843}\u{1844}-\u{1878}\u{1887}-\u{18A8}\u{18AA}\u{A840}-\u{A871}\u{10AC0}-\u{10AC4}\u{10AD3}-\u{10AD6}\u{10AD8}-\u{10ADC}\u{10ADE}-\u{10AE0}\u{10AEB}-\u{10AEE}\u{10B80}\u{10B82}\u{10B86}-\u{10B88}\u{10B8A}-\u{10B8B}\u{10B8D}\u{10B90}\u{10BAD}-\u{10BAE}\u{10D01}-\u{10D21}\u{10D23}\u{10F30}-\u{10F32}\u{10F34}-\u{10F44}\u{10F51}-\u{10F53}\u{10F70}-\u{10F73}\u{10F76}-\u{10F81}\u{10FB0}\u{10FB2}-\u{10FB3}\u{10FB8}\u{10FBB}-\u{10FBC}\u{10FBE}-\u{10FBF}\u{10FC1}\u{10FC4}\u{10FCA}\u{1E900}-\u{1E943}]'
20
+ JOINING_TYPE_T_CHARACTER_CLASS = '[\u{00AD}\u{0300}-\u{036F}\u{0483}-\u{0487}\u{0488}-\u{0489}\u{0591}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}-\u{05C5}\u{05C7}\u{0610}-\u{061A}\u{061C}\u{064B}-\u{065F}\u{0670}\u{06D6}-\u{06DC}\u{06DF}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{070F}\u{0711}\u{0730}-\u{074A}\u{07A6}-\u{07B0}\u{07EB}-\u{07F3}\u{07FD}\u{0816}-\u{0819}\u{081B}-\u{0823}\u{0825}-\u{0827}\u{0829}-\u{082D}\u{0859}-\u{085B}\u{0898}-\u{089F}\u{08CA}-\u{08E1}\u{08E3}-\u{0902}\u{093A}\u{093C}\u{0941}-\u{0948}\u{094D}\u{0951}-\u{0957}\u{0962}-\u{0963}\u{0981}\u{09BC}\u{09C1}-\u{09C4}\u{09CD}\u{09E2}-\u{09E3}\u{09FE}\u{0A01}-\u{0A02}\u{0A3C}\u{0A41}-\u{0A42}\u{0A47}-\u{0A48}\u{0A4B}-\u{0A4D}\u{0A51}\u{0A70}-\u{0A71}\u{0A75}\u{0A81}-\u{0A82}\u{0ABC}\u{0AC1}-\u{0AC5}\u{0AC7}-\u{0AC8}\u{0ACD}\u{0AE2}-\u{0AE3}\u{0AFA}-\u{0AFF}\u{0B01}\u{0B3C}\u{0B3F}\u{0B41}-\u{0B44}\u{0B4D}\u{0B55}-\u{0B56}\u{0B62}-\u{0B63}\u{0B82}\u{0BC0}\u{0BCD}\u{0C00}\u{0C04}\u{0C3C}\u{0C3E}-\u{0C40}\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C62}-\u{0C63}\u{0C81}\u{0CBC}\u{0CBF}\u{0CC6}\u{0CCC}-\u{0CCD}\u{0CE2}-\u{0CE3}\u{0D00}-\u{0D01}\u{0D3B}-\u{0D3C}\u{0D41}-\u{0D44}\u{0D4D}\u{0D62}-\u{0D63}\u{0D81}\u{0DCA}\u{0DD2}-\u{0DD4}\u{0DD6}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}\u{0EB4}-\u{0EBC}\u{0EC8}-\u{0ECE}\u{0F18}-\u{0F19}\u{0F35}\u{0F37}\u{0F39}\u{0F71}-\u{0F7E}\u{0F80}-\u{0F84}\u{0F86}-\u{0F87}\u{0F8D}-\u{0F97}\u{0F99}-\u{0FBC}\u{0FC6}\u{102D}-\u{1030}\u{1032}-\u{1037}\u{1039}-\u{103A}\u{103D}-\u{103E}\u{1058}-\u{1059}\u{105E}-\u{1060}\u{1071}-\u{1074}\u{1082}\u{1085}-\u{1086}\u{108D}\u{109D}\u{135D}-\u{135F}\u{1712}-\u{1714}\u{1732}-\u{1733}\u{1752}-\u{1753}\u{1772}-\u{1773}\u{17B4}-\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}\u{180B}-\u{180D}\u{180F}\u{1885}-\u{1886}\u{18A9}\u{1920}-\u{1922}\u{1927}-\u{1928}\u{1932}\u{1939}-\u{193B}\u{1A17}-\u{1A18}\u{1A1B}\u{1A56}\u{1A58}-\u{1A5E}\u{1A60}\u{1A62}\u{1A65}-\u{1A6C}\u{1A73}-\u{1A7C}\u{1A7F}\u{1AB0}-\u
{1ABD}\u{1ABE}\u{1ABF}-\u{1ACE}\u{1B00}-\u{1B03}\u{1B34}\u{1B36}-\u{1B3A}\u{1B3C}\u{1B42}\u{1B6B}-\u{1B73}\u{1B80}-\u{1B81}\u{1BA2}-\u{1BA5}\u{1BA8}-\u{1BA9}\u{1BAB}-\u{1BAD}\u{1BE6}\u{1BE8}-\u{1BE9}\u{1BED}\u{1BEF}-\u{1BF1}\u{1C2C}-\u{1C33}\u{1C36}-\u{1C37}\u{1CD0}-\u{1CD2}\u{1CD4}-\u{1CE0}\u{1CE2}-\u{1CE8}\u{1CED}\u{1CF4}\u{1CF8}-\u{1CF9}\u{1DC0}-\u{1DFF}\u{200B}\u{200E}-\u{200F}\u{202A}-\u{202E}\u{2060}-\u{2064}\u{206A}-\u{206F}\u{20D0}-\u{20DC}\u{20DD}-\u{20E0}\u{20E1}\u{20E2}-\u{20E4}\u{20E5}-\u{20F0}\u{2CEF}-\u{2CF1}\u{2D7F}\u{2DE0}-\u{2DFF}\u{302A}-\u{302D}\u{3099}-\u{309A}\u{A66F}\u{A670}-\u{A672}\u{A674}-\u{A67D}\u{A69E}-\u{A69F}\u{A6F0}-\u{A6F1}\u{A802}\u{A806}\u{A80B}\u{A825}-\u{A826}\u{A82C}\u{A8C4}-\u{A8C5}\u{A8E0}-\u{A8F1}\u{A8FF}\u{A926}-\u{A92D}\u{A947}-\u{A951}\u{A980}-\u{A982}\u{A9B3}\u{A9B6}-\u{A9B9}\u{A9BC}-\u{A9BD}\u{A9E5}\u{AA29}-\u{AA2E}\u{AA31}-\u{AA32}\u{AA35}-\u{AA36}\u{AA43}\u{AA4C}\u{AA7C}\u{AAB0}\u{AAB2}-\u{AAB4}\u{AAB7}-\u{AAB8}\u{AABE}-\u{AABF}\u{AAC1}\u{AAEC}-\u{AAED}\u{AAF6}\u{ABE5}\u{ABE8}\u{ABED}\u{FB1E}\u{FE00}-\u{FE0F}\u{FE20}-\u{FE2F}\u{FEFF}\u{FFF9}-\u{FFFB}\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}-\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}-\u{10AE6}\u{10D24}-\u{10D27}\u{10EAB}-\u{10EAC}\u{10EFD}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11001}\u{11038}-\u{11046}\u{11070}\u{11073}-\u{11074}\u{1107F}-\u{11081}\u{110B3}-\u{110B6}\u{110B9}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{1112B}\u{1112D}-\u{11134}\u{11173}\u{11180}-\u{11181}\u{111B6}-\u{111BE}\u{111C9}-\u{111CC}\u{111CF}\u{1122F}-\u{11231}\u{11234}\u{11236}-\u{11237}\u{1123E}\u{11241}\u{112DF}\u{112E3}-\u{112EA}\u{11300}-\u{11301}\u{1133B}-\u{1133C}\u{11340}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{11438}-\u{1143F}\u{11442}-\u{11444}\u{11446}\u{1145E}\u{114B3}-\u{114B8}\u{114BA}\u{114BF}-\u{114C0}\u{114C2}-\u{114C3}\u{115B2}-\u{115B5}\u{115BC}-\u{115BD}\u{115BF}-\u{115C0}\u{115DC}-\u{115DD}\u{11633}-\u{1163A}\u
{1163D}\u{1163F}-\u{11640}\u{116AB}\u{116AD}\u{116B0}-\u{116B5}\u{116B7}\u{1171D}-\u{1171F}\u{11722}-\u{11725}\u{11727}-\u{1172B}\u{1182F}-\u{11837}\u{11839}-\u{1183A}\u{1193B}-\u{1193C}\u{1193E}\u{11943}\u{119D4}-\u{119D7}\u{119DA}-\u{119DB}\u{119E0}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A38}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A56}\u{11A59}-\u{11A5B}\u{11A8A}-\u{11A96}\u{11A98}-\u{11A99}\u{11C30}-\u{11C36}\u{11C38}-\u{11C3D}\u{11C3F}\u{11C92}-\u{11CA7}\u{11CAA}-\u{11CB0}\u{11CB2}-\u{11CB3}\u{11CB5}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}-\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D90}-\u{11D91}\u{11D95}\u{11D97}\u{11EF3}-\u{11EF4}\u{11F00}-\u{11F01}\u{11F36}-\u{11F3A}\u{11F40}\u{11F42}\u{13430}-\u{1343F}\u{13440}\u{13447}-\u{13455}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F8F}-\u{16F92}\u{16FE4}\u{1BC9D}-\u{1BC9E}\u{1BCA0}-\u{1BCA3}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D167}-\u{1D169}\u{1D173}-\u{1D17A}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}-\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{1E94B}\u{E0001}\u{E0020}-\u{E007F}\u{E0100}-\u{E01EF}]'
21
+ JOINING_TYPE_R_CHARACTER_CLASS = '[\u{0622}-\u{0625}\u{0627}\u{0629}\u{062F}-\u{0632}\u{0648}\u{0671}-\u{0673}\u{0675}-\u{0677}\u{0688}-\u{0699}\u{06C0}\u{06C3}-\u{06CB}\u{06CD}\u{06CF}\u{06D2}-\u{06D3}\u{06D5}\u{06EE}-\u{06EF}\u{0710}\u{0715}-\u{0719}\u{071E}\u{0728}\u{072A}\u{072C}\u{072F}\u{074D}\u{0759}-\u{075B}\u{076B}-\u{076C}\u{0771}\u{0773}-\u{0774}\u{0778}-\u{0779}\u{0840}\u{0846}-\u{0847}\u{0849}\u{0854}\u{0856}-\u{0858}\u{0867}\u{0869}-\u{086A}\u{0870}-\u{0882}\u{088E}\u{08AA}-\u{08AC}\u{08AE}\u{08B1}-\u{08B2}\u{08B9}\u{10AC5}\u{10AC7}\u{10AC9}-\u{10ACA}\u{10ACE}-\u{10AD2}\u{10ADD}\u{10AE1}\u{10AE4}\u{10AEF}\u{10B81}\u{10B83}-\u{10B85}\u{10B89}\u{10B8C}\u{10B8E}-\u{10B8F}\u{10B91}\u{10BA9}-\u{10BAC}\u{10D22}\u{10F33}\u{10F54}\u{10F74}-\u{10F75}\u{10FB4}-\u{10FB6}\u{10FB9}-\u{10FBA}\u{10FBD}\u{10FC2}-\u{10FC3}\u{10FC9}]'
22
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
23
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
24
+ ZERO_WIDTH_VIRAMA = "#{VIRAMA_CHARACTER_CLASS}[\\u{200C}\\u{200D}]"
25
+ ZERO_WIDTH_NON_JOINER_JOINING_TYPE = "[#{JOINING_TYPE_L_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]#{JOINING_TYPE_T_CHARACTER_CLASS}*\\u{200C}#{JOINING_TYPE_T_CHARACTER_CLASS}*[#{JOINING_TYPE_R_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]"
26
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
27
+ MIDDLE_DOT = '\u{006C}\u{00B7}\u{006C}'
28
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
29
+ GREEK_LOWER_NUMERAL_SIGN = '\u{0375}\p{Greek}'
30
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
31
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
32
+ HEBREW_PUNCTUATION = '\p{Hebrew}[\u{05F3}\u{05F4}]'
33
+ CONTEXT_REGEX = /(#{ZERO_WIDTH_VIRAMA}|#{ZERO_WIDTH_NON_JOINER_JOINING_TYPE}|#{MIDDLE_DOT}|#{GREEK_LOWER_NUMERAL_SIGN}|#{HEBREW_PUNCTUATION})/.freeze
34
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
35
+ KATAKANA_MIDDLE_DOT_REGEX = /\u{30FB}/.freeze
36
+ KATAKANA_MIDDLE_DOT_CONTEXT_REGEX = /[\p{Hiragana}\p{Katakana}\p{Han}]/.freeze
37
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
38
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
39
+ ARABIC_INDIC_DIGITS_REGEX = /[\u{0660}-\u{0669}]/.freeze
40
+ ARABIC_EXTENDED_DIGITS_REGEX = /[\u{06F0}-\u{06F9}]/.freeze
41
+
# Checks +data+ label by label and then as a whole against HOSTNAME_REGEX.
#
# Each dot-separated label is rejected when it is longer than 63 characters,
# when its Unicode form has `--` in the third and fourth positions, or when
# it mixes Arabic-Indic and extended Arabic-Indic digits. Sequences matched
# by the contextual-rule regexes are replaced with the placeholder `ok`
# before the final match.
#
# @param data [String] candidate hostname
# @return [Boolean] true when every label passes and the rejoined labels
#   match HOSTNAME_REGEX; false otherwise, including when SimpleIDN cannot
#   convert a label
def valid_hostname?(data)
  data.split('.').map do |a_label|
    return false if a_label.size > 63
    u_label = SimpleIDN.to_unicode(a_label)
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
    return false if u_label.slice(2, 2) == '--'
    return false if ARABIC_INDIC_DIGITS_REGEX.match?(u_label) && ARABIC_EXTENDED_DIGITS_REGEX.match?(u_label)
    u_label.gsub!(CONTEXT_REGEX, 'ok')
    # the katakana middle dot is only neutralised when the label also holds a
    # Hiragana, Katakana or Han character
    u_label.gsub!(KATAKANA_MIDDLE_DOT_REGEX, 'ok') if KATAKANA_MIDDLE_DOT_CONTEXT_REGEX.match?(u_label)
    u_label
  end.join('.').match?(HOSTNAME_REGEX)
rescue SimpleIDN::ConversionError
  false
end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,34 @@
# frozen_string_literal: true
module JSONSchemer
  module Format
    # Syntax check for URI templates, assembled piece by piece from the
    # grammar rules quoted next to each constant.
    module URITemplate
      # https://datatracker.ietf.org/doc/html/rfc6570
      PCT_ENCODED = '%\h{2}'                                          # pct-encoded     =  "%" HEXDIG HEXDIG
      EXPLODE = '\*'                                                  # explode         =  "*"
      MAX_LENGTH = '[1-9]\d{0,3}'                                     # max-length      =  %x31-39 0*3DIGIT ; positive integer < 10000
      PREFIX = ":#{MAX_LENGTH}"                                       # prefix          =  ":" max-length
      MODIFIER_LEVEL4 = "#{PREFIX}|#{EXPLODE}"                        # modifier-level4 =  prefix / explode
      VARCHAR = "(\\w|#{PCT_ENCODED})"                                # varchar         =  ALPHA / DIGIT / "_" / pct-encoded
      VARNAME = "#{VARCHAR}(\\.?#{VARCHAR})*"                         # varname         =  varchar *( ["."] varchar )
      VARSPEC = "#{VARNAME}(#{MODIFIER_LEVEL4})?"                     # varspec         =  varname [ modifier-level4 ]
      VARIABLE_LIST = "#{VARSPEC}(,#{VARSPEC})*"                      # variable-list   =  varspec *( "," varspec )
      OPERATOR = '[+#./;?&=,!@|]'                                     # operator        =  op-level2 / op-level3 / op-reserve
                                                                      # op-level2       =  "+" / "#"
                                                                      # op-level3       =  "." / "/" / ";" / "?" / "&"
                                                                      # op-reserve      =  "=" / "," / "!" / "@" / "|"
      EXPRESSION = "{#{OPERATOR}?#{VARIABLE_LIST}}"                   # expression      =  "{" [ operator ] variable-list "}"
      LITERALS = "[^\\x00-\\x20\\x7F\"%'<>\\\\^`{|}]|#{PCT_ENCODED}"  # literals        =  %x21 / %x23-24 / %x26 / %x28-3B / %x3D / %x3F-5B
                                                                      #                 /  %x5D / %x5F / %x61-7A / %x7E / ucschar / iprivate
                                                                      #                 /  pct-encoded
                                                                      #                      ; any Unicode character except: CTL, SP,
                                                                      #                      ;  DQUOTE, "'", "%" (aside from pct-encoded),
                                                                      #                      ;  "<", ">", "\", "^", "`", "{", "|", "}"
      URI_TEMPLATE = "(#{LITERALS}|#{EXPRESSION})*"                   # URI-Template    =  *( literals / expression )
      # Anchored to the whole string; frozen like the other regexp constants
      # in the format modules.
      URI_TEMPLATE_REGEX = /\A#{URI_TEMPLATE}\z/.freeze

      # @param data [String] candidate URI template
      # @return [Boolean] whether the entire string matches URI_TEMPLATE_REGEX
      def valid_uri_template?(data)
        URI_TEMPLATE_REGEX.match?(data)
      end
    end
  end
end