json_schemer 0.2.24 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24ab3d7f62a2ca93d0dcd3fb33191de983006b504d7bdae60066268fabf7eb50
4
- data.tar.gz: 5f695dffd93a834e81491d5a5b328e6b193d56886666533e6ec0c5e7ba5b5be0
3
+ metadata.gz: 93264835a6cd46b1c657876bda6a68c53a130c36390e84c316f6ffad3d00194f
4
+ data.tar.gz: 6bc567c682cb103cc673dade1f0a46a0b7500973acf6915acd7d7093de7a74e3
5
5
  SHA512:
6
- metadata.gz: 0bd588c8b88bd813c52a13bb997cdd3f668f9b36570bc15a0780aa35b80fdaa49cc2862f6749be1cea9c5708169cbee990385799907a3957c7ceac32ce9ff5cf
7
- data.tar.gz: 6c835337e619f60ae9a72a4036dbcbeb06519e9c3c6f47216ab4650f0c15955a45be0ae2ace13a0a2c92f6066e106fa6073b6c795ff808cc90854f85a1ad9e76
6
+ metadata.gz: 5e0fe51563031e2bcce3331cd13b251a0e8007740c9d5b70e59a03b5151114e221ad50b24358094ccc33b31fcabb0b62e76674a966d5c8bf3ed872accd839f4f
7
+ data.tar.gz: 2835c05bbb368718f20e8ec435def32619b3876d7c1e78259f92ac8b57d0f45b27cc6a9481fc101efe8e0456054ddfffc8d05ad60f37b63f8e2475e95b81e368
@@ -6,12 +6,8 @@ jobs:
6
6
  fail-fast: false
7
7
  matrix:
8
8
  os: [ubuntu-latest, windows-latest, macos-latest]
9
- ruby: [2.4, 2.5, 2.6, 2.7, 3.0, 3.1, head, jruby, jruby-head, truffleruby, truffleruby-head]
9
+ ruby: [2.5, 2.6, 2.7, 3.0, 3.1, 3.2, head, jruby, jruby-head, truffleruby, truffleruby-head]
10
10
  exclude:
11
- - os: windows-latest
12
- ruby: jruby
13
- - os: windows-latest
14
- ruby: jruby-head
15
11
  - os: windows-latest
16
12
  ruby: truffleruby
17
13
  - os: windows-latest
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ ## [1.0.0] - 2023-05-26
4
+
5
+ ### Breaking Changes
6
+
7
+ - Ruby 2.4 is no longer supported.
8
+ - The default `regexp_resolver` is now `ruby`, which passes patterns directly to `Regexp`. The previous default, `ecma`, rewrites patterns to behave more like JavaScript (ECMA-262) regular expressions:
9
+ - Beginning of string: `^` -> `\A`
10
+ - End of string: `$` -> `\z`
11
+ - Space: `\s` -> `[\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
12
+ - Non-space: `\S` -> `[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
13
+ - Invalid ECMA-262 regular expressions raise `JSONSchemer::InvalidEcmaRegexp` when `regexp_resolver` is set to `ecma`.
14
+ - Embedded subschemas (i.e., subschemas referenced by `$id`) can only be found under "known" keywords (e.g., `definitions`). Previously, the entire schema object was scanned for `$id`.
15
+ - Empty fragments are now removed from `$ref` URIs before calling `ref_resolver`.
16
+ - Refs that are fragment-only JSON pointers with special characters must use the proper encoding (e.g., `"$ref": "#/definitions/some-%7Bid%7D"`).
17
+
18
+ [1.0.0]: https://github.com/davishmcclurg/json_schemer/releases/tag/v1.0.0
data/Gemfile.lock CHANGED
@@ -1,24 +1,36 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- json_schemer (0.2.24)
5
- ecma-re-validator (~> 0.3)
4
+ json_schemer (1.0.0)
6
5
  hana (~> 1.3)
7
6
  regexp_parser (~> 2.0)
7
+ simpleidn (~> 0.2)
8
8
  uri_template (~> 0.7)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- ecma-re-validator (0.3.0)
14
- regexp_parser (~> 2.0)
13
+ docile (1.4.0)
15
14
  hana (1.3.7)
16
15
  minitest (5.15.0)
17
16
  rake (13.0.6)
18
17
  regexp_parser (2.6.1)
18
+ simplecov (0.22.0)
19
+ docile (~> 1.1)
20
+ simplecov-html (~> 0.11)
21
+ simplecov_json_formatter (~> 0.1)
22
+ simplecov-html (0.12.3)
23
+ simplecov_json_formatter (0.1.4)
24
+ simpleidn (0.2.1)
25
+ unf (~> 0.1.4)
26
+ unf (0.1.4)
27
+ unf_ext
28
+ unf (0.1.4-java)
29
+ unf_ext (0.0.8.2)
19
30
  uri_template (0.7.0)
20
31
 
21
32
  PLATFORMS
33
+ java
22
34
  ruby
23
35
 
24
36
  DEPENDENCIES
@@ -26,6 +38,7 @@ DEPENDENCIES
26
38
  json_schemer!
27
39
  minitest (~> 5.0)
28
40
  rake (~> 13.0)
41
+ simplecov (~> 0.22)
29
42
 
30
43
  BUNDLED WITH
31
44
  2.3.25
data/README.md CHANGED
@@ -45,7 +45,12 @@ schemer.valid?({ 'abc' => 10 })
45
45
  # error validation (`validate` returns an enumerator)
46
46
 
47
47
  schemer.validate({ 'abc' => 10 }).to_a
48
- # => [{"data"=>10, "schema"=>{"type"=>"integer", "minimum"=>11}, "pointer"=>"#/abc", "type"=>"minimum"}]
48
+ # => [{"data"=>10,
49
+ # "data_pointer"=>"/abc",
50
+ # "schema"=>{"type"=>"integer", "minimum"=>11},
51
+ # "schema_pointer"=>"/properties/abc",
52
+ # "root_schema"=>{"type"=>"object", "properties"=>{"abc"=>{"type"=>"integer", "minimum"=>11}}},
53
+ # "type"=>"minimum"}]
49
54
 
50
55
  # default property values
51
56
 
@@ -74,6 +79,30 @@ schemer = JSONSchemer.schema(schema)
74
79
 
75
80
  schema = '{ "type": "integer" }'
76
81
  schemer = JSONSchemer.schema(schema)
82
+
83
+ # schema validation
84
+
85
+ JSONSchemer.valid_schema?({ '$id' => '#valid' })
86
+ # => true
87
+
88
+ JSONSchemer.validate_schema({ '$id' => nil }).to_a
89
+ # => [{"data"=>nil,
90
+ # "data_pointer"=>"/$id",
91
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
92
+ # "schema_pointer"=>"/properties/$id",
93
+ # "root_schema"=>{...meta schema},
94
+ # "type"=>"string"}]
95
+
96
+ JSONSchemer.schema({ '$id' => '#valid' }).valid_schema?
97
+ # => true
98
+
99
+ JSONSchemer.schema({ '$id' => nil }).validate_schema.to_a
100
+ # => [{"data"=>nil,
101
+ # "data_pointer"=>"/$id",
102
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
103
+ # "schema_pointer"=>"/properties/$id",
104
+ # "root_schema"=>{...meta schema},
105
+ # "type"=>"string"}]
77
106
  ```
78
107
 
79
108
  ## Options
@@ -113,8 +142,9 @@ JSONSchemer.schema(
113
142
  ref_resolver: 'net/http',
114
143
 
115
144
  # use different method to match regexes
116
- # 'ecma'/'ruby'/proc/lambda/respond_to?(:call)
117
- # default: 'ecma'
145
+ # 'ruby'/'ecma'/proc/lambda/respond_to?(:call)
146
+ # 'ruby': proc { |pattern| Regexp.new(pattern) }
147
+ # default: 'ruby'
118
148
  regexp_resolver: proc do |pattern|
119
149
  RE2::Regexp.new(pattern)
120
150
  end
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'open-uri'
4
+ require 'csv'
5
+
6
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
7
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
8
+
9
+ csv_options = { :col_sep => ';', :skip_blanks => true, :skip_lines => /\A#/ }
10
+
11
+ unicode_data = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
12
+ derived_joining_type = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')
13
+
14
+ # https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
15
+ virama_canonical_combining_class = '9'
16
+
17
+ virama_codes = CSV.new(unicode_data.read, **csv_options).select do |code, _name, _category, canonical_combining_class|
18
+ canonical_combining_class == virama_canonical_combining_class
19
+ end.map(&:first)
20
+
21
+ # https://www.unicode.org/reports/tr44/#Default_Values
22
+ # https://www.unicode.org/reports/tr44/#Derived_Extracted
23
+ codes_by_joining_type = CSV.new(derived_joining_type.read, **csv_options).group_by do |_code, joining_type|
24
+ joining_type.gsub(/#.+/, '').strip
25
+ end.transform_values do |rows|
26
+ rows.map do |code, _joining_type|
27
+ code.strip
28
+ end
29
+ end
30
+
31
+ def codes_to_character_class(codes)
32
+ characters = codes.map do |code|
33
+ code.gsub(/(\h+)/, '\u{\1}').gsub('..', '-')
34
+ end
35
+ "[#{characters.join}]"
36
+ end
37
+
38
+ puts "VIRAMA_CHARACTER_CLASS = '#{codes_to_character_class(virama_codes)}'"
39
+
40
+ codes_by_joining_type.slice('L', 'D', 'T', 'R').each do |joining_type, codes|
41
+ puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{codes_to_character_class(codes)}'"
42
+ end
data/json_schemer.gemspec CHANGED
@@ -20,22 +20,15 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.required_ruby_version = '>= 2.4'
23
+ spec.required_ruby_version = '>= 2.5'
24
24
 
25
25
  spec.add_development_dependency "bundler", "~> 2.0"
26
26
  spec.add_development_dependency "rake", "~> 13.0"
27
27
  spec.add_development_dependency "minitest", "~> 5.0"
28
+ spec.add_development_dependency "simplecov", "~> 0.22"
28
29
 
29
- # spec.add_development_dependency "benchmark-ips", "~> 2.7.2"
30
- # spec.add_development_dependency "jschema", "~> 0.2.1"
31
- # spec.add_development_dependency "json-schema", "~> 2.8.0"
32
- # spec.add_development_dependency "json_schema", "~> 0.17.0"
33
- # spec.add_development_dependency "json_validation", "~> 0.1.0"
34
- # spec.add_development_dependency "jsonschema", "~> 2.0.2"
35
- # spec.add_development_dependency "rj_schema", "~> 0.2.0"
36
-
37
- spec.add_runtime_dependency "ecma-re-validator", "~> 0.3"
38
30
  spec.add_runtime_dependency "hana", "~> 1.3"
39
- spec.add_runtime_dependency "uri_template", "~> 0.7"
40
31
  spec.add_runtime_dependency "regexp_parser", "~> 2.0"
32
+ spec.add_runtime_dependency "simpleidn", "~> 0.2"
33
+ spec.add_runtime_dependency "uri_template", "~> 0.7"
41
34
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+ module JSONSchemer
3
+ class EcmaRegexp
4
+ class Syntax < Regexp::Syntax::Base
5
+ implements :anchor, Anchor::Extended
6
+ implements :assertion, Assertion::All
7
+ implements :backref, Backreference::Plain + Backreference::Name
8
+ implements :escape, Escape::Basic + %i[control backspace form_feed newline carriage tab vertical_tab] + Escape::Unicode + Escape::Meta + Escape::Hex + Escape::Octal
9
+ implements :property, UnicodeProperty::All
10
+ implements :nonproperty, UnicodeProperty::All
11
+ implements :free_space, %i[whitespace]
12
+ implements :group, Group::Basic + Group::Named + Group::Passive
13
+ implements :literal, Literal::All
14
+ implements :meta, Meta::Extended
15
+ implements :quantifier, Quantifier::Greedy + Quantifier::Reluctant + Quantifier::Interval + Quantifier::IntervalReluctant
16
+ implements :set, CharacterSet::Basic
17
+ implements :type, CharacterType::Extended
18
+ end
19
+
20
+ RUBY_EQUIVALENTS = {
21
+ :anchor => {
22
+ :bol => '\A',
23
+ :eol => '\z'
24
+ },
25
+ :type => {
26
+ :space => '[\t\r\n\f\v\uFEFF\u2029\p{Zs}]',
27
+ :nonspace => '[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]'
28
+ }
29
+ }.freeze
30
+
31
+ class << self
32
+ def ruby_equivalent(pattern)
33
+ Regexp::Scanner.scan(pattern).map do |type, token, text|
34
+ Syntax.check!(*Syntax.normalize(type, token))
35
+ RUBY_EQUIVALENTS.dig(type, token) || text
36
+ rescue Regexp::Syntax::NotImplementedError
37
+ raise InvalidEcmaRegexp, "invalid token #{text.inspect} (#{type}:#{token}) in #{pattern.inspect}"
38
+ end.join
39
+ rescue Regexp::Scanner::ScannerError
40
+ raise InvalidEcmaRegexp, "invalid pattern #{pattern.inspect}"
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+ module JSONSchemer
3
+ module Format
4
+ module Hostname
5
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.1
6
+ MARKS = '\p{Mn}\p{Mc}'
7
+ LETTER_DIGITS = "\\p{Ll}\\p{Lu}\\p{Lo}\\p{Nd}\\p{Lm}#{MARKS}"
8
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.6
9
+ EXCEPTIONS_PVALID = '\u{06FD}\u{06FE}\u{0F0B}\u{3007}' # \u{00DF}\u{03C2} covered by \p{Ll}
10
+ EXCEPTIONS_DISALLOWED = '\u{0640}\u{07FA}\u{302E}\u{302F}\u{3031}\u{3032}\u{3033}\u{3034}\u{3035}\u{303B}'
11
+ LABEL_CHARACTER_CLASS = "[#{LETTER_DIGITS}#{EXCEPTIONS_PVALID}&&[^#{EXCEPTIONS_DISALLOWED}]]"
12
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
13
+ LEADING_CHARACTER_CLASS = "[#{LABEL_CHARACTER_CLASS}&&[^#{MARKS}]]"
14
+ LABEL_REGEX_STRING = "#{LEADING_CHARACTER_CLASS}([#{LABEL_CHARACTER_CLASS}\-]*#{LABEL_CHARACTER_CLASS})?"
15
+ HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
16
+ # bin/hostname_character_classes
17
+ VIRAMA_CHARACTER_CLASS = '[\u{094D}\u{09CD}\u{0A4D}\u{0ACD}\u{0B4D}\u{0BCD}\u{0C4D}\u{0CCD}\u{0D3B}\u{0D3C}\u{0D4D}\u{0DCA}\u{0E3A}\u{0EBA}\u{0F84}\u{1039}\u{103A}\u{1714}\u{1715}\u{1734}\u{17D2}\u{1A60}\u{1B44}\u{1BAA}\u{1BAB}\u{1BF2}\u{1BF3}\u{2D7F}\u{A806}\u{A82C}\u{A8C4}\u{A953}\u{A9C0}\u{AAF6}\u{ABED}\u{10A3F}\u{11046}\u{11070}\u{1107F}\u{110B9}\u{11133}\u{11134}\u{111C0}\u{11235}\u{112EA}\u{1134D}\u{11442}\u{114C2}\u{115BF}\u{1163F}\u{116B6}\u{1172B}\u{11839}\u{1193D}\u{1193E}\u{119E0}\u{11A34}\u{11A47}\u{11A99}\u{11C3F}\u{11D44}\u{11D45}\u{11D97}\u{11F41}\u{11F42}]'
18
+ JOINING_TYPE_L_CHARACTER_CLASS = '[\u{A872}\u{10ACD}\u{10AD7}\u{10D00}\u{10FCB}]'
19
+ JOINING_TYPE_D_CHARACTER_CLASS = '[\u{0620}\u{0626}\u{0628}\u{062A}-\u{062E}\u{0633}-\u{063F}\u{0641}-\u{0647}\u{0649}-\u{064A}\u{066E}-\u{066F}\u{0678}-\u{0687}\u{069A}-\u{06BF}\u{06C1}-\u{06C2}\u{06CC}\u{06CE}\u{06D0}-\u{06D1}\u{06FA}-\u{06FC}\u{06FF}\u{0712}-\u{0714}\u{071A}-\u{071D}\u{071F}-\u{0727}\u{0729}\u{072B}\u{072D}-\u{072E}\u{074E}-\u{0758}\u{075C}-\u{076A}\u{076D}-\u{0770}\u{0772}\u{0775}-\u{0777}\u{077A}-\u{077F}\u{07CA}-\u{07EA}\u{0841}-\u{0845}\u{0848}\u{084A}-\u{0853}\u{0855}\u{0860}\u{0862}-\u{0865}\u{0868}\u{0886}\u{0889}-\u{088D}\u{08A0}-\u{08A9}\u{08AF}-\u{08B0}\u{08B3}-\u{08B8}\u{08BA}-\u{08C8}\u{1807}\u{1820}-\u{1842}\u{1843}\u{1844}-\u{1878}\u{1887}-\u{18A8}\u{18AA}\u{A840}-\u{A871}\u{10AC0}-\u{10AC4}\u{10AD3}-\u{10AD6}\u{10AD8}-\u{10ADC}\u{10ADE}-\u{10AE0}\u{10AEB}-\u{10AEE}\u{10B80}\u{10B82}\u{10B86}-\u{10B88}\u{10B8A}-\u{10B8B}\u{10B8D}\u{10B90}\u{10BAD}-\u{10BAE}\u{10D01}-\u{10D21}\u{10D23}\u{10F30}-\u{10F32}\u{10F34}-\u{10F44}\u{10F51}-\u{10F53}\u{10F70}-\u{10F73}\u{10F76}-\u{10F81}\u{10FB0}\u{10FB2}-\u{10FB3}\u{10FB8}\u{10FBB}-\u{10FBC}\u{10FBE}-\u{10FBF}\u{10FC1}\u{10FC4}\u{10FCA}\u{1E900}-\u{1E943}]'
20
+ JOINING_TYPE_T_CHARACTER_CLASS = '[\u{00AD}\u{0300}-\u{036F}\u{0483}-\u{0487}\u{0488}-\u{0489}\u{0591}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}-\u{05C5}\u{05C7}\u{0610}-\u{061A}\u{061C}\u{064B}-\u{065F}\u{0670}\u{06D6}-\u{06DC}\u{06DF}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{070F}\u{0711}\u{0730}-\u{074A}\u{07A6}-\u{07B0}\u{07EB}-\u{07F3}\u{07FD}\u{0816}-\u{0819}\u{081B}-\u{0823}\u{0825}-\u{0827}\u{0829}-\u{082D}\u{0859}-\u{085B}\u{0898}-\u{089F}\u{08CA}-\u{08E1}\u{08E3}-\u{0902}\u{093A}\u{093C}\u{0941}-\u{0948}\u{094D}\u{0951}-\u{0957}\u{0962}-\u{0963}\u{0981}\u{09BC}\u{09C1}-\u{09C4}\u{09CD}\u{09E2}-\u{09E3}\u{09FE}\u{0A01}-\u{0A02}\u{0A3C}\u{0A41}-\u{0A42}\u{0A47}-\u{0A48}\u{0A4B}-\u{0A4D}\u{0A51}\u{0A70}-\u{0A71}\u{0A75}\u{0A81}-\u{0A82}\u{0ABC}\u{0AC1}-\u{0AC5}\u{0AC7}-\u{0AC8}\u{0ACD}\u{0AE2}-\u{0AE3}\u{0AFA}-\u{0AFF}\u{0B01}\u{0B3C}\u{0B3F}\u{0B41}-\u{0B44}\u{0B4D}\u{0B55}-\u{0B56}\u{0B62}-\u{0B63}\u{0B82}\u{0BC0}\u{0BCD}\u{0C00}\u{0C04}\u{0C3C}\u{0C3E}-\u{0C40}\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C62}-\u{0C63}\u{0C81}\u{0CBC}\u{0CBF}\u{0CC6}\u{0CCC}-\u{0CCD}\u{0CE2}-\u{0CE3}\u{0D00}-\u{0D01}\u{0D3B}-\u{0D3C}\u{0D41}-\u{0D44}\u{0D4D}\u{0D62}-\u{0D63}\u{0D81}\u{0DCA}\u{0DD2}-\u{0DD4}\u{0DD6}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}\u{0EB4}-\u{0EBC}\u{0EC8}-\u{0ECE}\u{0F18}-\u{0F19}\u{0F35}\u{0F37}\u{0F39}\u{0F71}-\u{0F7E}\u{0F80}-\u{0F84}\u{0F86}-\u{0F87}\u{0F8D}-\u{0F97}\u{0F99}-\u{0FBC}\u{0FC6}\u{102D}-\u{1030}\u{1032}-\u{1037}\u{1039}-\u{103A}\u{103D}-\u{103E}\u{1058}-\u{1059}\u{105E}-\u{1060}\u{1071}-\u{1074}\u{1082}\u{1085}-\u{1086}\u{108D}\u{109D}\u{135D}-\u{135F}\u{1712}-\u{1714}\u{1732}-\u{1733}\u{1752}-\u{1753}\u{1772}-\u{1773}\u{17B4}-\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}\u{180B}-\u{180D}\u{180F}\u{1885}-\u{1886}\u{18A9}\u{1920}-\u{1922}\u{1927}-\u{1928}\u{1932}\u{1939}-\u{193B}\u{1A17}-\u{1A18}\u{1A1B}\u{1A56}\u{1A58}-\u{1A5E}\u{1A60}\u{1A62}\u{1A65}-\u{1A6C}\u{1A73}-\u{1A7C}\u{1A7F}\u{1AB0}-\u
{1ABD}\u{1ABE}\u{1ABF}-\u{1ACE}\u{1B00}-\u{1B03}\u{1B34}\u{1B36}-\u{1B3A}\u{1B3C}\u{1B42}\u{1B6B}-\u{1B73}\u{1B80}-\u{1B81}\u{1BA2}-\u{1BA5}\u{1BA8}-\u{1BA9}\u{1BAB}-\u{1BAD}\u{1BE6}\u{1BE8}-\u{1BE9}\u{1BED}\u{1BEF}-\u{1BF1}\u{1C2C}-\u{1C33}\u{1C36}-\u{1C37}\u{1CD0}-\u{1CD2}\u{1CD4}-\u{1CE0}\u{1CE2}-\u{1CE8}\u{1CED}\u{1CF4}\u{1CF8}-\u{1CF9}\u{1DC0}-\u{1DFF}\u{200B}\u{200E}-\u{200F}\u{202A}-\u{202E}\u{2060}-\u{2064}\u{206A}-\u{206F}\u{20D0}-\u{20DC}\u{20DD}-\u{20E0}\u{20E1}\u{20E2}-\u{20E4}\u{20E5}-\u{20F0}\u{2CEF}-\u{2CF1}\u{2D7F}\u{2DE0}-\u{2DFF}\u{302A}-\u{302D}\u{3099}-\u{309A}\u{A66F}\u{A670}-\u{A672}\u{A674}-\u{A67D}\u{A69E}-\u{A69F}\u{A6F0}-\u{A6F1}\u{A802}\u{A806}\u{A80B}\u{A825}-\u{A826}\u{A82C}\u{A8C4}-\u{A8C5}\u{A8E0}-\u{A8F1}\u{A8FF}\u{A926}-\u{A92D}\u{A947}-\u{A951}\u{A980}-\u{A982}\u{A9B3}\u{A9B6}-\u{A9B9}\u{A9BC}-\u{A9BD}\u{A9E5}\u{AA29}-\u{AA2E}\u{AA31}-\u{AA32}\u{AA35}-\u{AA36}\u{AA43}\u{AA4C}\u{AA7C}\u{AAB0}\u{AAB2}-\u{AAB4}\u{AAB7}-\u{AAB8}\u{AABE}-\u{AABF}\u{AAC1}\u{AAEC}-\u{AAED}\u{AAF6}\u{ABE5}\u{ABE8}\u{ABED}\u{FB1E}\u{FE00}-\u{FE0F}\u{FE20}-\u{FE2F}\u{FEFF}\u{FFF9}-\u{FFFB}\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}-\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}-\u{10AE6}\u{10D24}-\u{10D27}\u{10EAB}-\u{10EAC}\u{10EFD}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11001}\u{11038}-\u{11046}\u{11070}\u{11073}-\u{11074}\u{1107F}-\u{11081}\u{110B3}-\u{110B6}\u{110B9}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{1112B}\u{1112D}-\u{11134}\u{11173}\u{11180}-\u{11181}\u{111B6}-\u{111BE}\u{111C9}-\u{111CC}\u{111CF}\u{1122F}-\u{11231}\u{11234}\u{11236}-\u{11237}\u{1123E}\u{11241}\u{112DF}\u{112E3}-\u{112EA}\u{11300}-\u{11301}\u{1133B}-\u{1133C}\u{11340}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{11438}-\u{1143F}\u{11442}-\u{11444}\u{11446}\u{1145E}\u{114B3}-\u{114B8}\u{114BA}\u{114BF}-\u{114C0}\u{114C2}-\u{114C3}\u{115B2}-\u{115B5}\u{115BC}-\u{115BD}\u{115BF}-\u{115C0}\u{115DC}-\u{115DD}\u{11633}-\u{1163A}\u
{1163D}\u{1163F}-\u{11640}\u{116AB}\u{116AD}\u{116B0}-\u{116B5}\u{116B7}\u{1171D}-\u{1171F}\u{11722}-\u{11725}\u{11727}-\u{1172B}\u{1182F}-\u{11837}\u{11839}-\u{1183A}\u{1193B}-\u{1193C}\u{1193E}\u{11943}\u{119D4}-\u{119D7}\u{119DA}-\u{119DB}\u{119E0}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A38}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A56}\u{11A59}-\u{11A5B}\u{11A8A}-\u{11A96}\u{11A98}-\u{11A99}\u{11C30}-\u{11C36}\u{11C38}-\u{11C3D}\u{11C3F}\u{11C92}-\u{11CA7}\u{11CAA}-\u{11CB0}\u{11CB2}-\u{11CB3}\u{11CB5}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}-\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D90}-\u{11D91}\u{11D95}\u{11D97}\u{11EF3}-\u{11EF4}\u{11F00}-\u{11F01}\u{11F36}-\u{11F3A}\u{11F40}\u{11F42}\u{13430}-\u{1343F}\u{13440}\u{13447}-\u{13455}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F8F}-\u{16F92}\u{16FE4}\u{1BC9D}-\u{1BC9E}\u{1BCA0}-\u{1BCA3}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D167}-\u{1D169}\u{1D173}-\u{1D17A}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}-\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{1E94B}\u{E0001}\u{E0020}-\u{E007F}\u{E0100}-\u{E01EF}]'
21
+ JOINING_TYPE_R_CHARACTER_CLASS = '[\u{0622}-\u{0625}\u{0627}\u{0629}\u{062F}-\u{0632}\u{0648}\u{0671}-\u{0673}\u{0675}-\u{0677}\u{0688}-\u{0699}\u{06C0}\u{06C3}-\u{06CB}\u{06CD}\u{06CF}\u{06D2}-\u{06D3}\u{06D5}\u{06EE}-\u{06EF}\u{0710}\u{0715}-\u{0719}\u{071E}\u{0728}\u{072A}\u{072C}\u{072F}\u{074D}\u{0759}-\u{075B}\u{076B}-\u{076C}\u{0771}\u{0773}-\u{0774}\u{0778}-\u{0779}\u{0840}\u{0846}-\u{0847}\u{0849}\u{0854}\u{0856}-\u{0858}\u{0867}\u{0869}-\u{086A}\u{0870}-\u{0882}\u{088E}\u{08AA}-\u{08AC}\u{08AE}\u{08B1}-\u{08B2}\u{08B9}\u{10AC5}\u{10AC7}\u{10AC9}-\u{10ACA}\u{10ACE}-\u{10AD2}\u{10ADD}\u{10AE1}\u{10AE4}\u{10AEF}\u{10B81}\u{10B83}-\u{10B85}\u{10B89}\u{10B8C}\u{10B8E}-\u{10B8F}\u{10B91}\u{10BA9}-\u{10BAC}\u{10D22}\u{10F33}\u{10F54}\u{10F74}-\u{10F75}\u{10FB4}-\u{10FB6}\u{10FB9}-\u{10FBA}\u{10FBD}\u{10FC2}-\u{10FC3}\u{10FC9}]'
22
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
23
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
24
+ ZERO_WIDTH_VIRAMA = "#{VIRAMA_CHARACTER_CLASS}[\\u{200C}\\u{200D}]"
25
+ ZERO_WIDTH_NON_JOINER_JOINING_TYPE = "[#{JOINING_TYPE_L_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]#{JOINING_TYPE_T_CHARACTER_CLASS}*\\u{200C}#{JOINING_TYPE_T_CHARACTER_CLASS}*[#{JOINING_TYPE_R_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]"
26
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
27
+ MIDDLE_DOT = '\u{006C}\u{00B7}\u{006C}'
28
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
29
+ GREEK_LOWER_NUMERAL_SIGN = '\u{0375}\p{Greek}'
30
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
31
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
32
+ HEBREW_PUNCTUATION = '\p{Hebrew}[\u{05F3}\u{05F4}]'
33
+ CONTEXT_REGEX = /(#{ZERO_WIDTH_VIRAMA}|#{ZERO_WIDTH_NON_JOINER_JOINING_TYPE}|#{MIDDLE_DOT}|#{GREEK_LOWER_NUMERAL_SIGN}|#{HEBREW_PUNCTUATION})/.freeze
34
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
35
+ KATAKANA_MIDDLE_DOT_REGEX = /\u{30FB}/.freeze
36
+ KATAKANA_MIDDLE_DOT_CONTEXT_REGEX = /[\p{Hiragana}\p{Katakana}\p{Han}]/.freeze
37
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
38
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
39
+ ARABIC_INDIC_DIGITS_REGEX = /[\u{0660}-\u{0669}]/.freeze
40
+ ARABIC_EXTENDED_DIGITS_REGEX = /[\u{06F0}-\u{06F9}]/.freeze
41
+
42
+ def valid_hostname?(data)
43
+ data.split('.').map do |a_label|
44
+ return false if a_label.size > 63
45
+ u_label = SimpleIDN.to_unicode(a_label)
46
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
47
+ return false if u_label.slice(2, 2) == '--'
48
+ return false if ARABIC_INDIC_DIGITS_REGEX.match?(u_label) && ARABIC_EXTENDED_DIGITS_REGEX.match?(u_label)
49
+ u_label.gsub!(CONTEXT_REGEX, 'ok')
50
+ u_label.gsub!(KATAKANA_MIDDLE_DOT_REGEX, 'ok') if KATAKANA_MIDDLE_DOT_CONTEXT_REGEX.match?(u_label)
51
+ u_label
52
+ end.join('.').match?(HOSTNAME_REGEX)
53
+ rescue SimpleIDN::ConversionError
54
+ false
55
+ end
56
+ end
57
+ end
58
+ end
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
  module JSONSchemer
3
3
  module Format
4
+ include Hostname
5
+
4
6
  # this is no good
5
7
  EMAIL_REGEX = /\A[^@\s]+@([\p{L}\d-]+\.)+[\p{L}\d\-]{2,}\z/i.freeze
6
- LABEL_REGEX_STRING = '[\p{L}\p{N}]([\p{L}\p{N}\-]*[\p{L}\p{N}])?'
7
- HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
8
8
  JSON_POINTER_REGEX_STRING = '(\/([^~\/]|~[01])*)*'
9
9
  JSON_POINTER_REGEX = /\A#{JSON_POINTER_REGEX_STRING}\z/.freeze
10
10
  RELATIVE_JSON_POINTER_REGEX = /\A(0|[1-9]\d*)(#|#{JSON_POINTER_REGEX_STRING})?\z/.freeze
11
11
  DATE_TIME_OFFSET_REGEX = /(Z|[\+\-]([01][0-9]|2[0-3]):[0-5][0-9])\z/i.freeze
12
- INVALID_QUERY_REGEX = /[[:space:]]/.freeze
12
+ HOUR_24_REGEX = /T24/.freeze
13
+ LEAP_SECOND_REGEX = /T\d{2}:\d{2}:6/.freeze
14
+ IP_REGEX = /\A[\h:.]+\z/.freeze
15
+ INVALID_QUERY_REGEX = /\s/.freeze
13
16
 
14
17
  def valid_spec_format?(data, format)
15
18
  case format
@@ -28,9 +31,9 @@ module JSONSchemer
28
31
  when 'idn-hostname'
29
32
  valid_hostname?(data)
30
33
  when 'ipv4'
31
- valid_ip?(data, :v4)
34
+ valid_ip?(data, Socket::AF_INET)
32
35
  when 'ipv6'
33
- valid_ip?(data, :v6)
36
+ valid_ip?(data, Socket::AF_INET6)
34
37
  when 'uri'
35
38
  valid_uri?(data)
36
39
  when 'uri-reference'
@@ -46,7 +49,9 @@ module JSONSchemer
46
49
  when 'relative-json-pointer'
47
50
  valid_relative_json_pointer?(data)
48
51
  when 'regex'
49
- EcmaReValidator.valid?(data)
52
+ valid_regex?(data)
53
+ else
54
+ raise UnknownFormat, format
50
55
  end
51
56
  end
52
57
 
@@ -58,24 +63,24 @@ module JSONSchemer
58
63
  end
59
64
 
60
65
  def valid_date_time?(data)
61
- DateTime.rfc3339(data)
66
+ return false if HOUR_24_REGEX.match?(data)
67
+ datetime = DateTime.rfc3339(data)
68
+ return false if LEAP_SECOND_REGEX.match?(data) && datetime.to_time.utc.strftime('%H:%M') != '23:59'
62
69
  DATE_TIME_OFFSET_REGEX.match?(data)
63
70
  rescue ArgumentError
64
71
  false
65
72
  end
66
73
 
67
74
  def valid_email?(data)
68
- EMAIL_REGEX.match?(data)
69
- end
70
-
71
- def valid_hostname?(data)
72
- HOSTNAME_REGEX.match?(data) && data.split('.').all? { |label| label.size <= 63 }
75
+ return false unless EMAIL_REGEX.match?(data)
76
+ local, _domain = data.partition('@')
77
+ !local.start_with?('.') && !local.end_with?('.') && !local.include?('..')
73
78
  end
74
79
 
75
- def valid_ip?(data, type)
76
- ip_address = IPAddr.new(data)
77
- type == :v4 ? ip_address.ipv4? : ip_address.ipv6?
78
- rescue IPAddr::InvalidAddressError
80
+ def valid_ip?(data, family)
81
+ IPAddr.new(data, family)
82
+ IP_REGEX.match?(data)
83
+ rescue IPAddr::Error
79
84
  false
80
85
  end
81
86
 
@@ -124,5 +129,11 @@ module JSONSchemer
124
129
  def valid_relative_json_pointer?(data)
125
130
  RELATIVE_JSON_POINTER_REGEX.match?(data)
126
131
  end
132
+
133
+ def valid_regex?(data)
134
+ !!EcmaRegexp.ruby_equivalent(data)
135
+ rescue InvalidEcmaRegexp
136
+ false
137
+ end
127
138
  end
128
139
  end