json_schemer 0.2.24 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24ab3d7f62a2ca93d0dcd3fb33191de983006b504d7bdae60066268fabf7eb50
4
- data.tar.gz: 5f695dffd93a834e81491d5a5b328e6b193d56886666533e6ec0c5e7ba5b5be0
3
+ metadata.gz: 93264835a6cd46b1c657876bda6a68c53a130c36390e84c316f6ffad3d00194f
4
+ data.tar.gz: 6bc567c682cb103cc673dade1f0a46a0b7500973acf6915acd7d7093de7a74e3
5
5
  SHA512:
6
- metadata.gz: 0bd588c8b88bd813c52a13bb997cdd3f668f9b36570bc15a0780aa35b80fdaa49cc2862f6749be1cea9c5708169cbee990385799907a3957c7ceac32ce9ff5cf
7
- data.tar.gz: 6c835337e619f60ae9a72a4036dbcbeb06519e9c3c6f47216ab4650f0c15955a45be0ae2ace13a0a2c92f6066e106fa6073b6c795ff808cc90854f85a1ad9e76
6
+ metadata.gz: 5e0fe51563031e2bcce3331cd13b251a0e8007740c9d5b70e59a03b5151114e221ad50b24358094ccc33b31fcabb0b62e76674a966d5c8bf3ed872accd839f4f
7
+ data.tar.gz: 2835c05bbb368718f20e8ec435def32619b3876d7c1e78259f92ac8b57d0f45b27cc6a9481fc101efe8e0456054ddfffc8d05ad60f37b63f8e2475e95b81e368
@@ -6,12 +6,8 @@ jobs:
6
6
  fail-fast: false
7
7
  matrix:
8
8
  os: [ubuntu-latest, windows-latest, macos-latest]
9
- ruby: [2.4, 2.5, 2.6, 2.7, 3.0, 3.1, head, jruby, jruby-head, truffleruby, truffleruby-head]
9
+ ruby: [2.5, 2.6, 2.7, 3.0, 3.1, 3.2, head, jruby, jruby-head, truffleruby, truffleruby-head]
10
10
  exclude:
11
- - os: windows-latest
12
- ruby: jruby
13
- - os: windows-latest
14
- ruby: jruby-head
15
11
  - os: windows-latest
16
12
  ruby: truffleruby
17
13
  - os: windows-latest
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ ## [1.0.0] - 2023-05-26
4
+
5
+ ### Breaking Changes
6
+
7
+ - Ruby 2.4 is no longer supported.
8
+ - The default `regexp_resolver` is now `ruby`, which passes patterns directly to `Regexp`. The previous default, `ecma`, rewrites patterns to behave more like JavaScript (ECMA-262) regular expressions:
9
+ - Beginning of string: `^` -> `\A`
10
+ - End of string: `$` -> `\z`
11
+ - Space: `\s` -> `[\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
12
+ - Non-space: `\S` -> `[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]`
13
+ - Invalid ECMA-262 regular expressions raise `JSONSchemer::InvalidEcmaRegexp` when `regexp_resolver` is set to `ecma`.
14
+ - Embedded subschemas (i.e., subschemas referenced by `$id`) can only be found under "known" keywords (e.g., `definitions`). Previously, the entire schema object was scanned for `$id`.
15
+ - Empty fragments are now removed from `$ref` URIs before calling `ref_resolver`.
16
+ - Refs that are fragment-only JSON pointers with special characters must use the proper encoding (e.g., `"$ref": "#/definitions/some-%7Bid%7D"`).
17
+
18
+ [1.0.0]: https://github.com/davishmcclurg/json_schemer/releases/tag/v1.0.0
data/Gemfile.lock CHANGED
@@ -1,24 +1,36 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- json_schemer (0.2.24)
5
- ecma-re-validator (~> 0.3)
4
+ json_schemer (1.0.0)
6
5
  hana (~> 1.3)
7
6
  regexp_parser (~> 2.0)
7
+ simpleidn (~> 0.2)
8
8
  uri_template (~> 0.7)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- ecma-re-validator (0.3.0)
14
- regexp_parser (~> 2.0)
13
+ docile (1.4.0)
15
14
  hana (1.3.7)
16
15
  minitest (5.15.0)
17
16
  rake (13.0.6)
18
17
  regexp_parser (2.6.1)
18
+ simplecov (0.22.0)
19
+ docile (~> 1.1)
20
+ simplecov-html (~> 0.11)
21
+ simplecov_json_formatter (~> 0.1)
22
+ simplecov-html (0.12.3)
23
+ simplecov_json_formatter (0.1.4)
24
+ simpleidn (0.2.1)
25
+ unf (~> 0.1.4)
26
+ unf (0.1.4)
27
+ unf_ext
28
+ unf (0.1.4-java)
29
+ unf_ext (0.0.8.2)
19
30
  uri_template (0.7.0)
20
31
 
21
32
  PLATFORMS
33
+ java
22
34
  ruby
23
35
 
24
36
  DEPENDENCIES
@@ -26,6 +38,7 @@ DEPENDENCIES
26
38
  json_schemer!
27
39
  minitest (~> 5.0)
28
40
  rake (~> 13.0)
41
+ simplecov (~> 0.22)
29
42
 
30
43
  BUNDLED WITH
31
44
  2.3.25
data/README.md CHANGED
@@ -45,7 +45,12 @@ schemer.valid?({ 'abc' => 10 })
45
45
  # error validation (`validate` returns an enumerator)
46
46
 
47
47
  schemer.validate({ 'abc' => 10 }).to_a
48
- # => [{"data"=>10, "schema"=>{"type"=>"integer", "minimum"=>11}, "pointer"=>"#/abc", "type"=>"minimum"}]
48
+ # => [{"data"=>10,
49
+ # "data_pointer"=>"/abc",
50
+ # "schema"=>{"type"=>"integer", "minimum"=>11},
51
+ # "schema_pointer"=>"/properties/abc",
52
+ # "root_schema"=>{"type"=>"object", "properties"=>{"abc"=>{"type"=>"integer", "minimum"=>11}}},
53
+ # "type"=>"minimum"}]
49
54
 
50
55
  # default property values
51
56
 
@@ -74,6 +79,30 @@ schemer = JSONSchemer.schema(schema)
74
79
 
75
80
  schema = '{ "type": "integer" }'
76
81
  schemer = JSONSchemer.schema(schema)
82
+
83
+ # schema validation
84
+
85
+ JSONSchemer.valid_schema?({ '$id' => '#valid' })
86
+ # => true
87
+
88
+ JSONSchemer.validate_schema({ '$id' => nil }).to_a
89
+ # => [{"data"=>nil,
90
+ # "data_pointer"=>"/$id",
91
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
92
+ # "schema_pointer"=>"/properties/$id",
93
+ # "root_schema"=>{...meta schema},
94
+ # "type"=>"string"}]
95
+
96
+ JSONSchemer.schema({ '$id' => '#valid' }).valid_schema?
97
+ # => true
98
+
99
+ JSONSchemer.schema({ '$id' => nil }).validate_schema.to_a
100
+ # => [{"data"=>nil,
101
+ # "data_pointer"=>"/$id",
102
+ # "schema"=>{"type"=>"string", "format"=>"uri-reference"},
103
+ # "schema_pointer"=>"/properties/$id",
104
+ # "root_schema"=>{...meta schema},
105
+ # "type"=>"string"}]
77
106
  ```
78
107
 
79
108
  ## Options
@@ -113,8 +142,9 @@ JSONSchemer.schema(
113
142
  ref_resolver: 'net/http',
114
143
 
115
144
  # use different method to match regexes
116
- # 'ecma'/'ruby'/proc/lambda/respond_to?(:call)
117
- # default: 'ecma'
145
+ # 'ruby'/'ecma'/proc/lambda/respond_to?(:call)
146
+ # 'ruby': proc { |pattern| Regexp.new(pattern) }
147
+ # default: 'ruby'
118
148
  regexp_resolver: proc do |pattern|
119
149
  RE2::Regexp.new(pattern)
120
150
  end
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'open-uri'
4
+ require 'csv'
5
+
6
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
7
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
8
+
9
+ csv_options = { :col_sep => ';', :skip_blanks => true, :skip_lines => /\A#/ }
10
+
11
+ unicode_data = URI('https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt')
12
+ derived_joining_type = URI('https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedJoiningType.txt')
13
+
14
+ # https://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
15
+ virama_canonical_combining_class = '9'
16
+
17
+ virama_codes = CSV.new(unicode_data.read, **csv_options).select do |code, _name, _category, canonical_combining_class|
18
+ canonical_combining_class == virama_canonical_combining_class
19
+ end.map(&:first)
20
+
21
+ # https://www.unicode.org/reports/tr44/#Default_Values
22
+ # https://www.unicode.org/reports/tr44/#Derived_Extracted
23
+ codes_by_joining_type = CSV.new(derived_joining_type.read, **csv_options).group_by do |_code, joining_type|
24
+ joining_type.gsub(/#.+/, '').strip
25
+ end.transform_values do |rows|
26
+ rows.map do |code, _joining_type|
27
+ code.strip
28
+ end
29
+ end
30
+
31
+ def codes_to_character_class(codes)
32
+ characters = codes.map do |code|
33
+ code.gsub(/(\h+)/, '\u{\1}').gsub('..', '-')
34
+ end
35
+ "[#{characters.join}]"
36
+ end
37
+
38
+ puts "VIRAMA_CHARACTER_CLASS = '#{codes_to_character_class(virama_codes)}'"
39
+
40
+ codes_by_joining_type.slice('L', 'D', 'T', 'R').each do |joining_type, codes|
41
+ puts "JOINING_TYPE_#{joining_type}_CHARACTER_CLASS = '#{codes_to_character_class(codes)}'"
42
+ end
data/json_schemer.gemspec CHANGED
@@ -20,22 +20,15 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.required_ruby_version = '>= 2.4'
23
+ spec.required_ruby_version = '>= 2.5'
24
24
 
25
25
  spec.add_development_dependency "bundler", "~> 2.0"
26
26
  spec.add_development_dependency "rake", "~> 13.0"
27
27
  spec.add_development_dependency "minitest", "~> 5.0"
28
+ spec.add_development_dependency "simplecov", "~> 0.22"
28
29
 
29
- # spec.add_development_dependency "benchmark-ips", "~> 2.7.2"
30
- # spec.add_development_dependency "jschema", "~> 0.2.1"
31
- # spec.add_development_dependency "json-schema", "~> 2.8.0"
32
- # spec.add_development_dependency "json_schema", "~> 0.17.0"
33
- # spec.add_development_dependency "json_validation", "~> 0.1.0"
34
- # spec.add_development_dependency "jsonschema", "~> 2.0.2"
35
- # spec.add_development_dependency "rj_schema", "~> 0.2.0"
36
-
37
- spec.add_runtime_dependency "ecma-re-validator", "~> 0.3"
38
30
  spec.add_runtime_dependency "hana", "~> 1.3"
39
- spec.add_runtime_dependency "uri_template", "~> 0.7"
40
31
  spec.add_runtime_dependency "regexp_parser", "~> 2.0"
32
+ spec.add_runtime_dependency "simpleidn", "~> 0.2"
33
+ spec.add_runtime_dependency "uri_template", "~> 0.7"
41
34
  end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+ module JSONSchemer
3
+ class EcmaRegexp
4
+ class Syntax < Regexp::Syntax::Base
5
+ implements :anchor, Anchor::Extended
6
+ implements :assertion, Assertion::All
7
+ implements :backref, Backreference::Plain + Backreference::Name
8
+ implements :escape, Escape::Basic + %i[control backspace form_feed newline carriage tab vertical_tab] + Escape::Unicode + Escape::Meta + Escape::Hex + Escape::Octal
9
+ implements :property, UnicodeProperty::All
10
+ implements :nonproperty, UnicodeProperty::All
11
+ implements :free_space, %i[whitespace]
12
+ implements :group, Group::Basic + Group::Named + Group::Passive
13
+ implements :literal, Literal::All
14
+ implements :meta, Meta::Extended
15
+ implements :quantifier, Quantifier::Greedy + Quantifier::Reluctant + Quantifier::Interval + Quantifier::IntervalReluctant
16
+ implements :set, CharacterSet::Basic
17
+ implements :type, CharacterType::Extended
18
+ end
19
+
20
+ RUBY_EQUIVALENTS = {
21
+ :anchor => {
22
+ :bol => '\A',
23
+ :eol => '\z'
24
+ },
25
+ :type => {
26
+ :space => '[\t\r\n\f\v\uFEFF\u2029\p{Zs}]',
27
+ :nonspace => '[^\t\r\n\f\v\uFEFF\u2029\p{Zs}]'
28
+ }
29
+ }.freeze
30
+
31
+ class << self
32
+ def ruby_equivalent(pattern)
33
+ Regexp::Scanner.scan(pattern).map do |type, token, text|
34
+ Syntax.check!(*Syntax.normalize(type, token))
35
+ RUBY_EQUIVALENTS.dig(type, token) || text
36
+ rescue Regexp::Syntax::NotImplementedError
37
+ raise InvalidEcmaRegexp, "invalid token #{text.inspect} (#{type}:#{token}) in #{pattern.inspect}"
38
+ end.join
39
+ rescue Regexp::Scanner::ScannerError
40
+ raise InvalidEcmaRegexp, "invalid pattern #{pattern.inspect}"
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+ module JSONSchemer
3
+ module Format
4
+ module Hostname
5
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.1
6
+ MARKS = '\p{Mn}\p{Mc}'
7
+ LETTER_DIGITS = "\\p{Ll}\\p{Lu}\\p{Lo}\\p{Nd}\\p{Lm}#{MARKS}"
8
+ # https://datatracker.ietf.org/doc/html/rfc5892#section-2.6
9
+ EXCEPTIONS_PVALID = '\u{06FD}\u{06FE}\u{0F0B}\u{3007}' # \u{00DF}\u{03C2} covered by \p{Ll}
10
+ EXCEPTIONS_DISALLOWED = '\u{0640}\u{07FA}\u{302E}\u{302F}\u{3031}\u{3032}\u{3033}\u{3034}\u{3035}\u{303B}'
11
+ LABEL_CHARACTER_CLASS = "[#{LETTER_DIGITS}#{EXCEPTIONS_PVALID}&&[^#{EXCEPTIONS_DISALLOWED}]]"
12
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
13
+ LEADING_CHARACTER_CLASS = "[#{LABEL_CHARACTER_CLASS}&&[^#{MARKS}]]"
14
+ LABEL_REGEX_STRING = "#{LEADING_CHARACTER_CLASS}([#{LABEL_CHARACTER_CLASS}\-]*#{LABEL_CHARACTER_CLASS})?"
15
+ HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
16
+ # bin/hostname_character_classes
17
+ VIRAMA_CHARACTER_CLASS = '[\u{094D}\u{09CD}\u{0A4D}\u{0ACD}\u{0B4D}\u{0BCD}\u{0C4D}\u{0CCD}\u{0D3B}\u{0D3C}\u{0D4D}\u{0DCA}\u{0E3A}\u{0EBA}\u{0F84}\u{1039}\u{103A}\u{1714}\u{1715}\u{1734}\u{17D2}\u{1A60}\u{1B44}\u{1BAA}\u{1BAB}\u{1BF2}\u{1BF3}\u{2D7F}\u{A806}\u{A82C}\u{A8C4}\u{A953}\u{A9C0}\u{AAF6}\u{ABED}\u{10A3F}\u{11046}\u{11070}\u{1107F}\u{110B9}\u{11133}\u{11134}\u{111C0}\u{11235}\u{112EA}\u{1134D}\u{11442}\u{114C2}\u{115BF}\u{1163F}\u{116B6}\u{1172B}\u{11839}\u{1193D}\u{1193E}\u{119E0}\u{11A34}\u{11A47}\u{11A99}\u{11C3F}\u{11D44}\u{11D45}\u{11D97}\u{11F41}\u{11F42}]'
18
+ JOINING_TYPE_L_CHARACTER_CLASS = '[\u{A872}\u{10ACD}\u{10AD7}\u{10D00}\u{10FCB}]'
19
+ JOINING_TYPE_D_CHARACTER_CLASS = '[\u{0620}\u{0626}\u{0628}\u{062A}-\u{062E}\u{0633}-\u{063F}\u{0641}-\u{0647}\u{0649}-\u{064A}\u{066E}-\u{066F}\u{0678}-\u{0687}\u{069A}-\u{06BF}\u{06C1}-\u{06C2}\u{06CC}\u{06CE}\u{06D0}-\u{06D1}\u{06FA}-\u{06FC}\u{06FF}\u{0712}-\u{0714}\u{071A}-\u{071D}\u{071F}-\u{0727}\u{0729}\u{072B}\u{072D}-\u{072E}\u{074E}-\u{0758}\u{075C}-\u{076A}\u{076D}-\u{0770}\u{0772}\u{0775}-\u{0777}\u{077A}-\u{077F}\u{07CA}-\u{07EA}\u{0841}-\u{0845}\u{0848}\u{084A}-\u{0853}\u{0855}\u{0860}\u{0862}-\u{0865}\u{0868}\u{0886}\u{0889}-\u{088D}\u{08A0}-\u{08A9}\u{08AF}-\u{08B0}\u{08B3}-\u{08B8}\u{08BA}-\u{08C8}\u{1807}\u{1820}-\u{1842}\u{1843}\u{1844}-\u{1878}\u{1887}-\u{18A8}\u{18AA}\u{A840}-\u{A871}\u{10AC0}-\u{10AC4}\u{10AD3}-\u{10AD6}\u{10AD8}-\u{10ADC}\u{10ADE}-\u{10AE0}\u{10AEB}-\u{10AEE}\u{10B80}\u{10B82}\u{10B86}-\u{10B88}\u{10B8A}-\u{10B8B}\u{10B8D}\u{10B90}\u{10BAD}-\u{10BAE}\u{10D01}-\u{10D21}\u{10D23}\u{10F30}-\u{10F32}\u{10F34}-\u{10F44}\u{10F51}-\u{10F53}\u{10F70}-\u{10F73}\u{10F76}-\u{10F81}\u{10FB0}\u{10FB2}-\u{10FB3}\u{10FB8}\u{10FBB}-\u{10FBC}\u{10FBE}-\u{10FBF}\u{10FC1}\u{10FC4}\u{10FCA}\u{1E900}-\u{1E943}]'
20
+ JOINING_TYPE_T_CHARACTER_CLASS = '[\u{00AD}\u{0300}-\u{036F}\u{0483}-\u{0487}\u{0488}-\u{0489}\u{0591}-\u{05BD}\u{05BF}\u{05C1}-\u{05C2}\u{05C4}-\u{05C5}\u{05C7}\u{0610}-\u{061A}\u{061C}\u{064B}-\u{065F}\u{0670}\u{06D6}-\u{06DC}\u{06DF}-\u{06E4}\u{06E7}-\u{06E8}\u{06EA}-\u{06ED}\u{070F}\u{0711}\u{0730}-\u{074A}\u{07A6}-\u{07B0}\u{07EB}-\u{07F3}\u{07FD}\u{0816}-\u{0819}\u{081B}-\u{0823}\u{0825}-\u{0827}\u{0829}-\u{082D}\u{0859}-\u{085B}\u{0898}-\u{089F}\u{08CA}-\u{08E1}\u{08E3}-\u{0902}\u{093A}\u{093C}\u{0941}-\u{0948}\u{094D}\u{0951}-\u{0957}\u{0962}-\u{0963}\u{0981}\u{09BC}\u{09C1}-\u{09C4}\u{09CD}\u{09E2}-\u{09E3}\u{09FE}\u{0A01}-\u{0A02}\u{0A3C}\u{0A41}-\u{0A42}\u{0A47}-\u{0A48}\u{0A4B}-\u{0A4D}\u{0A51}\u{0A70}-\u{0A71}\u{0A75}\u{0A81}-\u{0A82}\u{0ABC}\u{0AC1}-\u{0AC5}\u{0AC7}-\u{0AC8}\u{0ACD}\u{0AE2}-\u{0AE3}\u{0AFA}-\u{0AFF}\u{0B01}\u{0B3C}\u{0B3F}\u{0B41}-\u{0B44}\u{0B4D}\u{0B55}-\u{0B56}\u{0B62}-\u{0B63}\u{0B82}\u{0BC0}\u{0BCD}\u{0C00}\u{0C04}\u{0C3C}\u{0C3E}-\u{0C40}\u{0C46}-\u{0C48}\u{0C4A}-\u{0C4D}\u{0C55}-\u{0C56}\u{0C62}-\u{0C63}\u{0C81}\u{0CBC}\u{0CBF}\u{0CC6}\u{0CCC}-\u{0CCD}\u{0CE2}-\u{0CE3}\u{0D00}-\u{0D01}\u{0D3B}-\u{0D3C}\u{0D41}-\u{0D44}\u{0D4D}\u{0D62}-\u{0D63}\u{0D81}\u{0DCA}\u{0DD2}-\u{0DD4}\u{0DD6}\u{0E31}\u{0E34}-\u{0E3A}\u{0E47}-\u{0E4E}\u{0EB1}\u{0EB4}-\u{0EBC}\u{0EC8}-\u{0ECE}\u{0F18}-\u{0F19}\u{0F35}\u{0F37}\u{0F39}\u{0F71}-\u{0F7E}\u{0F80}-\u{0F84}\u{0F86}-\u{0F87}\u{0F8D}-\u{0F97}\u{0F99}-\u{0FBC}\u{0FC6}\u{102D}-\u{1030}\u{1032}-\u{1037}\u{1039}-\u{103A}\u{103D}-\u{103E}\u{1058}-\u{1059}\u{105E}-\u{1060}\u{1071}-\u{1074}\u{1082}\u{1085}-\u{1086}\u{108D}\u{109D}\u{135D}-\u{135F}\u{1712}-\u{1714}\u{1732}-\u{1733}\u{1752}-\u{1753}\u{1772}-\u{1773}\u{17B4}-\u{17B5}\u{17B7}-\u{17BD}\u{17C6}\u{17C9}-\u{17D3}\u{17DD}\u{180B}-\u{180D}\u{180F}\u{1885}-\u{1886}\u{18A9}\u{1920}-\u{1922}\u{1927}-\u{1928}\u{1932}\u{1939}-\u{193B}\u{1A17}-\u{1A18}\u{1A1B}\u{1A56}\u{1A58}-\u{1A5E}\u{1A60}\u{1A62}\u{1A65}-\u{1A6C}\u{1A73}-\u{1A7C}\u{1A7F}\u{1AB0}-\u
{1ABD}\u{1ABE}\u{1ABF}-\u{1ACE}\u{1B00}-\u{1B03}\u{1B34}\u{1B36}-\u{1B3A}\u{1B3C}\u{1B42}\u{1B6B}-\u{1B73}\u{1B80}-\u{1B81}\u{1BA2}-\u{1BA5}\u{1BA8}-\u{1BA9}\u{1BAB}-\u{1BAD}\u{1BE6}\u{1BE8}-\u{1BE9}\u{1BED}\u{1BEF}-\u{1BF1}\u{1C2C}-\u{1C33}\u{1C36}-\u{1C37}\u{1CD0}-\u{1CD2}\u{1CD4}-\u{1CE0}\u{1CE2}-\u{1CE8}\u{1CED}\u{1CF4}\u{1CF8}-\u{1CF9}\u{1DC0}-\u{1DFF}\u{200B}\u{200E}-\u{200F}\u{202A}-\u{202E}\u{2060}-\u{2064}\u{206A}-\u{206F}\u{20D0}-\u{20DC}\u{20DD}-\u{20E0}\u{20E1}\u{20E2}-\u{20E4}\u{20E5}-\u{20F0}\u{2CEF}-\u{2CF1}\u{2D7F}\u{2DE0}-\u{2DFF}\u{302A}-\u{302D}\u{3099}-\u{309A}\u{A66F}\u{A670}-\u{A672}\u{A674}-\u{A67D}\u{A69E}-\u{A69F}\u{A6F0}-\u{A6F1}\u{A802}\u{A806}\u{A80B}\u{A825}-\u{A826}\u{A82C}\u{A8C4}-\u{A8C5}\u{A8E0}-\u{A8F1}\u{A8FF}\u{A926}-\u{A92D}\u{A947}-\u{A951}\u{A980}-\u{A982}\u{A9B3}\u{A9B6}-\u{A9B9}\u{A9BC}-\u{A9BD}\u{A9E5}\u{AA29}-\u{AA2E}\u{AA31}-\u{AA32}\u{AA35}-\u{AA36}\u{AA43}\u{AA4C}\u{AA7C}\u{AAB0}\u{AAB2}-\u{AAB4}\u{AAB7}-\u{AAB8}\u{AABE}-\u{AABF}\u{AAC1}\u{AAEC}-\u{AAED}\u{AAF6}\u{ABE5}\u{ABE8}\u{ABED}\u{FB1E}\u{FE00}-\u{FE0F}\u{FE20}-\u{FE2F}\u{FEFF}\u{FFF9}-\u{FFFB}\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}-\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}-\u{10AE6}\u{10D24}-\u{10D27}\u{10EAB}-\u{10EAC}\u{10EFD}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11001}\u{11038}-\u{11046}\u{11070}\u{11073}-\u{11074}\u{1107F}-\u{11081}\u{110B3}-\u{110B6}\u{110B9}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{1112B}\u{1112D}-\u{11134}\u{11173}\u{11180}-\u{11181}\u{111B6}-\u{111BE}\u{111C9}-\u{111CC}\u{111CF}\u{1122F}-\u{11231}\u{11234}\u{11236}-\u{11237}\u{1123E}\u{11241}\u{112DF}\u{112E3}-\u{112EA}\u{11300}-\u{11301}\u{1133B}-\u{1133C}\u{11340}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{11438}-\u{1143F}\u{11442}-\u{11444}\u{11446}\u{1145E}\u{114B3}-\u{114B8}\u{114BA}\u{114BF}-\u{114C0}\u{114C2}-\u{114C3}\u{115B2}-\u{115B5}\u{115BC}-\u{115BD}\u{115BF}-\u{115C0}\u{115DC}-\u{115DD}\u{11633}-\u{1163A}\u
{1163D}\u{1163F}-\u{11640}\u{116AB}\u{116AD}\u{116B0}-\u{116B5}\u{116B7}\u{1171D}-\u{1171F}\u{11722}-\u{11725}\u{11727}-\u{1172B}\u{1182F}-\u{11837}\u{11839}-\u{1183A}\u{1193B}-\u{1193C}\u{1193E}\u{11943}\u{119D4}-\u{119D7}\u{119DA}-\u{119DB}\u{119E0}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A38}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A56}\u{11A59}-\u{11A5B}\u{11A8A}-\u{11A96}\u{11A98}-\u{11A99}\u{11C30}-\u{11C36}\u{11C38}-\u{11C3D}\u{11C3F}\u{11C92}-\u{11CA7}\u{11CAA}-\u{11CB0}\u{11CB2}-\u{11CB3}\u{11CB5}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}-\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D90}-\u{11D91}\u{11D95}\u{11D97}\u{11EF3}-\u{11EF4}\u{11F00}-\u{11F01}\u{11F36}-\u{11F3A}\u{11F40}\u{11F42}\u{13430}-\u{1343F}\u{13440}\u{13447}-\u{13455}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F8F}-\u{16F92}\u{16FE4}\u{1BC9D}-\u{1BC9E}\u{1BCA0}-\u{1BCA3}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D167}-\u{1D169}\u{1D173}-\u{1D17A}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}-\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{1E94B}\u{E0001}\u{E0020}-\u{E007F}\u{E0100}-\u{E01EF}]'
21
+ JOINING_TYPE_R_CHARACTER_CLASS = '[\u{0622}-\u{0625}\u{0627}\u{0629}\u{062F}-\u{0632}\u{0648}\u{0671}-\u{0673}\u{0675}-\u{0677}\u{0688}-\u{0699}\u{06C0}\u{06C3}-\u{06CB}\u{06CD}\u{06CF}\u{06D2}-\u{06D3}\u{06D5}\u{06EE}-\u{06EF}\u{0710}\u{0715}-\u{0719}\u{071E}\u{0728}\u{072A}\u{072C}\u{072F}\u{074D}\u{0759}-\u{075B}\u{076B}-\u{076C}\u{0771}\u{0773}-\u{0774}\u{0778}-\u{0779}\u{0840}\u{0846}-\u{0847}\u{0849}\u{0854}\u{0856}-\u{0858}\u{0867}\u{0869}-\u{086A}\u{0870}-\u{0882}\u{088E}\u{08AA}-\u{08AC}\u{08AE}\u{08B1}-\u{08B2}\u{08B9}\u{10AC5}\u{10AC7}\u{10AC9}-\u{10ACA}\u{10ACE}-\u{10AD2}\u{10ADD}\u{10AE1}\u{10AE4}\u{10AEF}\u{10B81}\u{10B83}-\u{10B85}\u{10B89}\u{10B8C}\u{10B8E}-\u{10B8F}\u{10B91}\u{10BA9}-\u{10BAC}\u{10D22}\u{10F33}\u{10F54}\u{10F74}-\u{10F75}\u{10FB4}-\u{10FB6}\u{10FB9}-\u{10FBA}\u{10FBD}\u{10FC2}-\u{10FC3}\u{10FC9}]'
22
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
23
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
24
+ ZERO_WIDTH_VIRAMA = "#{VIRAMA_CHARACTER_CLASS}[\\u{200C}\\u{200D}]"
25
+ ZERO_WIDTH_NON_JOINER_JOINING_TYPE = "[#{JOINING_TYPE_L_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]#{JOINING_TYPE_T_CHARACTER_CLASS}*\\u{200C}#{JOINING_TYPE_T_CHARACTER_CLASS}*[#{JOINING_TYPE_R_CHARACTER_CLASS}#{JOINING_TYPE_D_CHARACTER_CLASS}]"
26
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
27
+ MIDDLE_DOT = '\u{006C}\u{00B7}\u{006C}'
28
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
29
+ GREEK_LOWER_NUMERAL_SIGN = '\u{0375}\p{Greek}'
30
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
31
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
32
+ HEBREW_PUNCTUATION = '\p{Hebrew}[\u{05F3}\u{05F4}]'
33
+ CONTEXT_REGEX = /(#{ZERO_WIDTH_VIRAMA}|#{ZERO_WIDTH_NON_JOINER_JOINING_TYPE}|#{MIDDLE_DOT}|#{GREEK_LOWER_NUMERAL_SIGN}|#{HEBREW_PUNCTUATION})/.freeze
34
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
35
+ KATAKANA_MIDDLE_DOT_REGEX = /\u{30FB}/.freeze
36
+ KATAKANA_MIDDLE_DOT_CONTEXT_REGEX = /[\p{Hiragana}\p{Katakana}\p{Han}]/.freeze
37
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
38
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
39
+ ARABIC_INDIC_DIGITS_REGEX = /[\u{0660}-\u{0669}]/.freeze
40
+ ARABIC_EXTENDED_DIGITS_REGEX = /[\u{06F0}-\u{06F9}]/.freeze
41
+
42
+ def valid_hostname?(data)
43
+ data.split('.').map do |a_label|
44
+ return false if a_label.size > 63
45
+ u_label = SimpleIDN.to_unicode(a_label)
46
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
47
+ return false if u_label.slice(2, 2) == '--'
48
+ return false if ARABIC_INDIC_DIGITS_REGEX.match?(u_label) && ARABIC_EXTENDED_DIGITS_REGEX.match?(u_label)
49
+ u_label.gsub!(CONTEXT_REGEX, 'ok')
50
+ u_label.gsub!(KATAKANA_MIDDLE_DOT_REGEX, 'ok') if KATAKANA_MIDDLE_DOT_CONTEXT_REGEX.match?(u_label)
51
+ u_label
52
+ end.join('.').match?(HOSTNAME_REGEX)
53
+ rescue SimpleIDN::ConversionError
54
+ false
55
+ end
56
+ end
57
+ end
58
+ end
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
  module JSONSchemer
3
3
  module Format
4
+ include Hostname
5
+
4
6
  # this is no good
5
7
  EMAIL_REGEX = /\A[^@\s]+@([\p{L}\d-]+\.)+[\p{L}\d\-]{2,}\z/i.freeze
6
- LABEL_REGEX_STRING = '[\p{L}\p{N}]([\p{L}\p{N}\-]*[\p{L}\p{N}])?'
7
- HOSTNAME_REGEX = /\A(#{LABEL_REGEX_STRING}\.)*#{LABEL_REGEX_STRING}\z/i.freeze
8
8
  JSON_POINTER_REGEX_STRING = '(\/([^~\/]|~[01])*)*'
9
9
  JSON_POINTER_REGEX = /\A#{JSON_POINTER_REGEX_STRING}\z/.freeze
10
10
  RELATIVE_JSON_POINTER_REGEX = /\A(0|[1-9]\d*)(#|#{JSON_POINTER_REGEX_STRING})?\z/.freeze
11
11
  DATE_TIME_OFFSET_REGEX = /(Z|[\+\-]([01][0-9]|2[0-3]):[0-5][0-9])\z/i.freeze
12
- INVALID_QUERY_REGEX = /[[:space:]]/.freeze
12
+ HOUR_24_REGEX = /T24/.freeze
13
+ LEAP_SECOND_REGEX = /T\d{2}:\d{2}:6/.freeze
14
+ IP_REGEX = /\A[\h:.]+\z/.freeze
15
+ INVALID_QUERY_REGEX = /\s/.freeze
13
16
 
14
17
  def valid_spec_format?(data, format)
15
18
  case format
@@ -28,9 +31,9 @@ module JSONSchemer
28
31
  when 'idn-hostname'
29
32
  valid_hostname?(data)
30
33
  when 'ipv4'
31
- valid_ip?(data, :v4)
34
+ valid_ip?(data, Socket::AF_INET)
32
35
  when 'ipv6'
33
- valid_ip?(data, :v6)
36
+ valid_ip?(data, Socket::AF_INET6)
34
37
  when 'uri'
35
38
  valid_uri?(data)
36
39
  when 'uri-reference'
@@ -46,7 +49,9 @@ module JSONSchemer
46
49
  when 'relative-json-pointer'
47
50
  valid_relative_json_pointer?(data)
48
51
  when 'regex'
49
- EcmaReValidator.valid?(data)
52
+ valid_regex?(data)
53
+ else
54
+ raise UnknownFormat, format
50
55
  end
51
56
  end
52
57
 
@@ -58,24 +63,24 @@ module JSONSchemer
58
63
  end
59
64
 
60
65
  def valid_date_time?(data)
61
- DateTime.rfc3339(data)
66
+ return false if HOUR_24_REGEX.match?(data)
67
+ datetime = DateTime.rfc3339(data)
68
+ return false if LEAP_SECOND_REGEX.match?(data) && datetime.to_time.utc.strftime('%H:%M') != '23:59'
62
69
  DATE_TIME_OFFSET_REGEX.match?(data)
63
70
  rescue ArgumentError
64
71
  false
65
72
  end
66
73
 
67
74
  def valid_email?(data)
68
- EMAIL_REGEX.match?(data)
69
- end
70
-
71
- def valid_hostname?(data)
72
- HOSTNAME_REGEX.match?(data) && data.split('.').all? { |label| label.size <= 63 }
75
+ return false unless EMAIL_REGEX.match?(data)
76
+ local, _domain = data.partition('@')
77
+ !local.start_with?('.') && !local.end_with?('.') && !local.include?('..')
73
78
  end
74
79
 
75
- def valid_ip?(data, type)
76
- ip_address = IPAddr.new(data)
77
- type == :v4 ? ip_address.ipv4? : ip_address.ipv6?
78
- rescue IPAddr::InvalidAddressError
80
+ def valid_ip?(data, family)
81
+ IPAddr.new(data, family)
82
+ IP_REGEX.match?(data)
83
+ rescue IPAddr::Error
79
84
  false
80
85
  end
81
86
 
@@ -124,5 +129,11 @@ module JSONSchemer
124
129
  def valid_relative_json_pointer?(data)
125
130
  RELATIVE_JSON_POINTER_REGEX.match?(data)
126
131
  end
132
+
133
+ def valid_regex?(data)
134
+ !!EcmaRegexp.ruby_equivalent(data)
135
+ rescue InvalidEcmaRegexp
136
+ false
137
+ end
127
138
  end
128
139
  end