uri-idna 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is automatically generated by bin/generate
4
+ # Unicode version 15.1.0
5
+
6
module URI
  module IDNA
    # Regexp character-class source (a String, including the surrounding
    # brackets) listing the codepoints this data file was generated with for
    # Unicode 15.1.0. Literals are grouped by script block / plane; the
    # resulting string is identical to the one-codepoint-per-line form.
    VIRAMA_COMBINING_CLASSES = (
      "[" \
      "\u094D\u09CD\u0A4D\u0ACD\u0B4D\u0BCD\u0C4D\u0CCD\u0D3B\u0D3C\u0D4D\u0DCA" \
      "\u0E3A\u0EBA\u0F84" \
      "\u1039\u103A\u1714\u1715\u1734\u17D2\u1A60\u1B44\u1BAA\u1BAB\u1BF2\u1BF3" \
      "\u2D7F" \
      "\uA806\uA82C\uA8C4\uA953\uA9C0\uAAF6\uABED" \
      "\u{10A3F}" \
      "\u{11046}\u{11070}\u{1107F}\u{110B9}" \
      "\u{11133}\u{11134}\u{111C0}" \
      "\u{11235}\u{112EA}\u{1134D}" \
      "\u{11442}\u{114C2}\u{115BF}\u{1163F}\u{116B6}\u{1172B}" \
      "\u{11839}\u{1193D}\u{1193E}\u{119E0}" \
      "\u{11A34}\u{11A47}\u{11A99}" \
      "\u{11C3F}\u{11D44}\u{11D45}\u{11D97}" \
      "\u{11F41}\u{11F42}" \
      "]"
    ).freeze
  end
end
@@ -1,17 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "options"
4
+ require_relative "../validation/contextj"
5
+ require_relative "../validation/contexto"
6
+ require_relative "../validation/idna_permitted"
7
+ require_relative "../validation/leading_combining"
4
8
 
5
9
  module URI
6
10
  module IDNA
7
11
  module IDNA2008
8
12
  class Processing < BaseProcessing
9
- private
10
-
11
- def options_class
13
+ def self.options_class
12
14
  Options
13
15
  end
14
16
 
17
+ private
18
+
15
19
  def validate(label)
16
20
  return if label.empty?
17
21
 
@@ -21,35 +25,16 @@ module URI
21
25
  else
22
26
  Validation::Label.check_ace_prefix(label)
23
27
  end
24
- Validation::Label.check_leading_combining(label) if options.leading_combining?
25
-
26
- label.each_codepoint.with_index do |cp, pos|
27
- begin
28
- next if Validation::Codepoint.check_contextj(label, cp, pos)
29
- rescue InvalidCodepointContextError => e
30
- next unless options.check_joiners?
31
-
32
- raise e
33
- end
34
-
35
- begin
36
- next if Validation::Codepoint.check_contexto(label, cp, pos)
37
- rescue InvalidCodepointContextError => e
38
- next unless options.check_others?
39
-
40
- raise e
41
- end
42
-
43
- Validation::Codepoint.check_idna_validity(label, cp, pos)
44
- end
28
+ Validation::LeadingCombining.call(label) if options.leading_combining?
29
+ Validation::ContextJ.call(label) if options.check_joiners?
30
+ Validation::ContextO.call(label) if options.check_others?
31
+ Validation::IDNAPermitted.call(label)
45
32
  Validation::Bidi.call(label) if check_bidi?
46
33
  end
47
34
 
48
35
  def punycode_decode(label)
49
36
  return label unless label.start_with?(ACE_PREFIX)
50
37
 
51
- raise Error, "A-label must not end with a hyphen" if label[-1] == "-"
52
-
53
38
  super
54
39
  end
55
40
  end
@@ -95,9 +80,9 @@ module URI
95
80
 
96
81
  Validation::Label.check_length(a_ulabel) if options.verify_dns_length?
97
82
 
98
- if alabel && ulabel && (a_ulabel != alabel) && (a_ulabel != alabel)
83
+ if alabel && ulabel && a_ulabel != alabel
99
84
  raise Error,
100
- "Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{u_alabel.inspect}"
85
+ "Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{a_ulabel.inspect}"
101
86
  end
102
87
 
103
88
  a_ulabel
@@ -13,6 +13,7 @@ module URI
13
13
  DAMP = 700
14
14
  INITIAL_BIAS = 72
15
15
  INITIAL_N = 0x80
16
+ ADAPT_THRESHOLD = ((BASE - TMIN) * TMAX) / 2
16
17
 
17
18
  DELIMITER = 0x2D
18
19
  MAXINT = 0x7FFFFFFF
@@ -30,7 +31,9 @@ module URI
30
31
  end
31
32
 
32
33
  def encode_digit(d)
33
- d + 22 + 75 * (d < 26 ? 1 : 0)
34
+ return d + 22 if d >= 26
35
+
36
+ d + 97
34
37
  end
35
38
 
36
39
  def adapt(delta, num_points, first_time)
@@ -38,7 +41,7 @@ module URI
38
41
  delta += (delta / num_points)
39
42
 
40
43
  k = 0
41
- while delta > (((BASE - TMIN) * TMAX) / 2)
44
+ while delta > ADAPT_THRESHOLD
42
45
  delta /= BASE - TMIN
43
46
  k += BASE
44
47
  end
@@ -47,18 +50,17 @@ module URI
47
50
 
48
51
  def encode(input)
49
52
  input = input.codepoints
50
- output = []
51
53
 
52
54
  n = INITIAL_N
53
55
  delta = 0
54
56
  bias = INITIAL_BIAS
55
57
 
56
- input.each { |cp| output << cp if cp < 0x80 }
58
+ output = input.select { |cp| cp < 0x80 }
57
59
  h = b = output.length
58
60
 
59
61
  output << DELIMITER if b > 0
60
-
61
- while h < input.length
62
+ input_length = input.length
63
+ while h < input_length
62
64
  m = MAXINT
63
65
  input.each do |cp|
64
66
  m = cp if cp >= n && cp < m
@@ -116,15 +118,15 @@ module URI
116
118
 
117
119
  b = input.rindex(DELIMITER) || 0
118
120
 
119
- 0.upto(b - 1) do |idx|
120
- cp = input[idx]
121
+ input[0, b].each do |cp|
121
122
  raise PunycodeError, "Invalid input" unless cp < 0x80
122
123
 
123
124
  output << cp
124
125
  end
125
126
 
126
127
  inc = b > 0 ? b + 1 : 0
127
- while inc < input.length
128
+ input_length = input.length
129
+ while inc < input_length
128
130
  old_i = i
129
131
  w = 1
130
132
  k = BASE
@@ -8,52 +8,54 @@ module URI
8
8
  # https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
9
9
  module Mapping
10
10
  class << self
11
# Precompiled scanners built from the REGEX_*_STRING sources (defined outside
# this view; presumably character-class source strings, one per UTS46 status
# -- confirm against the generated data file).

# Codepoints replaced only when transitional processing is requested.
STATUS_D_REGEX = Regexp.new(REGEX_D_STRING, Regexp::EXTENDED).freeze
# Codepoints replaced only when STD3 ASCII rules are switched off.
REGEX_STD3_M_REGEX = Regexp.new(REGEX_STD3_M_STRING, Regexp::EXTENDED).freeze

# Codepoints that are always rewritten through REPLACEMENTS by `call`.
MAP_REGEX = Regexp.new("#{REGEX_M_STRING}|#{REGEX_I_STRING}").freeze

# Negated classes used by `validate_label_status` to find the first codepoint
# that is NOT acceptable. The sources are concatenated directly: inside a
# bracket expression `|` is a literal character, not alternation, so joining
# the sources with `|` would quietly add U+007C to the accepted set.
REGEX_NOT_V = Regexp.new("[^#{REGEX_V_STRING}]").freeze
REGEX_NOT_VD = Regexp.new("[^#{REGEX_V_STRING}#{REGEX_D_STRING}]").freeze
REGEX_NOT_V3 = Regexp.new("[^#{REGEX_V_STRING}#{REGEX_STD3_M_STRING}#{REGEX_STD3_V_STRING}]").freeze
REGEX_NOT_VD3 = Regexp.new(
  "[^#{REGEX_V_STRING}#{REGEX_D_STRING}#{REGEX_STD3_M_STRING}#{REGEX_STD3_V_STRING}]",
).freeze
21
+
11
22
  def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
12
- output = []
13
- domain_name.each_codepoint do |codepoint|
14
- _, status, replacement = status(codepoint)
15
- case status
16
- when "V", "X" # valid, disallowed
17
- output << codepoint
18
- when "M" # mapped
19
- output += if transitional_processing && codepoint == 7838
20
- [115, 115]
21
- else
22
- replacement
23
- end
24
- when "D" # deviation
25
- if transitional_processing
26
- output += replacement
27
- else
28
- output << codepoint
29
- end
30
- when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
31
- if use_std3_ascii_rules || !replacement
32
- output << codepoint
33
- else
34
- output += replacement
35
- end
36
- when "I" # ignored
37
- next
23
+ return domain_name.downcase if domain_name.ascii_only?
24
+
25
+ output = domain_name.gsub(MAP_REGEX) do |match|
26
+ if transitional_processing && match == "\u1E9E"
27
+ "ss"
28
+ else
29
+ REPLACEMENTS[match]
38
30
  end
39
31
  end
40
- output.pack("U*").unicode_normalize!(:nfc)
32
+ output.gsub!(STATUS_D_REGEX, REPLACEMENTS) if transitional_processing
33
+ output.gsub!(REGEX_STD3_M_REGEX, REPLACEMENTS) unless use_std3_ascii_rules
34
+
35
+ output.ascii_only? ? output : output.unicode_normalize!(:nfc)
41
36
  end
42
37
 
43
- def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
44
- _, status, = status(cp)
45
- return if status == "V"
46
- return if !transitional_processing && status == "D"
47
- return if !use_std3_ascii_rules && status == "3"
38
+ def validate_label_status(label, transitional_processing:, use_std3_ascii_rules:)
39
+ regex =
40
+ if transitional_processing && use_std3_ascii_rules
41
+ REGEX_NOT_V
42
+ elsif transitional_processing
43
+ REGEX_NOT_V3
44
+ elsif use_std3_ascii_rules
45
+ REGEX_NOT_VD
46
+ else
47
+ REGEX_NOT_VD3
48
+ end
49
+
50
+ return unless (pos = label.index(regex))
48
51
 
49
- raise InvalidCodepointError, Validation::Codepoint.cp_error_message(label, cp, pos)
52
+ raise InvalidCodepointError, error_message(label, pos)
50
53
  end
51
54
 
52
- def status(codepoint)
53
- return UTS46_DATA[codepoint] if codepoint < 256
55
+ private
54
56
 
55
- index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
56
- UTS46_DATA[index] || []
57
+ def error_message(label, pos)
58
+ format("Codepoint U+%04X at position %d of %p not allowed in UTS46", label[pos].ord, pos + 1, label)
57
59
  end
58
60
  end
59
61
  end
@@ -2,12 +2,18 @@
2
2
 
3
3
  require_relative "mapping"
4
4
  require_relative "options"
5
+ require_relative "../validation/contextj"
6
+ require_relative "../validation/leading_combining"
5
7
 
6
8
  module URI
7
9
  module IDNA
8
10
  module UTS46
9
11
  # https://www.unicode.org/reports/tr46/#Processing
10
12
  class Processing < BaseProcessing
13
+ def self.options_class
14
+ Options
15
+ end
16
+
11
17
  def initialize(domain_name, **options)
12
18
  super
13
19
  @domain_name = Mapping.call(
@@ -40,10 +46,6 @@ module URI
40
46
 
41
47
  private
42
48
 
43
- def options_class
44
- Options
45
- end
46
-
47
49
  # https://www.unicode.org/reports/tr46/#Validity_Criteria
48
50
  def validate(label, transitional_processing: options.transitional_processing?)
49
51
  return if label.empty?
@@ -56,16 +58,13 @@ module URI
56
58
  Validation::Label.check_ace_prefix(label)
57
59
  end
58
60
  Validation::Label.check_dot(label)
59
- Validation::Label.check_leading_combining(label)
60
-
61
- label.each_codepoint.with_index do |cp, pos|
62
- Mapping.validate_status(
63
- label, cp, pos,
64
- transitional_processing: transitional_processing, use_std3_ascii_rules: options.use_std3_ascii_rules?
65
- )
66
-
67
- Validation::Codepoint.check_contextj(label, cp, pos) if options.check_joiners?
68
- end
61
+ Validation::LeadingCombining.call(label)
62
+ Mapping.validate_label_status(
63
+ label,
64
+ transitional_processing: transitional_processing,
65
+ use_std3_ascii_rules: options.use_std3_ascii_rules?,
66
+ )
67
+ Validation::ContextJ.call(label) if options.check_joiners?
69
68
  Validation::Bidi.call(label) if check_bidi?
70
69
  end
71
70
  end
@@ -76,7 +75,7 @@ module URI
76
75
 
77
76
  # https://www.unicode.org/reports/tr46/#ToASCII
78
77
  class ToASCII < Processing
79
- def options_class
78
+ def self.options_class
80
79
  ToASCIIOptions
81
80
  end
82
81
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../data/bidi_classes"
4
+
3
5
  module URI
4
6
  module IDNA
5
7
  module Validation
@@ -8,55 +10,46 @@ module URI
8
10
  # https://datatracker.ietf.org/doc/html/rfc5893#section-2
9
11
  module Bidi
10
12
  class << self
13
+ BIDI_R1_RTL = Regexp.new(BIDI_CLASSES["RTL"]).freeze
14
+ BIDI_R1_LTR = Regexp.new(BIDI_CLASSES["L"]).freeze
15
+ BIDI_R2 = Regexp.new("#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['UNUSED']}").freeze
16
+ BIDI_R3 = Regexp.new(
17
+ "(?:#{"#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z",
18
+ ).freeze
19
+ BIDI_R4_EN = Regexp.new(BIDI_CLASSES["EN"]).freeze
20
+ BIDI_R4_AN = Regexp.new(BIDI_CLASSES["AN"]).freeze
21
+ BIDI_R5 = Regexp.new("#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}").freeze
22
+ BIDI_R6 = Regexp.new("(?:#{"#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z").freeze
23
+
11
24
  def call(label)
12
25
  # Bidi rule 1
13
- if bidi_class(label[0].ord, "RTL")
14
- rtl = true
15
- elsif bidi_class(label[0].ord, "L")
26
+ if BIDI_R1_LTR.match?(label[0])
16
27
  rtl = false
28
+ elsif BIDI_R1_RTL.match?(label[0])
29
+ rtl = true
17
30
  else
18
31
  raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
19
32
  end
20
33
 
21
- valid_ending = false
22
- number_type = nil
23
- label.each_codepoint.with_index do |cp, pos|
24
- if rtl
25
- # Bidi rule 2
26
- if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
27
- raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
28
- end
29
-
30
- # Bidi rule 3
31
- direction = bidi_class(cp, "RTL") || bidi_class(cp, "EN") || bidi_class(cp, "AN")
32
- if direction
33
- valid_ending = true
34
- elsif !bidi_class(cp, "NSM")
35
- valid_ending = false
36
- end
37
- # Bidi rule 4
38
- if %w[EN AN].include?(direction)
39
- number_type ||= direction
40
- raise BidiError, "Can not mix numeral types in a right-to-left label" if number_type != direction
41
- end
42
- else
43
- # Bidi rule 5
44
- if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
45
- raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
46
- end
47
-
48
- # Bidi rule 6
49
- if bidi_class(cp, "L") || bidi_class(cp, "EN")
50
- valid_ending = true
51
- elsif !bidi_class(cp, "NSM")
52
- valid_ending = false
53
- end
34
+ if rtl
35
+ # Bidi rule 2
36
+ if (pos = label.index(BIDI_R2))
37
+ raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
54
38
  end
39
+ # Bidi rule 3
40
+ raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R3)
41
+ # Bidi rule 4
42
+ if label.match?(BIDI_R4_EN) && label.match?(BIDI_R4_AN)
43
+ raise BidiError, "Can not mix numeral types in a right-to-left label"
44
+ end
45
+ else
46
+ # Bidi rule 5
47
+ if (pos = label.index(BIDI_R5))
48
+ raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
49
+ end
50
+ # Bidi rule 6
51
+ raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R6)
55
52
  end
56
-
57
- raise BidiError, "Label ends with illegal codepoint directionality" unless valid_ending
58
-
59
- true
60
53
  end
61
54
 
62
55
  # https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
@@ -71,22 +64,11 @@ module URI
71
64
  end
72
65
  next if label.ascii_only?
73
66
 
74
- label.each_codepoint do |cp|
75
- next if cp < 256
76
- return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
77
- end
67
+ return true if label.match?(BIDI_R5)
78
68
  end
79
69
 
80
70
  false
81
71
  end
82
-
83
- private
84
-
85
- def bidi_class(codepoint, bidi_class)
86
- return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])
87
-
88
- false
89
- end
90
72
  end
91
73
  end
92
74
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+ require_relative "../data/joining_types"
5
+ require_relative "../data/virama_combining_classes"
6
+
7
module URI
  module IDNA
    module Validation
      # Contextual rules for the join-control codepoints U+200C (ZERO WIDTH
      # NON-JOINER) and U+200D (ZERO WIDTH JOINER).
      # https://datatracker.ietf.org/doc/html/rfc5892
      module ContextJ
        class << self
          CONTEXTJ_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTJ']}]").freeze
          # VIRAMA_COMBINING_CLASSES is a character-class source String. Calling
          # String#match? on it with a label character would use that character
          # as the *pattern* (raising RegexpError for "(", "[", "*", ... and
          # matching spuriously for ".", "|", "]"), so compile it once here and
          # match characters against the compiled class instead.
          VIRAMA_REGEX = Regexp.new(VIRAMA_COMBINING_CLASSES).freeze
          # Joining_Type values as ASCII codes: "L"/"D" and "R"/"D".
          LEFT_JOINING_TYPES = [0x4c, 0x44].freeze
          RIGHT_JOINING_TYPES = [0x52, 0x44].freeze

          # Checks every CONTEXTJ codepoint in +label+.
          #
          # @param label [String]
          # @return [nil]
          # @raise [InvalidCodepointContextError] for the first joiner whose
          #   context is not allowed
          def call(label)
            # Both join controls are non-ASCII, so there is nothing to check.
            return if label.ascii_only?

            offset = 0
            while (pos = label.index(CONTEXTJ_REGEX, offset))
              raise InvalidCodepointContextError, error_message(label, pos) unless valid_contextj?(label, pos)

              offset = pos + 1
            end
          end

          private

          # @return [Boolean] whether the joiner at +pos+ sits in a valid context
          def valid_contextj?(label, pos)
            case label[pos]
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
            when "\u200c" then valid_zwnj?(label, pos)
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
            when "\u200d" then after_virama?(label, pos)
            else false
            end
          end

          # True when the codepoint immediately before +pos+ is a virama.
          def after_virama?(label, pos)
            pos > 0 && VIRAMA_REGEX.match?(label[pos - 1])
          end

          # ZWNJ is accepted after a virama, or when an L/D joining character
          # occurs somewhere before it and an R/D joining character somewhere
          # after it.
          # NOTE(review): RFC 5892 A.1 only allows Joining_Type T characters
          # between the joiner and those neighbours; this scan (like the code it
          # replaces) does not enforce that -- confirm whether JOINING_TYPES
          # carries T entries before tightening.
          def valid_zwnj?(label, pos)
            return true if after_virama?(label, pos)

            preceding = label[0, pos]
            following = label[(pos + 1)..-1]

            preceding.each_char.any? { |char| LEFT_JOINING_TYPES.include?(JOINING_TYPES[char]) } &&
              following.each_char.any? { |char| RIGHT_JOINING_TYPES.include?(JOINING_TYPES[char]) }
          end

          def error_message(label, pos)
            format("ContextJ codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+ require_relative "../data/scripts"
5
+
6
module URI
  module IDNA
    module Validation
      # Contextual rules for CONTEXTO codepoints (middle dots, Greek keraia,
      # Hebrew geresh/gershayim, Arabic-Indic digits).
      # https://datatracker.ietf.org/doc/html/rfc5892
      module ContextO
        class << self
          CONTEXTO_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTO']}]").freeze
          # Frozen like every other precompiled pattern in this module.
          CONTEXTO_A4_REGEX = Regexp.new(SCRIPTS["Greek"]).freeze
          CONTEXTO_A5_REGEX = Regexp.new(SCRIPTS["Hebrew"]).freeze
          CONTEXTO_A7_REGEX = Regexp.new("#{SCRIPTS['Hiragana']}|#{SCRIPTS['Katakana']}|#{SCRIPTS['Han']}").freeze
          CONTEXTO_A8_REGEX = /[\u06F0-\u06F9]/.freeze
          CONTEXTO_A9_REGEX = /[\u0660-\u0669]/.freeze

          # Checks every CONTEXTO codepoint in +label+.
          #
          # @param label [String]
          # @return [nil]
          # @raise [InvalidCodepointContextError] for the first CONTEXTO
          #   codepoint whose context is not allowed
          def call(label)
            # Every codepoint handled by valid_contexto? is non-ASCII, so an
            # ASCII-only label cannot contain one.
            return if label.ascii_only?

            offset = 0
            while (pos = label.index(CONTEXTO_REGEX, offset))
              raise InvalidCodepointContextError, error_message(label, pos) unless valid_contexto?(label, pos)

              offset = pos + 1
            end
          end

          private

          # @return [Boolean] whether the codepoint at +pos+ sits in a valid context
          def valid_contexto?(label, pos)
            case label[pos]
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
            # MIDDLE DOT: only between two "l" characters.
            when "\u00b7"
              pos > 0 && label[pos - 1] == "l" && label[pos + 1] == "l"
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
            # Must be followed by a Greek-script character.
            when "\u0375"
              following = label[pos + 1]
              following ? CONTEXTO_A4_REGEX.match?(following) : false
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
            # Must be preceded by a Hebrew-script character; the pos guard keeps
            # label[-1] from wrapping around to the last character.
            when "\u05f3", "\u05f4"
              pos > 0 && CONTEXTO_A5_REGEX.match?(label[pos - 1])
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
            # Label must contain a Hiragana, Katakana or Han character.
            when "\u30fb"
              CONTEXTO_A7_REGEX.match?(label)
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
            # U+0660..U+0669 must not be mixed with U+06F0..U+06F9.
            when "\u0660".."\u0669"
              !CONTEXTO_A8_REGEX.match?(label)
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
            # U+06F0..U+06F9 must not be mixed with U+0660..U+0669.
            when "\u06f0".."\u06f9"
              !CONTEXTO_A9_REGEX.match?(label)
            else
              # A CONTEXTO codepoint with no rule here is rejected explicitly
              # rather than through an implicit nil.
              false
            end
          end

          def error_message(label, pos)
            format("ContextO codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+
5
module URI
  module IDNA
    module Validation
      # Rejects labels containing codepoints outside the PVALID, CONTEXTJ and
      # CONTEXTO classes.
      module IDNAPermitted
        class << self
          # Negated union of the three permitted classes. Each source is nested
          # in its own bracket expression: grouping them as "(a|b|c)" inside a
          # character class would make "(", "|" and ")" literal members and
          # therefore accept those three characters as permitted.
          IDNA_REGEX = Regexp.new(
            "[^[#{CODEPOINT_CLASSES['PVALID']}][#{CODEPOINT_CLASSES['CONTEXTJ']}][#{CODEPOINT_CLASSES['CONTEXTO']}]]",
          ).freeze

          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
          #
          # @param label [String]
          # @return [nil] when every codepoint is permitted
          # @raise [InvalidCodepointError] for the first codepoint that is not
          def call(label)
            pos = label.index(IDNA_REGEX)
            return unless pos

            raise InvalidCodepointError, error_message(label, pos)
          end

          private

          def error_message(label, pos)
            format("Codepoint U+%04X at position %d of %p not allowed in IDNA2008", label[pos].ord, pos + 1, label)
          end
        end
      end
    end
  end
end
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../intranges"
4
- require_relative "../data/idna"
5
-
6
3
  module URI
7
4
  module IDNA
8
5
  module Validation
@@ -11,7 +8,7 @@ module URI
11
8
  # 4.1. Input to IDNA Registration
12
9
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
13
10
  def check_nfc(label)
14
- return if label.unicode_normalized?(:nfc)
11
+ return if label.ascii_only? || label.unicode_normalized?(:nfc)
15
12
 
16
13
  raise Error, "Label must be in Unicode Normalization Form NFC"
17
14
  end
@@ -38,16 +35,6 @@ module URI
38
35
  raise Error, "Label must not begin with `xn--`"
39
36
  end
40
37
 
41
- # 4.2.3.2. Leading Combining Marks
42
- # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
43
- def check_leading_combining(label)
44
- cp = label[0].ord
45
- return if cp < 256
46
- return unless Intranges.contain?(cp, INITIAL_COMBINERS)
47
-
48
- raise Error, "Label begins with an illegal combining character"
49
- end
50
-
51
38
  def check_dot(label)
52
39
  raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
53
40
  end