uri-idna 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is automatically generated by bin/generate
4
+ # Unicode version 15.1.0
5
+
6
+ module URI
7
+ module IDNA
8
+ VIRAMA_COMBINING_CLASSES = "["\
9
+ "\u094D"\
10
+ "\u09CD"\
11
+ "\u0A4D"\
12
+ "\u0ACD"\
13
+ "\u0B4D"\
14
+ "\u0BCD"\
15
+ "\u0C4D"\
16
+ "\u0CCD"\
17
+ "\u0D3B\u0D3C"\
18
+ "\u0D4D"\
19
+ "\u0DCA"\
20
+ "\u0E3A"\
21
+ "\u0EBA"\
22
+ "\u0F84"\
23
+ "\u1039\u103A"\
24
+ "\u1714\u1715"\
25
+ "\u1734"\
26
+ "\u17D2"\
27
+ "\u1A60"\
28
+ "\u1B44"\
29
+ "\u1BAA\u1BAB"\
30
+ "\u1BF2\u1BF3"\
31
+ "\u2D7F"\
32
+ "\uA806"\
33
+ "\uA82C"\
34
+ "\uA8C4"\
35
+ "\uA953"\
36
+ "\uA9C0"\
37
+ "\uAAF6"\
38
+ "\uABED"\
39
+ "\u{10A3F}"\
40
+ "\u{11046}"\
41
+ "\u{11070}"\
42
+ "\u{1107F}"\
43
+ "\u{110B9}"\
44
+ "\u{11133}\u{11134}"\
45
+ "\u{111C0}"\
46
+ "\u{11235}"\
47
+ "\u{112EA}"\
48
+ "\u{1134D}"\
49
+ "\u{11442}"\
50
+ "\u{114C2}"\
51
+ "\u{115BF}"\
52
+ "\u{1163F}"\
53
+ "\u{116B6}"\
54
+ "\u{1172B}"\
55
+ "\u{11839}"\
56
+ "\u{1193D}\u{1193E}"\
57
+ "\u{119E0}"\
58
+ "\u{11A34}"\
59
+ "\u{11A47}"\
60
+ "\u{11A99}"\
61
+ "\u{11C3F}"\
62
+ "\u{11D44}\u{11D45}"\
63
+ "\u{11D97}"\
64
+ "\u{11F41}\u{11F42}"\
65
+ "]" # the constant is a String holding regex character-class source ("[...]"), not a Regexp: use it as a pattern
66
+ end
67
+ end
@@ -1,17 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "options"
4
+ require_relative "../validation/contextj"
5
+ require_relative "../validation/contexto"
6
+ require_relative "../validation/idna_permitted"
7
+ require_relative "../validation/leading_combining"
4
8
 
5
9
  module URI
6
10
  module IDNA
7
11
  module IDNA2008
8
12
  class Processing < BaseProcessing
9
- private
10
-
11
- def options_class
13
+ def self.options_class
12
14
  Options
13
15
  end
14
16
 
17
+ private
18
+
15
19
  def validate(label)
16
20
  return if label.empty?
17
21
 
@@ -21,35 +25,16 @@ module URI
21
25
  else
22
26
  Validation::Label.check_ace_prefix(label)
23
27
  end
24
- Validation::Label.check_leading_combining(label) if options.leading_combining?
25
-
26
- label.each_codepoint.with_index do |cp, pos|
27
- begin
28
- next if Validation::Codepoint.check_contextj(label, cp, pos)
29
- rescue InvalidCodepointContextError => e
30
- next unless options.check_joiners?
31
-
32
- raise e
33
- end
34
-
35
- begin
36
- next if Validation::Codepoint.check_contexto(label, cp, pos)
37
- rescue InvalidCodepointContextError => e
38
- next unless options.check_others?
39
-
40
- raise e
41
- end
42
-
43
- Validation::Codepoint.check_idna_validity(label, cp, pos)
44
- end
28
+ Validation::LeadingCombining.call(label) if options.leading_combining?
29
+ Validation::ContextJ.call(label) if options.check_joiners?
30
+ Validation::ContextO.call(label) if options.check_others?
31
+ Validation::IDNAPermitted.call(label)
45
32
  Validation::Bidi.call(label) if check_bidi?
46
33
  end
47
34
 
48
35
  def punycode_decode(label)
49
36
  return label unless label.start_with?(ACE_PREFIX)
50
37
 
51
- raise Error, "A-label must not end with a hyphen" if label[-1] == "-"
52
-
53
38
  super
54
39
  end
55
40
  end
@@ -95,9 +80,9 @@ module URI
95
80
 
96
81
  Validation::Label.check_length(a_ulabel) if options.verify_dns_length?
97
82
 
98
- if alabel && ulabel && (a_ulabel != alabel) && (a_ulabel != alabel)
83
+ if alabel && ulabel && a_ulabel != alabel
99
84
  raise Error,
100
- "Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{u_alabel.inspect}"
85
+ "Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{a_ulabel.inspect}"
101
86
  end
102
87
 
103
88
  a_ulabel
@@ -13,6 +13,7 @@ module URI
13
13
  DAMP = 700
14
14
  INITIAL_BIAS = 72
15
15
  INITIAL_N = 0x80
16
+ ADAPT_THRESHOLD = ((BASE - TMIN) * TMAX) / 2
16
17
 
17
18
  DELIMITER = 0x2D
18
19
  MAXINT = 0x7FFFFFFF
@@ -30,7 +31,9 @@ module URI
30
31
  end
31
32
 
32
33
  def encode_digit(d)
33
- d + 22 + 75 * (d < 26 ? 1 : 0)
34
+ return d + 22 if d >= 26
35
+
36
+ d + 97
34
37
  end
35
38
 
36
39
  def adapt(delta, num_points, first_time)
@@ -38,7 +41,7 @@ module URI
38
41
  delta += (delta / num_points)
39
42
 
40
43
  k = 0
41
- while delta > (((BASE - TMIN) * TMAX) / 2)
44
+ while delta > ADAPT_THRESHOLD
42
45
  delta /= BASE - TMIN
43
46
  k += BASE
44
47
  end
@@ -47,18 +50,17 @@ module URI
47
50
 
48
51
  def encode(input)
49
52
  input = input.codepoints
50
- output = []
51
53
 
52
54
  n = INITIAL_N
53
55
  delta = 0
54
56
  bias = INITIAL_BIAS
55
57
 
56
- input.each { |cp| output << cp if cp < 0x80 }
58
+ output = input.select { |cp| cp < 0x80 }
57
59
  h = b = output.length
58
60
 
59
61
  output << DELIMITER if b > 0
60
-
61
- while h < input.length
62
+ input_length = input.length
63
+ while h < input_length
62
64
  m = MAXINT
63
65
  input.each do |cp|
64
66
  m = cp if cp >= n && cp < m
@@ -116,15 +118,15 @@ module URI
116
118
 
117
119
  b = input.rindex(DELIMITER) || 0
118
120
 
119
- 0.upto(b - 1) do |idx|
120
- cp = input[idx]
121
+ input[0, b].each do |cp|
121
122
  raise PunycodeError, "Invalid input" unless cp < 0x80
122
123
 
123
124
  output << cp
124
125
  end
125
126
 
126
127
  inc = b > 0 ? b + 1 : 0
127
- while inc < input.length
128
+ input_length = input.length
129
+ while inc < input_length
128
130
  old_i = i
129
131
  w = 1
130
132
  k = BASE
@@ -8,52 +8,54 @@ module URI
8
8
  # https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
9
9
  module Mapping
10
10
  class << self
11
+ STATUS_D_REGEX = Regexp.new(REGEX_D_STRING, Regexp::EXTENDED).freeze
12
+ REGEX_STD3_M_REGEX = Regexp.new(REGEX_STD3_M_STRING, Regexp::EXTENDED).freeze
13
+
14
+ MAP_REGEX = Regexp.new("#{REGEX_M_STRING}|#{REGEX_I_STRING}").freeze
15
+ REGEX_NOT_V = Regexp.new("[^#{REGEX_V_STRING}]").freeze
16
+ REGEX_NOT_VD = Regexp.new("[^#{REGEX_V_STRING}#{REGEX_D_STRING}]").freeze # parts are concatenated: a "|" inside [...] is a literal
17
+ REGEX_NOT_V3 = Regexp.new("[^#{REGEX_V_STRING}#{REGEX_STD3_M_STRING}#{REGEX_STD3_V_STRING}]").freeze # same: no "|" separators
18
+ REGEX_NOT_VD3 = Regexp.new(
19
+ "[^#{REGEX_V_STRING}#{REGEX_D_STRING}#{REGEX_STD3_M_STRING}#{REGEX_STD3_V_STRING}]", # same: no "|" separators
20
+ ).freeze
21
+
11
22
  def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
12
- output = []
13
- domain_name.each_codepoint do |codepoint|
14
- _, status, replacement = status(codepoint)
15
- case status
16
- when "V", "X" # valid, disallowed
17
- output << codepoint
18
- when "M" # mapped
19
- output += if transitional_processing && codepoint == 7838
20
- [115, 115]
21
- else
22
- replacement
23
- end
24
- when "D" # deviation
25
- if transitional_processing
26
- output += replacement
27
- else
28
- output << codepoint
29
- end
30
- when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
31
- if use_std3_ascii_rules || !replacement
32
- output << codepoint
33
- else
34
- output += replacement
35
- end
36
- when "I" # ignored
37
- next
23
+ return domain_name.downcase if domain_name.ascii_only?
24
+
25
+ output = domain_name.gsub(MAP_REGEX) do |match|
26
+ if transitional_processing && match == "\u1E9E"
27
+ "ss"
28
+ else
29
+ REPLACEMENTS[match]
38
30
  end
39
31
  end
40
- output.pack("U*").unicode_normalize!(:nfc)
32
+ output.gsub!(STATUS_D_REGEX, REPLACEMENTS) if transitional_processing
33
+ output.gsub!(REGEX_STD3_M_REGEX, REPLACEMENTS) unless use_std3_ascii_rules
34
+
35
+ output.ascii_only? ? output : output.unicode_normalize!(:nfc)
41
36
  end
42
37
 
43
- def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
44
- _, status, = status(cp)
45
- return if status == "V"
46
- return if !transitional_processing && status == "D"
47
- return if !use_std3_ascii_rules && status == "3"
38
+ def validate_label_status(label, transitional_processing:, use_std3_ascii_rules:)
39
+ regex =
40
+ if transitional_processing && use_std3_ascii_rules
41
+ REGEX_NOT_V
42
+ elsif transitional_processing
43
+ REGEX_NOT_V3
44
+ elsif use_std3_ascii_rules
45
+ REGEX_NOT_VD
46
+ else
47
+ REGEX_NOT_VD3
48
+ end
49
+
50
+ return unless (pos = label.index(regex))
48
51
 
49
- raise InvalidCodepointError, Validation::Codepoint.cp_error_message(label, cp, pos)
52
+ raise InvalidCodepointError, error_message(label, pos)
50
53
  end
51
54
 
52
- def status(codepoint)
53
- return UTS46_DATA[codepoint] if codepoint < 256
55
+ private
54
56
 
55
- index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
56
- UTS46_DATA[index] || []
57
+ def error_message(label, pos)
58
+ format("Codepoint U+%04X at position %d of %p not allowed in UTS46", label[pos].ord, pos + 1, label)
57
59
  end
58
60
  end
59
61
  end
@@ -2,12 +2,18 @@
2
2
 
3
3
  require_relative "mapping"
4
4
  require_relative "options"
5
+ require_relative "../validation/contextj"
6
+ require_relative "../validation/leading_combining"
5
7
 
6
8
  module URI
7
9
  module IDNA
8
10
  module UTS46
9
11
  # https://www.unicode.org/reports/tr46/#Processing
10
12
  class Processing < BaseProcessing
13
+ def self.options_class
14
+ Options
15
+ end
16
+
11
17
  def initialize(domain_name, **options)
12
18
  super
13
19
  @domain_name = Mapping.call(
@@ -40,10 +46,6 @@ module URI
40
46
 
41
47
  private
42
48
 
43
- def options_class
44
- Options
45
- end
46
-
47
49
  # https://www.unicode.org/reports/tr46/#Validity_Criteria
48
50
  def validate(label, transitional_processing: options.transitional_processing?)
49
51
  return if label.empty?
@@ -56,16 +58,13 @@ module URI
56
58
  Validation::Label.check_ace_prefix(label)
57
59
  end
58
60
  Validation::Label.check_dot(label)
59
- Validation::Label.check_leading_combining(label)
60
-
61
- label.each_codepoint.with_index do |cp, pos|
62
- Mapping.validate_status(
63
- label, cp, pos,
64
- transitional_processing: transitional_processing, use_std3_ascii_rules: options.use_std3_ascii_rules?
65
- )
66
-
67
- Validation::Codepoint.check_contextj(label, cp, pos) if options.check_joiners?
68
- end
61
+ Validation::LeadingCombining.call(label)
62
+ Mapping.validate_label_status(
63
+ label,
64
+ transitional_processing: transitional_processing,
65
+ use_std3_ascii_rules: options.use_std3_ascii_rules?,
66
+ )
67
+ Validation::ContextJ.call(label) if options.check_joiners?
69
68
  Validation::Bidi.call(label) if check_bidi?
70
69
  end
71
70
  end
@@ -76,7 +75,7 @@ module URI
76
75
 
77
76
  # https://www.unicode.org/reports/tr46/#ToASCII
78
77
  class ToASCII < Processing
79
- def options_class
78
+ def self.options_class
80
79
  ToASCIIOptions
81
80
  end
82
81
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../data/bidi_classes"
4
+
3
5
  module URI
4
6
  module IDNA
5
7
  module Validation
@@ -8,55 +10,46 @@ module URI
8
10
  # https://datatracker.ietf.org/doc/html/rfc5893#section-2
9
11
  module Bidi
10
12
  class << self
13
+ BIDI_R1_RTL = Regexp.new(BIDI_CLASSES["RTL"]).freeze
14
+ BIDI_R1_LTR = Regexp.new(BIDI_CLASSES["L"]).freeze
15
+ BIDI_R2 = Regexp.new("#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['UNUSED']}").freeze
16
+ BIDI_R3 = Regexp.new(
17
+ "(?:#{"#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z",
18
+ ).freeze
19
+ BIDI_R4_EN = Regexp.new(BIDI_CLASSES["EN"]).freeze
20
+ BIDI_R4_AN = Regexp.new(BIDI_CLASSES["AN"]).freeze
21
+ BIDI_R5 = Regexp.new("#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}").freeze
22
+ BIDI_R6 = Regexp.new("(?:#{"#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z").freeze
23
+
11
24
  def call(label)
12
25
  # Bidi rule 1
13
- if bidi_class(label[0].ord, "RTL")
14
- rtl = true
15
- elsif bidi_class(label[0].ord, "L")
26
+ if BIDI_R1_LTR.match?(label[0])
16
27
  rtl = false
28
+ elsif BIDI_R1_RTL.match?(label[0])
29
+ rtl = true
17
30
  else
18
31
  raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
19
32
  end
20
33
 
21
- valid_ending = false
22
- number_type = nil
23
- label.each_codepoint.with_index do |cp, pos|
24
- if rtl
25
- # Bidi rule 2
26
- if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
27
- raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
28
- end
29
-
30
- # Bidi rule 3
31
- direction = bidi_class(cp, "RTL") || bidi_class(cp, "EN") || bidi_class(cp, "AN")
32
- if direction
33
- valid_ending = true
34
- elsif !bidi_class(cp, "NSM")
35
- valid_ending = false
36
- end
37
- # Bidi rule 4
38
- if %w[EN AN].include?(direction)
39
- number_type ||= direction
40
- raise BidiError, "Can not mix numeral types in a right-to-left label" if number_type != direction
41
- end
42
- else
43
- # Bidi rule 5
44
- if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
45
- raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
46
- end
47
-
48
- # Bidi rule 6
49
- if bidi_class(cp, "L") || bidi_class(cp, "EN")
50
- valid_ending = true
51
- elsif !bidi_class(cp, "NSM")
52
- valid_ending = false
53
- end
34
+ if rtl
35
+ # Bidi rule 2
36
+ if (pos = label.index(BIDI_R2))
37
+ raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
54
38
  end
39
+ # Bidi rule 3
40
+ raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R3)
41
+ # Bidi rule 4
42
+ if label.match?(BIDI_R4_EN) && label.match?(BIDI_R4_AN)
43
+ raise BidiError, "Can not mix numeral types in a right-to-left label"
44
+ end
45
+ else
46
+ # Bidi rule 5
47
+ if (pos = label.index(BIDI_R5))
48
+ raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
49
+ end
50
+ # Bidi rule 6
51
+ raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R6)
55
52
  end
56
-
57
- raise BidiError, "Label ends with illegal codepoint directionality" unless valid_ending
58
-
59
- true
60
53
  end
61
54
 
62
55
  # https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
@@ -71,22 +64,11 @@ module URI
71
64
  end
72
65
  next if label.ascii_only?
73
66
 
74
- label.each_codepoint do |cp|
75
- next if cp < 256
76
- return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
77
- end
67
+ return true if label.match?(BIDI_R5)
78
68
  end
79
69
 
80
70
  false
81
71
  end
82
-
83
- private
84
-
85
- def bidi_class(codepoint, bidi_class)
86
- return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])
87
-
88
- false
89
- end
90
72
  end
91
73
  end
92
74
  end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+ require_relative "../data/joining_types"
5
+ require_relative "../data/virama_combining_classes"
6
+
7
+ module URI
8
+ module IDNA
9
+ module Validation
10
+ # CONTEXTJ rules for U+200C / U+200D: https://datatracker.ietf.org/doc/html/rfc5892
11
+ module ContextJ
12
+ class << self
13
+ CONTEXTJ_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTJ']}]").freeze
14
+
15
+ def call(label) # raises InvalidCodepointContextError at the first CONTEXTJ codepoint that fails its rule
16
+ return if label.ascii_only?
17
+
18
+ offset = 0
19
+ while (pos = label.index(CONTEXTJ_REGEX, offset))
20
+ raise InvalidCodepointContextError, error_message(label, pos) unless valid_contextj?(label, pos)
21
+
22
+ offset = pos + 1
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def valid_contextj?(label, pos) # true when the joiner at label[pos] is allowed in its context, false otherwise
29
+ case label[pos]
30
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
31
+ when "\u200c"
32
+ return true if pos > 0 && label[pos - 1].match?(VIRAMA_COMBINING_CLASSES) # constant is a pattern String: argument, not receiver
33
+
34
+ ok = false
35
+ (pos - 1).downto(0) do |i| # NOTE(review): RFC 5892 A.1 allows only Joining_Type T in between; this scan skips any codepoint - confirm
36
+ joining_type = JOINING_TYPES[label[i]]
37
+ if [0x4c, 0x44].include?(joining_type) # presumably ASCII codes of "L" / "D" joining types - verify against the data table
38
+ ok = true
39
+ break
40
+ end
41
+ end
42
+ return false unless ok
43
+
44
+ (pos + 1).upto(label.length - 1) do |i|
45
+ joining_type = JOINING_TYPES[label[i]]
46
+ return true if [0x52, 0x44].include?(joining_type) # presumably ASCII codes of "R" / "D" joining types - verify
47
+ end
48
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
49
+ when "\u200d"
50
+ return label[pos - 1].match?(VIRAMA_COMBINING_CLASSES) if pos > 0 # same fix: the label character is the subject
51
+ end
52
+ false
53
+ end
54
+
55
+ def error_message(label, pos)
56
+ format("ContextJ codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+ require_relative "../data/scripts"
5
+
6
+ module URI
7
+ module IDNA
8
+ module Validation
9
+ # CONTEXTO rules (appendix A.3 - A.9): https://datatracker.ietf.org/doc/html/rfc5892
10
+ module ContextO
11
+ class << self
12
+ CONTEXTO_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTO']}]").freeze
13
+ CONTEXTO_A4_REGEX = Regexp.new(SCRIPTS["Greek"]).freeze # frozen like the sibling regex constants
14
+ CONTEXTO_A5_REGEX = Regexp.new(SCRIPTS["Hebrew"]).freeze # frozen like the sibling regex constants
15
+ CONTEXTO_A7_REGEX = Regexp.new("#{SCRIPTS['Hiragana']}|#{SCRIPTS['Katakana']}|#{SCRIPTS['Han']}").freeze
16
+ CONTEXTO_A8_REGEX = /[\u06F0-\u06F9]/.freeze
17
+ CONTEXTO_A9_REGEX = /[\u0660-\u0669]/.freeze
18
+
19
+ def call(label) # raises InvalidCodepointContextError at the first CONTEXTO codepoint that fails its rule
20
+ offset = 0
21
+ while (pos = label.index(CONTEXTO_REGEX, offset))
22
+ raise InvalidCodepointContextError, error_message(label, pos) unless valid_contexto?(label, pos)
23
+
24
+ offset = pos + 1
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def valid_contexto?(label, pos) # truthy when label[pos] satisfies its rule; nil when no branch matches
31
+ case label[pos]
32
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
33
+ when "\u00b7"
34
+ pos > 0 && pos < label.length - 1 ? (label[pos - 1] == "\u006c" && label[pos + 1] == "\u006c") : false
35
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
36
+ when "\u0375"
37
+ pos < label.length - 1 ? CONTEXTO_A4_REGEX.match?(label[pos + 1]) : false
38
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
39
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
40
+ when "\u05f3", "\u05f4"
41
+ pos > 0 ? CONTEXTO_A5_REGEX.match?(label[pos - 1]) : false
42
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
43
+ when "\u30fb"
44
+ CONTEXTO_A7_REGEX.match?(label)
45
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
46
+ when "\u0660".."\u0669"
47
+ !CONTEXTO_A8_REGEX.match?(label) # valid only if the label has no U+06F0..U+06F9 digit
48
+ # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
49
+ when "\u06f0".."\u06f9"
50
+ !CONTEXTO_A9_REGEX.match?(label) # valid only if the label has no U+0660..U+0669 digit
51
+ end
52
+ end
53
+
54
+ def error_message(label, pos)
55
+ format("ContextO codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../data/codepoint_classes"
4
+
5
+ module URI
6
+ module IDNA
7
+ module Validation
8
+ module IDNAPermitted
9
+ class << self
10
+ IDNA_REGEX = Regexp.new( # matches any codepoint outside the PVALID, CONTEXTJ and CONTEXTO classes
11
+ "[^#{CODEPOINT_CLASSES['PVALID']}#{CODEPOINT_CLASSES['CONTEXTJ']}#{CODEPOINT_CLASSES['CONTEXTO']}]", # no "(|)": literals inside [...]
12
+ ).freeze
13
+
14
+ # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
15
+ def call(label) # returns nil when every codepoint is permitted, otherwise raises InvalidCodepointError
16
+ return unless (pos = label.index(IDNA_REGEX))
17
+
18
+ raise InvalidCodepointError, error_message(label, pos)
19
+ end
20
+
21
+ private
22
+
23
+ def error_message(label, pos)
24
+ format("Codepoint U+%04X at position %d of %p not allowed in IDNA2008", label[pos].ord, pos + 1, label)
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../intranges"
4
- require_relative "../data/idna"
5
-
6
3
  module URI
7
4
  module IDNA
8
5
  module Validation
@@ -11,7 +8,7 @@ module URI
11
8
  # 4.1. Input to IDNA Registration
12
9
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
13
10
  def check_nfc(label)
14
- return if label.unicode_normalized?(:nfc)
11
+ return if label.ascii_only? || label.unicode_normalized?(:nfc)
15
12
 
16
13
  raise Error, "Label must be in Unicode Normalization Form NFC"
17
14
  end
@@ -38,16 +35,6 @@ module URI
38
35
  raise Error, "Label must not begin with `xn--`"
39
36
  end
40
37
 
41
- # 4.2.3.2. Leading Combining Marks
42
- # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
43
- def check_leading_combining(label)
44
- cp = label[0].ord
45
- return if cp < 256
46
- return unless Intranges.contain?(cp, INITIAL_COMBINERS)
47
-
48
- raise Error, "Label begins with an illegal combining character"
49
- end
50
-
51
38
  def check_dot(label)
52
39
  raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
53
40
  end