uri-idna 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -45,12 +45,6 @@ module URI
45
45
  Validation::Bidi.call(label) if check_bidi?
46
46
  end
47
47
 
48
- def check_bidi?
49
- return @check_bidi if instance_variable_defined?(:@check_bidi)
50
-
51
- @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
52
- end
53
-
54
48
  def punycode_decode(label)
55
49
  return label unless label.start_with?(ACE_PREFIX)
56
50
 
@@ -72,8 +66,8 @@ module URI
72
66
  end
73
67
 
74
68
  def call
75
- alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
76
- ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel
69
+ alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize!(:nfc)) if alabel
70
+ ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize!(:nfc)) if ulabel
77
71
 
78
72
  if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
79
73
  raise Error, "alabel doesn't match ulabel"
@@ -128,7 +122,7 @@ module URI
128
122
  # # https://datatracker.ietf.org/doc/html/rfc5891#section-5
129
123
  class Lookup < Processing
130
124
  def call
131
- domain = domain_name.encode("UTF-8").unicode_normalize(:nfc)
125
+ domain = domain_name.encode("UTF-8").unicode_normalize!(:nfc)
132
126
 
133
127
  result = process_labels(domain) do |label|
134
128
  orig_label = label
@@ -9,31 +9,35 @@ module URI
9
9
  module Mapping
10
10
  class << self
11
11
  def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
12
- output = +""
12
+ output = []
13
13
  domain_name.each_codepoint do |codepoint|
14
14
  _, status, replacement = status(codepoint)
15
15
  case status
16
16
  when "V", "X" # valid, disallowed
17
- output << codepoint.chr(Encoding::UTF_8)
17
+ output << codepoint
18
18
  when "M" # mapped
19
- output << if transitional_processing && codepoint == 7838
20
- "ss"
19
+ output += if transitional_processing && codepoint == 7838
20
+ [115, 115]
21
21
  else
22
22
  replacement
23
23
  end
24
24
  when "D" # deviation
25
- output << (transitional_processing ? replacement : codepoint.chr(Encoding::UTF_8))
25
+ if transitional_processing
26
+ output += replacement
27
+ else
28
+ output << codepoint
29
+ end
26
30
  when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
27
- output << if use_std3_ascii_rules
28
- codepoint.chr(Encoding::UTF_8)
29
- else
30
- (replacement || codepoint.chr(Encoding::UTF_8))
31
- end
31
+ if use_std3_ascii_rules || !replacement
32
+ output << codepoint
33
+ else
34
+ output += replacement
35
+ end
32
36
  when "I" # ignored
33
37
  next
34
38
  end
35
39
  end
36
- output.unicode_normalize(:nfc)
40
+ output.pack("U*").unicode_normalize!(:nfc)
37
41
  end
38
42
 
39
43
  def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
@@ -46,12 +50,9 @@ module URI
46
50
  end
47
51
 
48
52
  def status(codepoint)
49
- index =
50
- if codepoint < 256
51
- codepoint
52
- else
53
- (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
54
- end
53
+ return UTS46_DATA[codepoint] if codepoint < 256
54
+
55
+ index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
55
56
  UTS46_DATA[index] || []
56
57
  end
57
58
  end
@@ -8,14 +8,17 @@ module URI
8
8
  module UTS46
9
9
  # https://www.unicode.org/reports/tr46/#Processing
10
10
  class Processing < BaseProcessing
11
- def call
12
- domain = Mapping.call(
11
+ def initialize(domain_name, **options)
12
+ super
13
+ @domain_name = Mapping.call(
13
14
  domain_name,
14
- transitional_processing: options.transitional_processing?,
15
- use_std3_ascii_rules: options.use_std3_ascii_rules?,
15
+ transitional_processing: @options.transitional_processing?,
16
+ use_std3_ascii_rules: @options.use_std3_ascii_rules?,
16
17
  )
18
+ end
17
19
 
18
- process_labels(domain) do |label|
20
+ def call
21
+ process_labels(domain_name) do |label|
19
22
  if label.start_with?(ACE_PREFIX)
20
23
  begin
21
24
  label = punycode_decode(label)
@@ -41,12 +44,6 @@ module URI
41
44
  Options
42
45
  end
43
46
 
44
- def check_bidi?
45
- return @check_bidi if instance_variable_defined?(:@check_bidi)
46
-
47
- @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
48
- end
49
-
50
47
  # https://www.unicode.org/reports/tr46/#Validity_Criteria
51
48
  def validate(label, transitional_processing: options.transitional_processing?)
52
49
  return if label.empty?
@@ -61,22 +61,22 @@ module URI
61
61
 
62
62
  # https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
63
63
  def check?(domain)
64
- labels = domain.split(".", -1)
65
- domain = labels.map do |label|
64
+ domain.split(".").each do |label|
66
65
  if label.start_with?(ACE_PREFIX)
67
66
  begin
68
- Punycode.decode(label[ACE_PREFIX.length..])
67
+ label = Punycode.decode(label[ACE_PREFIX.length..])
69
68
  rescue PunycodeError
70
- ""
69
+ next
71
70
  end
72
- else
73
- label
74
71
  end
75
- end.join(".")
72
+ next if label.ascii_only?
76
73
 
77
- domain.each_codepoint do |cp|
78
- return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
74
+ label.each_codepoint do |cp|
75
+ next if cp < 256
76
+ return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
77
+ end
79
78
  end
79
+
80
80
  false
81
81
  end
82
82
 
@@ -17,6 +17,7 @@ module URI
17
17
 
18
18
  # https://datatracker.ietf.org/doc/html/rfc5892
19
19
  def check_contextj(label, cp, pos)
20
+ return false if cp < 256
20
21
  return false unless codepoint?(cp, "CONTEXTJ")
21
22
  return true if valid_contextj?(label, cp, pos)
22
23
 
@@ -25,6 +26,7 @@ module URI
25
26
 
26
27
  # https://datatracker.ietf.org/doc/html/rfc5892
27
28
  def check_contexto(label, cp, pos)
29
+ return false if cp < 183
28
30
  return false unless codepoint?(cp, "CONTEXTO")
29
31
  return true if valid_contexto?(label, cp, pos)
30
32
 
@@ -105,10 +107,14 @@ module URI
105
107
  end
106
108
 
107
109
  def script?(cp, script)
110
+ return false if cp < 256
111
+
108
112
  Intranges.contain?(cp, SCRIPTS[script])
109
113
  end
110
114
 
111
115
  def virama_combining_class?(cp)
116
+ return false if cp < 256
117
+
112
118
  Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
113
119
  end
114
120
 
@@ -27,7 +27,7 @@ module URI
27
27
  # 4.2.3.1. Hyphen Restrictions
28
28
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
29
29
  def check_hyphen_sides(label)
30
- return unless label[0] == "-" || label[-1] == "-"
30
+ return unless label.start_with?("-") || label.end_with?("-")
31
31
 
32
32
  raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
33
33
  end
@@ -41,7 +41,9 @@ module URI
41
41
  # 4.2.3.2. Leading Combining Marks
42
42
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
43
43
  def check_leading_combining(label)
44
- return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)
44
+ cp = label[0].ord
45
+ return if cp < 256
46
+ return unless Intranges.contain?(cp, INITIAL_COMBINERS)
45
47
 
46
48
  raise Error, "Label begins with an illegal combining character"
47
49
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  module IDNA
5
- VERSION = "0.2.0"
5
+ VERSION = "0.2.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-idna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Svyatoslav Kryukov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-14 00:00:00.000000000 Z
11
+ date: 2023-11-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Internationalized Domain Names in Applications (IDNA)
14
14
  email: