uri-idna 0.2.0 → 0.2.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -45,12 +45,6 @@ module URI
45
45
  Validation::Bidi.call(label) if check_bidi?
46
46
  end
47
47
 
48
- def check_bidi?
49
- return @check_bidi if instance_variable_defined?(:@check_bidi)
50
-
51
- @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
52
- end
53
-
54
48
  def punycode_decode(label)
55
49
  return label unless label.start_with?(ACE_PREFIX)
56
50
 
@@ -72,8 +66,8 @@ module URI
72
66
  end
73
67
 
74
68
  def call
75
- alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
76
- ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel
69
+ alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize!(:nfc)) if alabel
70
+ ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize!(:nfc)) if ulabel
77
71
 
78
72
  if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
79
73
  raise Error, "alabel doesn't match ulabel"
@@ -128,7 +122,7 @@ module URI
128
122
  # # https://datatracker.ietf.org/doc/html/rfc5891#section-5
129
123
  class Lookup < Processing
130
124
  def call
131
- domain = domain_name.encode("UTF-8").unicode_normalize(:nfc)
125
+ domain = domain_name.encode("UTF-8").unicode_normalize!(:nfc)
132
126
 
133
127
  result = process_labels(domain) do |label|
134
128
  orig_label = label
@@ -9,31 +9,35 @@ module URI
9
9
  module Mapping
10
10
  class << self
11
11
  def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
12
- output = +""
12
+ output = []
13
13
  domain_name.each_codepoint do |codepoint|
14
14
  _, status, replacement = status(codepoint)
15
15
  case status
16
16
  when "V", "X" # valid, disallowed
17
- output << codepoint.chr(Encoding::UTF_8)
17
+ output << codepoint
18
18
  when "M" # mapped
19
- output << if transitional_processing && codepoint == 7838
20
- "ss"
19
+ output += if transitional_processing && codepoint == 7838
20
+ [115, 115]
21
21
  else
22
22
  replacement
23
23
  end
24
24
  when "D" # deviation
25
- output << (transitional_processing ? replacement : codepoint.chr(Encoding::UTF_8))
25
+ if transitional_processing
26
+ output += replacement
27
+ else
28
+ output << codepoint
29
+ end
26
30
  when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
27
- output << if use_std3_ascii_rules
28
- codepoint.chr(Encoding::UTF_8)
29
- else
30
- (replacement || codepoint.chr(Encoding::UTF_8))
31
- end
31
+ if use_std3_ascii_rules || !replacement
32
+ output << codepoint
33
+ else
34
+ output += replacement
35
+ end
32
36
  when "I" # ignored
33
37
  next
34
38
  end
35
39
  end
36
- output.unicode_normalize(:nfc)
40
+ output.pack("U*").unicode_normalize!(:nfc)
37
41
  end
38
42
 
39
43
  def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
@@ -46,12 +50,9 @@ module URI
46
50
  end
47
51
 
48
52
  def status(codepoint)
49
- index =
50
- if codepoint < 256
51
- codepoint
52
- else
53
- (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
54
- end
53
+ return UTS46_DATA[codepoint] if codepoint < 256
54
+
55
+ index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
55
56
  UTS46_DATA[index] || []
56
57
  end
57
58
  end
@@ -8,14 +8,17 @@ module URI
8
8
  module UTS46
9
9
  # https://www.unicode.org/reports/tr46/#Processing
10
10
  class Processing < BaseProcessing
11
- def call
12
- domain = Mapping.call(
11
+ def initialize(domain_name, **options)
12
+ super
13
+ @domain_name = Mapping.call(
13
14
  domain_name,
14
- transitional_processing: options.transitional_processing?,
15
- use_std3_ascii_rules: options.use_std3_ascii_rules?,
15
+ transitional_processing: @options.transitional_processing?,
16
+ use_std3_ascii_rules: @options.use_std3_ascii_rules?,
16
17
  )
18
+ end
17
19
 
18
- process_labels(domain) do |label|
20
+ def call
21
+ process_labels(domain_name) do |label|
19
22
  if label.start_with?(ACE_PREFIX)
20
23
  begin
21
24
  label = punycode_decode(label)
@@ -41,12 +44,6 @@ module URI
41
44
  Options
42
45
  end
43
46
 
44
- def check_bidi?
45
- return @check_bidi if instance_variable_defined?(:@check_bidi)
46
-
47
- @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
48
- end
49
-
50
47
  # https://www.unicode.org/reports/tr46/#Validity_Criteria
51
48
  def validate(label, transitional_processing: options.transitional_processing?)
52
49
  return if label.empty?
@@ -61,22 +61,22 @@ module URI
61
61
 
62
62
  # https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
63
63
  def check?(domain)
64
- labels = domain.split(".", -1)
65
- domain = labels.map do |label|
64
+ domain.split(".").each do |label|
66
65
  if label.start_with?(ACE_PREFIX)
67
66
  begin
68
- Punycode.decode(label[ACE_PREFIX.length..])
67
+ label = Punycode.decode(label[ACE_PREFIX.length..])
69
68
  rescue PunycodeError
70
- ""
69
+ next
71
70
  end
72
- else
73
- label
74
71
  end
75
- end.join(".")
72
+ next if label.ascii_only?
76
73
 
77
- domain.each_codepoint do |cp|
78
- return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
74
+ label.each_codepoint do |cp|
75
+ next if cp < 256
76
+ return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
77
+ end
79
78
  end
79
+
80
80
  false
81
81
  end
82
82
 
@@ -17,6 +17,7 @@ module URI
17
17
 
18
18
  # https://datatracker.ietf.org/doc/html/rfc5892
19
19
  def check_contextj(label, cp, pos)
20
+ return false if cp < 256
20
21
  return false unless codepoint?(cp, "CONTEXTJ")
21
22
  return true if valid_contextj?(label, cp, pos)
22
23
 
@@ -25,6 +26,7 @@ module URI
25
26
 
26
27
  # https://datatracker.ietf.org/doc/html/rfc5892
27
28
  def check_contexto(label, cp, pos)
29
+ return false if cp < 183
28
30
  return false unless codepoint?(cp, "CONTEXTO")
29
31
  return true if valid_contexto?(label, cp, pos)
30
32
 
@@ -105,10 +107,14 @@ module URI
105
107
  end
106
108
 
107
109
  def script?(cp, script)
110
+ return false if cp < 256
111
+
108
112
  Intranges.contain?(cp, SCRIPTS[script])
109
113
  end
110
114
 
111
115
  def virama_combining_class?(cp)
116
+ return false if cp < 256
117
+
112
118
  Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
113
119
  end
114
120
 
@@ -27,7 +27,7 @@ module URI
27
27
  # 4.2.3.1. Hyphen Restrictions
28
28
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
29
29
  def check_hyphen_sides(label)
30
- return unless label[0] == "-" || label[-1] == "-"
30
+ return unless label.start_with?("-") || label.end_with?("-")
31
31
 
32
32
  raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
33
33
  end
@@ -41,7 +41,9 @@ module URI
41
41
  # 4.2.3.2. Leading Combining Marks
42
42
  # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
43
43
  def check_leading_combining(label)
44
- return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)
44
+ cp = label[0].ord
45
+ return if cp < 256
46
+ return unless Intranges.contain?(cp, INITIAL_COMBINERS)
45
47
 
46
48
  raise Error, "Label begins with an illegal combining character"
47
49
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module URI
4
4
  module IDNA
5
- VERSION = "0.2.0"
5
+ VERSION = "0.2.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: uri-idna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Svyatoslav Kryukov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-14 00:00:00.000000000 Z
11
+ date: 2023-11-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Internationalized Domain Names in Applications (IDNA)
14
14
  email: