uri-idna 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6682f4500b6119fcd467ff88793d5a2d4522308ea4884cc6f69f04cb073d1089
|
4
|
+
data.tar.gz: 1e63bd541e8020f789916a82950a81d2f67b8d29a882f45d7c4a369a10c15de1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86032ea0573558c6386154a29265dddc81e8914a9720643bceab6f80de49d9f11ff2fc8bf5bfa789c84a515050457e9493937040541831a8d14394f49e15549d
|
7
|
+
data.tar.gz: 493f974a03ccf2abc69e1090f2a61d029a5f2e47a3725f87bdc3bf6ced69a7d9e052a424ddc95268c0b26a4354c7b31d72268c9a404d4b16180e77db8c5ff009
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning].
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.2.0] - 2023-11-14
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- WHATWG IDNA functions
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
- **BREAKING!** Names of options updated to match UTS46 flags
|
19
|
+
- Unicode version updated to 15.1
|
20
|
+
- UTS46 functions now support Revision 31
|
21
|
+
|
22
|
+
### Fixed
|
23
|
+
|
24
|
+
- IDNA2008 functions now support not only labels, but full domains
|
25
|
+
|
10
26
|
## [0.1.0] - 2023-08-05
|
11
27
|
|
12
28
|
### Added
|
@@ -15,7 +31,8 @@ and this project adheres to [Semantic Versioning].
|
|
15
31
|
|
16
32
|
[@skryukov]: https://github.com/skryukov
|
17
33
|
|
18
|
-
[Unreleased]: https://github.com/skryukov/uri-idna/compare/v0.
|
34
|
+
[Unreleased]: https://github.com/skryukov/uri-idna/compare/v0.2.0...HEAD
|
35
|
+
[0.2.0]: https://github.com/skryukov/uri-idna/compare/v0.1.0...v0.2.0
|
19
36
|
[0.1.0]: https://github.com/skryukov/uri-idna/commits/v0.1.0
|
20
37
|
|
21
38
|
[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/
|
data/README.md
CHANGED
@@ -3,11 +3,11 @@
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/uri-idna.svg)](https://rubygems.org/gems/uri-idna)
|
4
4
|
[![Ruby](https://github.com/skryukov/uri-idna/actions/workflows/main.yml/badge.svg)](https://github.com/skryukov/uri-idna/actions/workflows/main.yml)
|
5
5
|
|
6
|
-
A
|
6
|
+
An IDNA2008, UTS46, IDNA from WHATWG URL Standard and Punycode implementation in pure Ruby.
|
7
7
|
|
8
8
|
This gem provides a number of functions for converting internationalized domain names (IDNs) between the Unicode and ASCII Compatible Encoding (ACE) forms.
|
9
9
|
|
10
|
-
<a href="https://evilmartians.com/?utm_source=
|
10
|
+
<a href="https://evilmartians.com/?utm_source=uri-idna&utm_campaign=project_page">
|
11
11
|
<img src="https://evilmartians.com/badges/sponsored-by-evil-martians.svg" alt="Sponsored by Evil Martians" width="236" height="54">
|
12
12
|
</a>
|
13
13
|
|
@@ -15,7 +15,7 @@ This gem provides a number of functions for converting internationalized domain
|
|
15
15
|
|
16
16
|
Add to your Gemfile:
|
17
17
|
```ruby
|
18
|
-
gem "
|
18
|
+
gem "uri-idna"
|
19
19
|
```
|
20
20
|
|
21
21
|
And then run `bundle install`.
|
@@ -24,23 +24,33 @@ And then run `bundle install`.
|
|
24
24
|
|
25
25
|
There are plenty of ways to convert IDNs between Unicode and ACE forms.
|
26
26
|
|
27
|
-
###
|
27
|
+
### IDNA2008
|
28
28
|
|
29
|
-
The [RFC
|
29
|
+
The [RFC 5891] defines two protocols for IDN conversion: [Registration](https://datatracker.ietf.org/doc/html/rfc5891#section-4) and [Domain Name Lookup](https://datatracker.ietf.org/doc/html/rfc5891#section-5).
|
30
30
|
|
31
31
|
#### Registration protocol
|
32
32
|
|
33
|
+
`URI::IDNA.register(alabel:, ulabel:, **options)`
|
34
|
+
|
35
|
+
##### Options
|
36
|
+
|
37
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
38
|
+
- `leading_combining`: `true` – whether to check leading combining marks according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
39
|
+
- `check_joiners`: `true` – whether to check `CONTEXTJ` code points according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
40
|
+
- `check_others`: `true` – whether to check `CONTEXTO` code points according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
41
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
42
|
+
|
33
43
|
```ruby
|
34
44
|
require "uri/idna"
|
35
45
|
|
36
|
-
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc", ulabel: "
|
37
|
-
#=> "xn--gdkl8fhk5egc"
|
46
|
+
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc.jp", ulabel: "ハロー・ワールド.jp")
|
47
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
38
48
|
|
39
|
-
URI::IDNA.register(ulabel: "
|
40
|
-
#=> "xn--gdkl8fhk5egc"
|
49
|
+
URI::IDNA.register(ulabel: "ハロー・ワールド.jp")
|
50
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
41
51
|
|
42
|
-
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc")
|
43
|
-
#=> "xn--gdkl8fhk5egc"
|
52
|
+
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc.jp")
|
53
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
44
54
|
|
45
55
|
URI::IDNA.register(ulabel: "☕.us")
|
46
56
|
#<URI::IDNA::InvalidCodepointError: Codepoint U+2615 at position 1 of "☕" not allowed>
|
@@ -48,43 +58,79 @@ URI::IDNA.register(ulabel: "☕.us")
|
|
48
58
|
|
49
59
|
#### Domain Name Lookup Protocol
|
50
60
|
|
61
|
+
`URI::IDNA.lookup(domain_name, **options)`
|
62
|
+
|
63
|
+
##### Options
|
64
|
+
|
65
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1).
|
66
|
+
- `leading_combining`: `true` – whether to check leading combining marks according to [Section 4.2.3.2](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2).
|
67
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3).
|
68
|
+
- `check_others`: `true` – whether to check CONTEXTO code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3).
|
69
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4).
|
70
|
+
- `verify_dns_length`: `true` – whether to check DNS length according to [Section 4.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.4).
|
71
|
+
|
51
72
|
```ruby
|
52
73
|
require "uri/idna"
|
53
74
|
|
54
|
-
URI::IDNA.lookup("
|
55
|
-
#=> "xn--pck0a1b0a6a2e"
|
75
|
+
URI::IDNA.lookup("ハロー・ワールド.jp")
|
76
|
+
#=> "xn--pck0a1b0a6a2e.jp"
|
56
77
|
|
57
|
-
URI::IDNA.lookup("xn--pck0a1b0a6a2e")
|
58
|
-
#=> "xn--pck0a1b0a6a2e"
|
78
|
+
URI::IDNA.lookup("xn--pck0a1b0a6a2e.jp")
|
79
|
+
#=> "xn--pck0a1b0a6a2e.jp"
|
59
80
|
|
60
81
|
URI::IDNA.lookup("Ῠ.me")
|
61
82
|
#<URI::IDNA::InvalidCodepointError: Codepoint U+1FE8 at position 1 of "Ῠ" not allowed>
|
62
83
|
```
|
63
84
|
|
64
|
-
### Unicode
|
85
|
+
### Unicode UTS46 (TR46)
|
65
86
|
|
66
|
-
|
87
|
+
_Current revision: 31_
|
88
|
+
|
89
|
+
The [UTS46] defines two IDN conversion functions: [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) and [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode).
|
67
90
|
|
68
91
|
#### ToASCII
|
69
92
|
|
93
|
+
`URI::IDNA.to_ascii(domain_name, **options)`
|
94
|
+
|
95
|
+
##### Options
|
96
|
+
|
97
|
+
- `use_std3_ascii_rules`: `true` – whether to apply [STD3 rules](https://www.unicode.org/reports/tr46/#STD3_Rules) for both mapping and validation.
|
98
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1) of [RFC 5891].
|
99
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4) of [RFC 5891].
|
100
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3) of [RFC 5891].
|
101
|
+
- `transitional_processing`: `false` – (deprecated) whether to apply [transitional processing](https://www.unicode.org/reports/tr46/#ProcessingStepMap) for mapping.
|
102
|
+
- `ignore_invalid_punycode`: `false` – whether to fast-path invalid Punycode labels according to [4th step of Processing](https://www.unicode.org/reports/tr46/#ProcessingStepPunycode).
|
103
|
+
- `verify_dns_length`: `true` – whether to check DNS length according to [Section 4.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.4) of [RFC 5891].
|
104
|
+
|
70
105
|
```ruby
|
71
106
|
require "uri/idna"
|
72
107
|
|
73
108
|
URI::IDNA.to_ascii("Bloß.de")
|
74
109
|
#=> "xn--blo-7ka.de"
|
75
110
|
|
76
|
-
#
|
111
|
+
# UTS46 transitional processing is disabled by default,
|
77
112
|
# but can be enabled via option:
|
78
|
-
URI::IDNA.to_ascii("Bloß.de",
|
113
|
+
URI::IDNA.to_ascii("Bloß.de", transitional_processing: true)
|
79
114
|
#=> "bloss.de"
|
80
115
|
|
81
|
-
# Note that
|
116
|
+
# Note that UTS46 processing is not fully IDNA2008 compliant:
|
82
117
|
URI::IDNA.to_ascii("☕.us")
|
83
118
|
#=> "xn--53h.us"
|
84
119
|
```
|
85
120
|
|
86
121
|
#### ToUnicode
|
87
122
|
|
123
|
+
`URI::IDNA.to_unicode(domain_name, **options)`
|
124
|
+
|
125
|
+
##### Options
|
126
|
+
|
127
|
+
- `use_std3_ascii_rules`: `true` – whether to apply [STD3 rules](https://www.unicode.org/reports/tr46/#STD3_Rules) for both mapping and validation.
|
128
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1) of [RFC 5891].
|
129
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4) of [RFC 5891].
|
130
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3) of [RFC 5891].
|
131
|
+
- `transitional_processing`: `false` – (deprecated) whether to apply [transitional processing](https://www.unicode.org/reports/tr46/#ProcessingStepMap) for mapping.
|
132
|
+
- `ignore_invalid_punycode`: `false` – whether to fast-path invalid Punycode labels according to [4th step of Processing](https://www.unicode.org/reports/tr46/#ProcessingStepPunycode).
|
133
|
+
|
88
134
|
```ruby
|
89
135
|
require "uri/idna"
|
90
136
|
|
@@ -92,24 +138,83 @@ URI::IDNA.to_unicode("xn--blo-7ka.de")
|
|
92
138
|
#=> "bloß.de"
|
93
139
|
```
|
94
140
|
|
95
|
-
####
|
141
|
+
#### IDNA2008 compatibility
|
96
142
|
|
97
|
-
It's possible to
|
143
|
+
It's possible to use UTS46 mapping first and then apply IDNA2008, so that the processing fully conforms to IDNA2008:
|
98
144
|
|
99
145
|
```ruby
|
100
146
|
require "uri/idna"
|
101
147
|
|
102
|
-
|
103
|
-
|
148
|
+
# For example we can use UTS46 mapping to downcase some characters
|
149
|
+
char = "⼤"
|
150
|
+
char.ord # "\u2F24"
|
151
|
+
#=> 12068
|
152
|
+
|
153
|
+
# just downcase doesn't work in this case
|
154
|
+
char.downcase.ord
|
155
|
+
#=> 12068
|
104
156
|
|
105
|
-
#
|
106
|
-
URI::IDNA.
|
107
|
-
#=>
|
157
|
+
# but UTS46 mapping does its thing:
|
158
|
+
URI::IDNA::UTS46::Mapping.call(char).ord
|
159
|
+
#=> 22823
|
160
|
+
|
161
|
+
# so here is a full example:
|
162
|
+
domain = "⼤.cn" # "\u2F24.cn"
|
163
|
+
URI::IDNA.lookup(domain)
|
164
|
+
# <URI::IDNA::InvalidCodepointError: Codepoint U+2F24 at position 1 of "⼤" not allowed>
|
165
|
+
|
166
|
+
mapped_domain = URI::IDNA::UTS46::Mapping.call(domain)
|
167
|
+
URI::IDNA.lookup(mapped_domain)
|
168
|
+
#=> "xn--pss.cn"
|
169
|
+
```
|
170
|
+
|
171
|
+
### WHATWG
|
172
|
+
|
173
|
+
WHATWG's [URL Standard] uses the UTS46 algorithm to define its ToASCII and ToUnicode functions; it abstracts away all available flags and exposes only one instead: the `be_strict` flag.
|
174
|
+
|
175
|
+
Note that the `check_hyphens` UTS46 option is set to `false` in this algorithm.
|
176
|
+
|
177
|
+
#### ToASCII
|
178
|
+
|
179
|
+
`URI::IDNA.whatwg_to_ascii(domain_name, **options)`
|
180
|
+
|
181
|
+
##### Options
|
182
|
+
|
183
|
+
- `be_strict`: `true` – defines values of `use_std3_ascii_rules` and `verify_dns_length` UTS46 options.
|
184
|
+
|
185
|
+
```ruby
|
186
|
+
require "uri/idna"
|
187
|
+
|
188
|
+
URI::IDNA.whatwg_to_ascii("Bloß.de")
|
189
|
+
#=> "xn--blo-7ka.de"
|
190
|
+
|
191
|
+
# The be_strict flag sets use_std3_ascii_rules and verify_dns_length UTS46 flags to its value
|
192
|
+
URI::IDNA.whatwg_to_ascii("2003_rules.com", be_strict: false)
|
193
|
+
#=> "2003_rules.com"
|
194
|
+
|
195
|
+
# By default be_strict is set to true
|
196
|
+
URI::IDNA.whatwg_to_ascii("2003_rules.com")
|
197
|
+
#<URI::IDNA::InvalidCodepointError: Codepoint U+005F at position 5 of "2003_rules" not allowed>
|
198
|
+
```
|
199
|
+
|
200
|
+
#### ToUnicode
|
201
|
+
|
202
|
+
`URI::IDNA.whatwg_to_unicode(domain_name, **options)`
|
203
|
+
|
204
|
+
##### Options
|
205
|
+
|
206
|
+
- `be_strict`: `true` – defines the value of the `use_std3_ascii_rules` UTS46 option.
|
207
|
+
|
208
|
+
```ruby
|
209
|
+
require "uri/idna"
|
210
|
+
|
211
|
+
URI::IDNA.whatwg_to_unicode("xn--blo-7ka.de")
|
212
|
+
#=> "bloß.de"
|
108
213
|
```
|
109
214
|
|
110
215
|
### Punycode
|
111
216
|
|
112
|
-
Punycode module performs conversion between Unicode and Punycode. Note that Punycode is not
|
217
|
+
The Punycode module performs conversion between Unicode and Punycode. Note that Punycode on its own is not IDNA2008 compliant: it is only used for conversion, and no validation is performed.
|
113
218
|
|
114
219
|
```ruby
|
115
220
|
require "uri/idna/punycode"
|
@@ -123,7 +228,7 @@ URI::IDNA::Punycode.decode("gdkl8fhk5egc")
|
|
123
228
|
|
124
229
|
## Full technical reference:
|
125
230
|
|
126
|
-
###
|
231
|
+
### IDNA2008
|
127
232
|
- [RFC 5890] – Definitions and Document Framework
|
128
233
|
- [RFC 5891] – Protocol
|
129
234
|
- [RFC 5892] – The Unicode Code Points
|
@@ -133,9 +238,9 @@ URI::IDNA::Punycode.decode("gdkl8fhk5egc")
|
|
133
238
|
|
134
239
|
- [RFC 3492] – Punycode: A Bootstring encoding of Unicode
|
135
240
|
|
136
|
-
###
|
241
|
+
### UTS46 (also referenced as TR46)
|
137
242
|
|
138
|
-
- [Unicode IDNA Compatibility Processing]
|
243
|
+
- [Unicode IDNA Compatibility Processing][UTS46]
|
139
244
|
|
140
245
|
## Development
|
141
246
|
|
@@ -147,7 +252,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
147
252
|
|
148
253
|
This gem uses Unicode data files to perform IDN conversion. To generate new Unicode data files, run `bundle exec rake idna:generate`.
|
149
254
|
|
150
|
-
To specify Unicode version, use `
|
255
|
+
To specify Unicode version, use `VERSION` environment variable, e.g. `VERSION=15.1.0 bundle exec rake idna:generate`.
|
151
256
|
|
152
257
|
By default, the Unicode version used is the one supported by the running Ruby (`RbConfig::CONFIG["UNICODE_VERSION"]`).
|
153
258
|
|
@@ -157,15 +262,13 @@ Unicode data cached in the `tmp` directory by default, to change it, use `CACHE_
|
|
157
262
|
|
158
263
|
### Inspect Unicode data
|
159
264
|
|
160
|
-
To inspect Unicode data, run `bundle exec rake idna:inspect[<HEX_CODE>]`.
|
161
|
-
|
162
|
-
To specify Unicode version, or cache directory, use `UNICODE_VERSION` or `CACHE_DIR` environment variables, e.g. `UNICODE_VERSION=15.0.0 bundle exec rake idna:inspect[1f495]`.
|
265
|
+
To inspect Unicode data, run `bundle exec rake 'idna:inspect[<HEX_CODE>]'`.
|
163
266
|
|
164
|
-
|
267
|
+
To specify Unicode version, or cache directory, use `VERSION` or `CACHE_DIR` environment variables, e.g. `VERSION=15.1.0 bundle exec rake 'idna:inspect[1f495]'`.
|
165
268
|
|
166
|
-
### Update
|
269
|
+
### Update UTS46 test suite data
|
167
270
|
|
168
|
-
To update
|
271
|
+
To update UTS46 test suite data, run `bundle exec rake idna:update_uts46_test_suite`.
|
169
272
|
|
170
273
|
## Contributing
|
171
274
|
|
@@ -175,10 +278,11 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/skryuk
|
|
175
278
|
|
176
279
|
The gem is available as open source under the terms of the [MIT License].
|
177
280
|
|
178
|
-
[RFC 5890]:
|
179
|
-
[RFC 5891]:
|
180
|
-
[RFC 5892]:
|
181
|
-
[RFC 5893]:
|
182
|
-
[RFC 3492]:
|
183
|
-
[
|
184
|
-
[
|
281
|
+
[RFC 5890]: https://datatracker.ietf.org/doc/html/rfc5890
|
282
|
+
[RFC 5891]: https://datatracker.ietf.org/doc/html/rfc5891
|
283
|
+
[RFC 5892]: https://datatracker.ietf.org/doc/html/rfc5892
|
284
|
+
[RFC 5893]: https://datatracker.ietf.org/doc/html/rfc5893
|
285
|
+
[RFC 3492]: https://datatracker.ietf.org/doc/html/rfc3492
|
286
|
+
[UTS46]: https://www.unicode.org/reports/tr46
|
287
|
+
[URL Standard]: https://url.spec.whatwg.org/#idna
|
288
|
+
[MIT License]: https://opensource.org/licenses/MIT
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "validation/label"
|
4
|
+
require_relative "validation/codepoint"
|
5
|
+
require_relative "validation/bidi"
|
6
|
+
|
7
|
+
module URI
  module IDNA
    # Common plumbing shared by the concrete processing classes.
    #
    # It stores the domain name together with an options object and offers
    # private helpers for Punycode conversion and for splitting a domain into
    # labels and joining it back. Subclasses must implement the private
    # +options_class+ method.
    class BaseProcessing
      # @param domain_name [String] the domain name to be processed
      # @param options [Hash] keyword options, forwarded as-is to
      #   +options_class.new+
      # @raise [NotImplementedError] when the subclass does not define
      #   +options_class+
      def initialize(domain_name, **options)
        @domain_name = domain_name
        @options = options_class.new(**options)
      end

      private

      attr_reader :domain_name, :options

      # Hook for subclasses: returns the class used to build +options+.
      #
      # @raise [NotImplementedError] always, in this base class
      def options_class
        raise NotImplementedError, "Implement #options_class method"
      end

      # Decodes the Punycode part of +label+.
      #
      # The first +ACE_PREFIX.length+ characters are dropped without being
      # inspected, so the caller is expected to pass a label that starts with
      # the ACE prefix.
      #
      # @param label [String]
      # @return the result of +Punycode.decode+
      # @raise [Error] when the label is not ASCII-only or nothing is left
      #   after the prefix
      def punycode_decode(label)
        raise Error, "Label contains non-ASCII code point" unless label.ascii_only?

        # Slicing past the end of a label shorter than the prefix yields nil;
        # coerce it to "" so that case is reported as a malformed A-label
        # instead of crashing with NoMethodError.
        code = label[ACE_PREFIX.length..].to_s
        raise Error, "Malformed A-label, no Punycode eligible content found" if code.empty?

        Punycode.decode(code)
      end

      # Encodes +label+ with Punycode and prepends +ACE_PREFIX+.
      # ASCII-only labels are returned unchanged.
      #
      # @param label [String]
      # @return [String]
      def punycode_encode(label)
        return label if label.ascii_only?

        ACE_PREFIX + Punycode.encode(label)
      end

      # Splits +domain+ into labels, replaces every label with the value the
      # block returns for it, and joins the result back (keeping a trailing
      # dot when the input had one).
      #
      # @param domain [String]
      # @yieldparam label [String] a non-empty label
      # @return [String]
      # @raise [Error] on an empty domain or an empty label
      def process_labels(domain)
        labels, trailing_dot = split_domain(domain)

        labels.map! do |label|
          raise Error, "Empty label" if label.empty?

          yield label
        end

        join_labels(labels, trailing_dot)
      end

      # Joins +labels+ with dots, appending a final dot when +trailing_dot+
      # is truthy. The +labels+ array itself is left untouched.
      #
      # @param labels [Array<String>]
      # @param trailing_dot [Boolean]
      # @return [String]
      def join_labels(labels, trailing_dot)
        parts = trailing_dot ? labels + [""] : labels
        parts.join(".")
      end

      # Splits +domain+ on dots.
      #
      # @param domain [String]
      # @return [Array(Array<String>, Boolean)] the labels (without the empty
      #   element produced by a trailing dot) and whether a trailing dot was
      #   present
      # @raise [Error] when no label is left, or the only label left is empty
      def split_domain(domain)
        # The -1 limit keeps trailing empty strings, so "a.b." => ["a", "b", ""].
        labels = domain.split(".", -1)
        trailing_dot = labels.last == ""
        labels.pop if trailing_dot

        raise Error, "Empty domain" if labels.empty? || labels == [""]

        [labels, trailing_dot]
      end
    end
  end
end
|
data/lib/uri/idna/data/idna.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This file is automatically generated by bin/generate
|
4
|
-
# Unicode version 15.
|
4
|
+
# Unicode version 15.1.0
|
5
5
|
|
6
6
|
module URI
|
7
7
|
module IDNA
|
8
|
-
UNICODE_VERSION = "15.
|
8
|
+
UNICODE_VERSION = "15.1.0"
|
9
9
|
SCRIPTS = {
|
10
10
|
"Greek" => [
|
11
11
|
0x37000000374,
|
@@ -64,6 +64,7 @@ module URI
|
|
64
64
|
0x2b7400002b81e,
|
65
65
|
0x2b8200002cea2,
|
66
66
|
0x2ceb00002ebe1,
|
67
|
+
0x2ebf00002ee5e,
|
67
68
|
0x2f8000002fa1e,
|
68
69
|
0x300000003134b,
|
69
70
|
0x31350000323b0,
|
@@ -2139,6 +2140,7 @@ module URI
|
|
2139
2140
|
0x2b7400002b81e,
|
2140
2141
|
0x2b8200002cea2,
|
2141
2142
|
0x2ceb00002ebe1,
|
2143
|
+
0x2ebf00002ee5e,
|
2142
2144
|
0x300000003134b,
|
2143
2145
|
0x31350000323b0,
|
2144
2146
|
],
|
@@ -2621,7 +2623,7 @@ module URI
|
|
2621
2623
|
0x2e8000002e9a,
|
2622
2624
|
0x2e9b00002ef4,
|
2623
2625
|
0x2f0000002fd6,
|
2624
|
-
|
2626
|
+
0x2ff000003000,
|
2625
2627
|
0x300100003005,
|
2626
2628
|
0x300800003021,
|
2627
2629
|
0x303000003031,
|
@@ -2631,6 +2633,7 @@ module URI
|
|
2631
2633
|
0x30a0000030a1,
|
2632
2634
|
0x30fb000030fc,
|
2633
2635
|
0x31c0000031e4,
|
2636
|
+
0x31ef000031f0,
|
2634
2637
|
0x321d0000321f,
|
2635
2638
|
0x325000003260,
|
2636
2639
|
0x327c0000327f,
|
@@ -3003,13 +3006,13 @@ module URI
|
|
3003
3006
|
0x2e9a00002e9b,
|
3004
3007
|
0x2ef400002f00,
|
3005
3008
|
0x2fd600002ff0,
|
3006
|
-
|
3009
|
+
0x300000003001,
|
3007
3010
|
0x304000003041,
|
3008
3011
|
0x309700003099,
|
3009
3012
|
0x310000003105,
|
3010
3013
|
0x313000003131,
|
3011
3014
|
0x318f00003190,
|
3012
|
-
|
3015
|
+
0x31e4000031ef,
|
3013
3016
|
0x321f00003220,
|
3014
3017
|
0xa48d0000a490,
|
3015
3018
|
0xa4c70000a4d0,
|
@@ -3432,7 +3435,8 @@ module URI
|
|
3432
3435
|
0x2b73a0002b740,
|
3433
3436
|
0x2b81e0002b820,
|
3434
3437
|
0x2cea20002ceb0,
|
3435
|
-
|
3438
|
+
0x2ebe10002ebf0,
|
3439
|
+
0x2ee5e0002f800,
|
3436
3440
|
0x2fa1e00030000,
|
3437
3441
|
0x3134b00031350,
|
3438
3442
|
0x323b0000e0001,
|
@@ -4194,6 +4198,7 @@ module URI
|
|
4194
4198
|
0x2b7400002b81e,
|
4195
4199
|
0x2b8200002cea2,
|
4196
4200
|
0x2ceb00002ebe1,
|
4201
|
+
0x2ebf00002ee5e,
|
4197
4202
|
0x2f8000002fa1e,
|
4198
4203
|
0x300000003134b,
|
4199
4204
|
0x31350000323b0,
|
data/lib/uri/idna/data/uts46.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This file is automatically generated by bin/generate
|
4
|
-
# Unicode version 15.
|
4
|
+
# Unicode version 15.1.0
|
5
5
|
|
6
6
|
module URI
|
7
7
|
module IDNA
|
@@ -1824,7 +1824,7 @@ module URI
|
|
1824
1824
|
[0x1E9A, "M", "aʾ"],
|
1825
1825
|
[0x1E9B, "M", "ṡ"],
|
1826
1826
|
[0x1E9C, "V"],
|
1827
|
-
[0x1E9E, "M", "
|
1827
|
+
[0x1E9E, "M", "ß"],
|
1828
1828
|
[0x1E9F, "V"],
|
1829
1829
|
[0x1EA0, "M", "ạ"],
|
1830
1830
|
[0x1EA1, "V"],
|
@@ -2323,10 +2323,6 @@ module URI
|
|
2323
2323
|
[0x222F, "M", "∮∮"],
|
2324
2324
|
[0x2230, "M", "∮∮∮"],
|
2325
2325
|
[0x2231, "V"],
|
2326
|
-
[0x2260, "3"],
|
2327
|
-
[0x2261, "V"],
|
2328
|
-
[0x226E, "3"],
|
2329
|
-
[0x2270, "V"],
|
2330
2326
|
[0x2329, "M", "〈"],
|
2331
2327
|
[0x232A, "M", "〉"],
|
2332
2328
|
[0x232B, "V"],
|
@@ -7646,6 +7642,8 @@ module URI
|
|
7646
7642
|
[0x2CEA2, "X"],
|
7647
7643
|
[0x2CEB0, "V"],
|
7648
7644
|
[0x2EBE1, "X"],
|
7645
|
+
[0x2EBF0, "V"],
|
7646
|
+
[0x2EE5E, "X"],
|
7649
7647
|
[0x2F800, "M", "丽"],
|
7650
7648
|
[0x2F801, "M", "丸"],
|
7651
7649
|
[0x2F802, "M", "乁"],
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
  module IDNA
    module IDNA2008
      # IDNA2008 processing switches, stored together as one Integer bitmask
      # that is readable through +flags+. Every switch defaults to enabled.
      class Options
        # One distinct bit per switch.
        CHECK_HYPHENS     = 0b000001
        LEADING_COMBINING = 0b000010
        CHECK_JOINERS     = 0b000100
        CHECK_OTHERS      = 0b001000
        CHECK_BIDI        = 0b010000
        VERIFY_DNS_LENGTH = 0b100000

        attr_reader :flags

        # Builds the bitmask: a switch's bit is set when the matching keyword
        # argument is truthy.
        def initialize(
          check_hyphens: true,
          leading_combining: true,
          check_joiners: true,
          check_others: true,
          check_bidi: true,
          verify_dns_length: true
        )
          requested = {
            CHECK_HYPHENS => check_hyphens,
            LEADING_COMBINING => leading_combining,
            CHECK_JOINERS => check_joiners,
            CHECK_OTHERS => check_others,
            CHECK_BIDI => check_bidi,
            VERIFY_DNS_LENGTH => verify_dns_length
          }

          @flags = requested.reduce(0) do |mask, (bit, enabled)|
            enabled ? mask | bit : mask
          end
        end

        # @return [Boolean] whether the CHECK_HYPHENS bit is set
        def check_hyphens?
          !(flags & CHECK_HYPHENS).zero?
        end

        # @return [Boolean] whether the LEADING_COMBINING bit is set
        def leading_combining?
          !(flags & LEADING_COMBINING).zero?
        end

        # @return [Boolean] whether the CHECK_JOINERS bit is set
        def check_joiners?
          !(flags & CHECK_JOINERS).zero?
        end

        # @return [Boolean] whether the CHECK_OTHERS bit is set
        def check_others?
          !(flags & CHECK_OTHERS).zero?
        end

        # @return [Boolean] whether the CHECK_BIDI bit is set
        def check_bidi?
          !(flags & CHECK_BIDI).zero?
        end

        # @return [Boolean] whether the VERIFY_DNS_LENGTH bit is set
        def verify_dns_length?
          !(flags & VERIFY_DNS_LENGTH).zero?
        end
      end
    end
  end
end
|