uri-idna 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6682f4500b6119fcd467ff88793d5a2d4522308ea4884cc6f69f04cb073d1089
|
4
|
+
data.tar.gz: 1e63bd541e8020f789916a82950a81d2f67b8d29a882f45d7c4a369a10c15de1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86032ea0573558c6386154a29265dddc81e8914a9720643bceab6f80de49d9f11ff2fc8bf5bfa789c84a515050457e9493937040541831a8d14394f49e15549d
|
7
|
+
data.tar.gz: 493f974a03ccf2abc69e1090f2a61d029a5f2e47a3725f87bdc3bf6ced69a7d9e052a424ddc95268c0b26a4354c7b31d72268c9a404d4b16180e77db8c5ff009
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning].
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.2.0] - 2023-11-14
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- WHATWG IDNA functions
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
- **BREAKING!** Names of options updated to match UTS46 flags
|
19
|
+
- Unicode version updated to 15.1
|
20
|
+
- UTS46 functions now support Revision 31
|
21
|
+
|
22
|
+
### Fixed
|
23
|
+
|
24
|
+
- IDNA2008 functions now support not only labels, but full domains
|
25
|
+
|
10
26
|
## [0.1.0] - 2023-08-05
|
11
27
|
|
12
28
|
### Added
|
@@ -15,7 +31,8 @@ and this project adheres to [Semantic Versioning].
|
|
15
31
|
|
16
32
|
[@skryukov]: https://github.com/skryukov
|
17
33
|
|
18
|
-
[Unreleased]: https://github.com/skryukov/uri-idna/compare/v0.
|
34
|
+
[Unreleased]: https://github.com/skryukov/uri-idna/compare/v0.2.0...HEAD
|
35
|
+
[0.2.0]: https://github.com/skryukov/uri-idna/compare/v0.1.0...v0.2.0
|
19
36
|
[0.1.0]: https://github.com/skryukov/uri-idna/commits/v0.1.0
|
20
37
|
|
21
38
|
[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/
|
data/README.md
CHANGED
@@ -3,11 +3,11 @@
|
|
3
3
|
[](https://rubygems.org/gems/uri-idna)
|
4
4
|
[](https://github.com/skryukov/uri-idna/actions/workflows/main.yml)
|
5
5
|
|
6
|
-
A
|
6
|
+
A pure-Ruby implementation of IDNA2008, UTS46, IDNA from the WHATWG URL Standard, and Punycode.
|
7
7
|
|
8
8
|
This gem provides a number of functions for converting internationalized domain names (IDNs) between the Unicode and ASCII Compatible Encoding (ACE) forms.
|
9
9
|
|
10
|
-
<a href="https://evilmartians.com/?utm_source=
|
10
|
+
<a href="https://evilmartians.com/?utm_source=uri-idna&utm_campaign=project_page">
|
11
11
|
<img src="https://evilmartians.com/badges/sponsored-by-evil-martians.svg" alt="Sponsored by Evil Martians" width="236" height="54">
|
12
12
|
</a>
|
13
13
|
|
@@ -15,7 +15,7 @@ This gem provides a number of functions for converting internationalized domain
|
|
15
15
|
|
16
16
|
Add to your Gemfile:
|
17
17
|
```ruby
|
18
|
-
gem "
|
18
|
+
gem "uri-idna"
|
19
19
|
```
|
20
20
|
|
21
21
|
And then run `bundle install`.
|
@@ -24,23 +24,33 @@ And then run `bundle install`.
|
|
24
24
|
|
25
25
|
There are plenty of ways to convert IDNs between Unicode and ACE forms.
|
26
26
|
|
27
|
-
###
|
27
|
+
### IDNA2008
|
28
28
|
|
29
|
-
The [RFC
|
29
|
+
The [RFC 5891] defines two protocols for IDN conversion: [Registration](https://datatracker.ietf.org/doc/html/rfc5891#section-4) and [Domain Name Lookup](https://datatracker.ietf.org/doc/html/rfc5891#section-5).
|
30
30
|
|
31
31
|
#### Registration protocol
|
32
32
|
|
33
|
+
`URI::IDNA.register(alabel:, ulabel:, **options)`
|
34
|
+
|
35
|
+
##### Options
|
36
|
+
|
37
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
38
|
+
- `leading_combining`: `true` – whether to check leading combining marks according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
39
|
+
- `check_joiners`: `true` – whether to check `CONTEXTJ` code points according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
40
|
+
- `check_others`: `true` – whether to check `CONTEXTO` code points according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
41
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 5.4](https://datatracker.ietf.org/doc/html/rfc5891#section-5.4).
|
42
|
+
|
33
43
|
```ruby
|
34
44
|
require "uri/idna"
|
35
45
|
|
36
|
-
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc", ulabel: "
|
37
|
-
#=> "xn--gdkl8fhk5egc"
|
46
|
+
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc.jp", ulabel: "ハロー・ワールド.jp")
|
47
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
38
48
|
|
39
|
-
URI::IDNA.register(ulabel: "
|
40
|
-
#=> "xn--gdkl8fhk5egc"
|
49
|
+
URI::IDNA.register(ulabel: "ハロー・ワールド.jp")
|
50
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
41
51
|
|
42
|
-
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc")
|
43
|
-
#=> "xn--gdkl8fhk5egc"
|
52
|
+
URI::IDNA.register(alabel: "xn--gdkl8fhk5egc.jp")
|
53
|
+
#=> "xn--gdkl8fhk5egc.jp"
|
44
54
|
|
45
55
|
URI::IDNA.register(ulabel: "☕.us")
|
46
56
|
#<URI::IDNA::InvalidCodepointError: Codepoint U+2615 at position 1 of "☕" not allowed>
|
@@ -48,43 +58,79 @@ URI::IDNA.register(ulabel: "☕.us")
|
|
48
58
|
|
49
59
|
#### Domain Name Lookup Protocol
|
50
60
|
|
61
|
+
`URI::IDNA.lookup(domain_name, **options)`
|
62
|
+
|
63
|
+
##### Options
|
64
|
+
|
65
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1).
|
66
|
+
- `leading_combining`: `true` – whether to check leading combining marks according to [Section 4.2.3.2](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2).
|
67
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3).
|
68
|
+
- `check_others`: `true` – whether to check CONTEXTO code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3).
|
69
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4).
|
70
|
+
- `verify_dns_length`: `true` – whether to check DNS length according to [Section 4.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.4).
|
71
|
+
|
51
72
|
```ruby
|
52
73
|
require "uri/idna"
|
53
74
|
|
54
|
-
URI::IDNA.lookup("
|
55
|
-
#=> "xn--pck0a1b0a6a2e"
|
75
|
+
URI::IDNA.lookup("ハロー・ワールド.jp")
|
76
|
+
#=> "xn--pck0a1b0a6a2e.jp"
|
56
77
|
|
57
|
-
URI::IDNA.lookup("xn--pck0a1b0a6a2e")
|
58
|
-
#=> "xn--pck0a1b0a6a2e"
|
78
|
+
URI::IDNA.lookup("xn--pck0a1b0a6a2e.jp")
|
79
|
+
#=> "xn--pck0a1b0a6a2e.jp"
|
59
80
|
|
60
81
|
URI::IDNA.lookup("Ῠ.me")
|
61
82
|
#<URI::IDNA::InvalidCodepointError: Codepoint U+1FE8 at position 1 of "Ῠ" not allowed>
|
62
83
|
```
|
63
84
|
|
64
|
-
### Unicode
|
85
|
+
### Unicode UTS46 (TR46)
|
65
86
|
|
66
|
-
|
87
|
+
_Current revision: 31_
|
88
|
+
|
89
|
+
The [UTS46] defines two IDN conversion functions: [ToASCII](https://www.unicode.org/reports/tr46/#ToASCII) and [ToUnicode](https://www.unicode.org/reports/tr46/#ToUnicode).
|
67
90
|
|
68
91
|
#### ToASCII
|
69
92
|
|
93
|
+
`URI::IDNA.to_ascii(domain_name, **options)`
|
94
|
+
|
95
|
+
##### Options
|
96
|
+
|
97
|
+
- `use_std3_ascii_rules`: `true` – whether to apply [STD3 rules](https://www.unicode.org/reports/tr46/#STD3_Rules) for both mapping and validation.
|
98
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1) of [RFC 5891].
|
99
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4) of [RFC 5891].
|
100
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3) of [RFC 5891].
|
101
|
+
- `transitional_processing`: `false` – (deprecated) whether to apply [transitional processing](https://www.unicode.org/reports/tr46/#ProcessingStepMap) for mapping.
|
102
|
+
- `ignore_invalid_punycode`: `false` – whether to fast-path invalid Punycode labels according to [4th step of Processing](https://www.unicode.org/reports/tr46/#ProcessingStepPunycode).
|
103
|
+
- `verify_dns_length`: `true` – whether to check DNS length according to [Section 4.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.4) of [RFC 5891].
|
104
|
+
|
70
105
|
```ruby
|
71
106
|
require "uri/idna"
|
72
107
|
|
73
108
|
URI::IDNA.to_ascii("Bloß.de")
|
74
109
|
#=> "xn--blo-7ka.de"
|
75
110
|
|
76
|
-
#
|
111
|
+
# UTS46 transitional processing is disabled by default,
|
77
112
|
# but can be enabled via option:
|
78
|
-
URI::IDNA.to_ascii("Bloß.de",
|
113
|
+
URI::IDNA.to_ascii("Bloß.de", transitional_processing: true)
|
79
114
|
#=> "bloss.de"
|
80
115
|
|
81
|
-
# Note that
|
116
|
+
# Note that UTS46 processing is not fully IDNA2008 compliant:
|
82
117
|
URI::IDNA.to_ascii("☕.us")
|
83
118
|
#=> "xn--53h.us"
|
84
119
|
```
|
85
120
|
|
86
121
|
#### ToUnicode
|
87
122
|
|
123
|
+
`URI::IDNA.to_unicode(domain_name, **options)`
|
124
|
+
|
125
|
+
##### Options
|
126
|
+
|
127
|
+
- `use_std3_ascii_rules`: `true` – whether to apply [STD3 rules](https://www.unicode.org/reports/tr46/#STD3_Rules) for both mapping and validation.
|
128
|
+
- `check_hyphens`: `true` – whether to check hyphens according to [Section 4.2.3.1](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1) of [RFC 5891].
|
129
|
+
- `check_bidi`: `true` – whether to check bidirectional characters according to [Section 4.2.3.4](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4) of [RFC 5891].
|
130
|
+
- `check_joiners`: `true` – whether to check CONTEXTJ code points according to [Section 4.2.3.3](https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.3) of [RFC 5891].
|
131
|
+
- `transitional_processing`: `false` – (deprecated) whether to apply [transitional processing](https://www.unicode.org/reports/tr46/#ProcessingStepMap) for mapping.
|
132
|
+
- `ignore_invalid_punycode`: `false` – whether to fast-path invalid Punycode labels according to [4th step of Processing](https://www.unicode.org/reports/tr46/#ProcessingStepPunycode).
|
133
|
+
|
88
134
|
```ruby
|
89
135
|
require "uri/idna"
|
90
136
|
|
@@ -92,24 +138,83 @@ URI::IDNA.to_unicode("xn--blo-7ka.de")
|
|
92
138
|
#=> "bloß.de"
|
93
139
|
```
|
94
140
|
|
95
|
-
####
|
141
|
+
#### IDNA2008 compatibility
|
96
142
|
|
97
|
-
It's possible to
|
143
|
+
It's possible to use UTS46 mapping first and then apply IDNA2008, so the processing fully conforms to IDNA2008:
|
98
144
|
|
99
145
|
```ruby
|
100
146
|
require "uri/idna"
|
101
147
|
|
102
|
-
|
103
|
-
|
148
|
+
# For example we can use UTS46 mapping to downcase some characters
|
149
|
+
char = "⼤"
|
150
|
+
char.ord # "\u2F24"
|
151
|
+
#=> 12068
|
152
|
+
|
153
|
+
# just downcase doesn't work in this case
|
154
|
+
char.downcase.ord
|
155
|
+
#=> 12068
|
104
156
|
|
105
|
-
#
|
106
|
-
URI::IDNA.
|
107
|
-
#=>
|
157
|
+
# but UTS46 mapping does its thing:
|
158
|
+
URI::IDNA::UTS46::Mapping.call(char).ord
|
159
|
+
#=> 22823
|
160
|
+
|
161
|
+
# so here is a full example:
|
162
|
+
domain = "⼤.cn" # "\u2F24.cn"
|
163
|
+
URI::IDNA.lookup(domain)
|
164
|
+
# <URI::IDNA::InvalidCodepointError: Codepoint U+2F24 at position 1 of "⼤" not allowed>
|
165
|
+
|
166
|
+
mapped_domain = URI::IDNA::UTS46::Mapping.call(domain)
|
167
|
+
URI::IDNA.lookup(mapped_domain)
|
168
|
+
#=> "xn--pss.cn"
|
169
|
+
```
|
170
|
+
|
171
|
+
### WHATWG
|
172
|
+
|
173
|
+
WHATWG's [URL Standard] uses the UTS46 algorithm to define its ToASCII and ToUnicode functions; it abstracts away all the available flags and provides only one instead: the `be_strict` flag.
|
174
|
+
|
175
|
+
Note that the `check_hyphens` UTS46 option is set to `false` in this algorithm.
|
176
|
+
|
177
|
+
#### ToASCII
|
178
|
+
|
179
|
+
`URI::IDNA.whatwg_to_ascii(domain_name, **options)`
|
180
|
+
|
181
|
+
##### Options
|
182
|
+
|
183
|
+
- `be_strict`: `true` – defines values of `use_std3_ascii_rules` and `verify_dns_length` UTS46 options.
|
184
|
+
|
185
|
+
```ruby
|
186
|
+
require "uri/idna"
|
187
|
+
|
188
|
+
URI::IDNA.whatwg_to_ascii("Bloß.de")
|
189
|
+
#=> "xn--blo-7ka.de"
|
190
|
+
|
191
|
+
# The be_strict flag sets use_std3_ascii_rules and verify_dns_length UTS46 flags to its value
|
192
|
+
URI::IDNA.whatwg_to_ascii("2003_rules.com", be_strict: false)
|
193
|
+
#=> "2003_rules.com"
|
194
|
+
|
195
|
+
# By default be_strict is set to true
|
196
|
+
URI::IDNA.whatwg_to_ascii("2003_rules.com")
|
197
|
+
#<URI::IDNA::InvalidCodepointError: Codepoint U+005F at position 5 of "2003_rules" not allowed>
|
198
|
+
```
|
199
|
+
|
200
|
+
#### ToUnicode
|
201
|
+
|
202
|
+
`URI::IDNA.whatwg_to_unicode(domain_name, **options)`
|
203
|
+
|
204
|
+
##### Options
|
205
|
+
|
206
|
+
- `be_strict`: `true` – defines the value of the `use_std3_ascii_rules` UTS46 option.
|
207
|
+
|
208
|
+
```ruby
|
209
|
+
require "uri/idna"
|
210
|
+
|
211
|
+
URI::IDNA.whatwg_to_unicode("xn--blo-7ka.de")
|
212
|
+
#=> "bloß.de"
|
108
213
|
```
|
109
214
|
|
110
215
|
### Punycode
|
111
216
|
|
112
|
-
Punycode module performs conversion between Unicode and Punycode. Note that Punycode is not
|
217
|
+
The Punycode module performs conversion between Unicode and Punycode. Note that Punycode by itself is not IDNA2008 compliant: it is only used for conversion, and no validations are performed.
|
113
218
|
|
114
219
|
```ruby
|
115
220
|
require "uri/idna/punycode"
|
@@ -123,7 +228,7 @@ URI::IDNA::Punycode.decode("gdkl8fhk5egc")
|
|
123
228
|
|
124
229
|
## Full technical reference:
|
125
230
|
|
126
|
-
###
|
231
|
+
### IDNA2008
|
127
232
|
- [RFC 5890] – Definitions and Document Framework
|
128
233
|
- [RFC 5891] – Protocol
|
129
234
|
- [RFC 5892] – The Unicode Code Points
|
@@ -133,9 +238,9 @@ URI::IDNA::Punycode.decode("gdkl8fhk5egc")
|
|
133
238
|
|
134
239
|
- [RFC 3492] – Punycode: A Bootstring encoding of Unicode
|
135
240
|
|
136
|
-
###
|
241
|
+
### UTS46 (also referenced as TR46)
|
137
242
|
|
138
|
-
- [Unicode IDNA Compatibility Processing]
|
243
|
+
- [Unicode IDNA Compatibility Processing][UTS46]
|
139
244
|
|
140
245
|
## Development
|
141
246
|
|
@@ -147,7 +252,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
147
252
|
|
148
253
|
This gem uses Unicode data files to perform IDN conversion. To generate new Unicode data files, run `bundle exec rake idna:generate`.
|
149
254
|
|
150
|
-
To specify Unicode version, use `
|
255
|
+
To specify Unicode version, use `VERSION` environment variable, e.g. `VERSION=15.1.0 bundle exec rake idna:generate`.
|
151
256
|
|
152
257
|
By default, the Unicode version used is the one used by the current Ruby version (`RbConfig::CONFIG["UNICODE_VERSION"]`).
|
153
258
|
|
@@ -157,15 +262,13 @@ Unicode data cached in the `tmp` directory by default, to change it, use `CACHE_
|
|
157
262
|
|
158
263
|
### Inspect Unicode data
|
159
264
|
|
160
|
-
To inspect Unicode data, run `bundle exec rake idna:inspect[<HEX_CODE>]`.
|
161
|
-
|
162
|
-
To specify Unicode version, or cache directory, use `UNICODE_VERSION` or `CACHE_DIR` environment variables, e.g. `UNICODE_VERSION=15.0.0 bundle exec rake idna:inspect[1f495]`.
|
265
|
+
To inspect Unicode data, run `bundle exec rake 'idna:inspect[<HEX_CODE>]'`.
|
163
266
|
|
164
|
-
|
267
|
+
To specify Unicode version, or cache directory, use `VERSION` or `CACHE_DIR` environment variables, e.g. `VERSION=15.1.0 bundle exec rake 'idna:inspect[1f495]'`.
|
165
268
|
|
166
|
-
### Update
|
269
|
+
### Update UTS46 test suite data
|
167
270
|
|
168
|
-
To update
|
271
|
+
To update UTS46 test suite data, run `bundle exec rake idna:update_uts46_test_suite`.
|
169
272
|
|
170
273
|
## Contributing
|
171
274
|
|
@@ -175,10 +278,11 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/skryuk
|
|
175
278
|
|
176
279
|
The gem is available as open source under the terms of the [MIT License].
|
177
280
|
|
178
|
-
[RFC 5890]:
|
179
|
-
[RFC 5891]:
|
180
|
-
[RFC 5892]:
|
181
|
-
[RFC 5893]:
|
182
|
-
[RFC 3492]:
|
183
|
-
[
|
184
|
-
[
|
281
|
+
[RFC 5890]: https://datatracker.ietf.org/doc/html/rfc5890
|
282
|
+
[RFC 5891]: https://datatracker.ietf.org/doc/html/rfc5891
|
283
|
+
[RFC 5892]: https://datatracker.ietf.org/doc/html/rfc5892
|
284
|
+
[RFC 5893]: https://datatracker.ietf.org/doc/html/rfc5893
|
285
|
+
[RFC 3492]: https://datatracker.ietf.org/doc/html/rfc3492
|
286
|
+
[UTS46]: https://www.unicode.org/reports/tr46
|
287
|
+
[URL Standard]: https://url.spec.whatwg.org/#idna
|
288
|
+
[MIT License]: https://opensource.org/licenses/MIT
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "validation/label"
|
4
|
+
require_relative "validation/codepoint"
|
5
|
+
require_relative "validation/bidi"
|
6
|
+
|
7
|
+
module URI
|
8
|
+
module IDNA
|
9
|
+
class BaseProcessing
|
10
|
+
def initialize(domain_name, **options)
|
11
|
+
@domain_name = domain_name
|
12
|
+
@options = options_class.new(**options)
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
attr_reader :domain_name, :options
|
18
|
+
|
19
|
+
def options_class
|
20
|
+
raise NotImplementedError, "Implement #options_class method"
|
21
|
+
end
|
22
|
+
|
23
|
+
def punycode_decode(label)
|
24
|
+
raise Error, "Label contains non-ASCII code point" unless label.ascii_only?
|
25
|
+
|
26
|
+
code = label[ACE_PREFIX.length..]
|
27
|
+
raise Error, "Malformed A-label, no Punycode eligible content found" if code.empty?
|
28
|
+
|
29
|
+
Punycode.decode(code)
|
30
|
+
end
|
31
|
+
|
32
|
+
def punycode_encode(label)
|
33
|
+
return label if label.ascii_only?
|
34
|
+
|
35
|
+
ACE_PREFIX + Punycode.encode(label)
|
36
|
+
end
|
37
|
+
|
38
|
+
def process_labels(domain)
|
39
|
+
labels, trailing_dot = split_domain(domain)
|
40
|
+
|
41
|
+
labels.map! do |label|
|
42
|
+
raise Error, "Empty label" if label.empty?
|
43
|
+
|
44
|
+
yield label
|
45
|
+
end
|
46
|
+
|
47
|
+
join_labels(labels, trailing_dot)
|
48
|
+
end
|
49
|
+
|
50
|
+
def join_labels(labels, trailing_dot)
|
51
|
+
labels << "" if trailing_dot
|
52
|
+
labels.join(".")
|
53
|
+
end
|
54
|
+
|
55
|
+
def split_domain(domain)
|
56
|
+
labels = domain.split(".", -1)
|
57
|
+
trailing_dot = labels[-1] && labels[-1].empty? ? labels.pop : false
|
58
|
+
|
59
|
+
raise Error, "Empty domain" if labels.empty? || labels == [""]
|
60
|
+
|
61
|
+
[labels, trailing_dot]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
data/lib/uri/idna/data/idna.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This file is automatically generated by bin/generate
|
4
|
-
# Unicode version 15.
|
4
|
+
# Unicode version 15.1.0
|
5
5
|
|
6
6
|
module URI
|
7
7
|
module IDNA
|
8
|
-
UNICODE_VERSION = "15.
|
8
|
+
UNICODE_VERSION = "15.1.0"
|
9
9
|
SCRIPTS = {
|
10
10
|
"Greek" => [
|
11
11
|
0x37000000374,
|
@@ -64,6 +64,7 @@ module URI
|
|
64
64
|
0x2b7400002b81e,
|
65
65
|
0x2b8200002cea2,
|
66
66
|
0x2ceb00002ebe1,
|
67
|
+
0x2ebf00002ee5e,
|
67
68
|
0x2f8000002fa1e,
|
68
69
|
0x300000003134b,
|
69
70
|
0x31350000323b0,
|
@@ -2139,6 +2140,7 @@ module URI
|
|
2139
2140
|
0x2b7400002b81e,
|
2140
2141
|
0x2b8200002cea2,
|
2141
2142
|
0x2ceb00002ebe1,
|
2143
|
+
0x2ebf00002ee5e,
|
2142
2144
|
0x300000003134b,
|
2143
2145
|
0x31350000323b0,
|
2144
2146
|
],
|
@@ -2621,7 +2623,7 @@ module URI
|
|
2621
2623
|
0x2e8000002e9a,
|
2622
2624
|
0x2e9b00002ef4,
|
2623
2625
|
0x2f0000002fd6,
|
2624
|
-
|
2626
|
+
0x2ff000003000,
|
2625
2627
|
0x300100003005,
|
2626
2628
|
0x300800003021,
|
2627
2629
|
0x303000003031,
|
@@ -2631,6 +2633,7 @@ module URI
|
|
2631
2633
|
0x30a0000030a1,
|
2632
2634
|
0x30fb000030fc,
|
2633
2635
|
0x31c0000031e4,
|
2636
|
+
0x31ef000031f0,
|
2634
2637
|
0x321d0000321f,
|
2635
2638
|
0x325000003260,
|
2636
2639
|
0x327c0000327f,
|
@@ -3003,13 +3006,13 @@ module URI
|
|
3003
3006
|
0x2e9a00002e9b,
|
3004
3007
|
0x2ef400002f00,
|
3005
3008
|
0x2fd600002ff0,
|
3006
|
-
|
3009
|
+
0x300000003001,
|
3007
3010
|
0x304000003041,
|
3008
3011
|
0x309700003099,
|
3009
3012
|
0x310000003105,
|
3010
3013
|
0x313000003131,
|
3011
3014
|
0x318f00003190,
|
3012
|
-
|
3015
|
+
0x31e4000031ef,
|
3013
3016
|
0x321f00003220,
|
3014
3017
|
0xa48d0000a490,
|
3015
3018
|
0xa4c70000a4d0,
|
@@ -3432,7 +3435,8 @@ module URI
|
|
3432
3435
|
0x2b73a0002b740,
|
3433
3436
|
0x2b81e0002b820,
|
3434
3437
|
0x2cea20002ceb0,
|
3435
|
-
|
3438
|
+
0x2ebe10002ebf0,
|
3439
|
+
0x2ee5e0002f800,
|
3436
3440
|
0x2fa1e00030000,
|
3437
3441
|
0x3134b00031350,
|
3438
3442
|
0x323b0000e0001,
|
@@ -4194,6 +4198,7 @@ module URI
|
|
4194
4198
|
0x2b7400002b81e,
|
4195
4199
|
0x2b8200002cea2,
|
4196
4200
|
0x2ceb00002ebe1,
|
4201
|
+
0x2ebf00002ee5e,
|
4197
4202
|
0x2f8000002fa1e,
|
4198
4203
|
0x300000003134b,
|
4199
4204
|
0x31350000323b0,
|
data/lib/uri/idna/data/uts46.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# This file is automatically generated by bin/generate
|
4
|
-
# Unicode version 15.
|
4
|
+
# Unicode version 15.1.0
|
5
5
|
|
6
6
|
module URI
|
7
7
|
module IDNA
|
@@ -1824,7 +1824,7 @@ module URI
|
|
1824
1824
|
[0x1E9A, "M", "aʾ"],
|
1825
1825
|
[0x1E9B, "M", "ṡ"],
|
1826
1826
|
[0x1E9C, "V"],
|
1827
|
-
[0x1E9E, "M", "
|
1827
|
+
[0x1E9E, "M", "ß"],
|
1828
1828
|
[0x1E9F, "V"],
|
1829
1829
|
[0x1EA0, "M", "ạ"],
|
1830
1830
|
[0x1EA1, "V"],
|
@@ -2323,10 +2323,6 @@ module URI
|
|
2323
2323
|
[0x222F, "M", "∮∮"],
|
2324
2324
|
[0x2230, "M", "∮∮∮"],
|
2325
2325
|
[0x2231, "V"],
|
2326
|
-
[0x2260, "3"],
|
2327
|
-
[0x2261, "V"],
|
2328
|
-
[0x226E, "3"],
|
2329
|
-
[0x2270, "V"],
|
2330
2326
|
[0x2329, "M", "〈"],
|
2331
2327
|
[0x232A, "M", "〉"],
|
2332
2328
|
[0x232B, "V"],
|
@@ -7646,6 +7642,8 @@ module URI
|
|
7646
7642
|
[0x2CEA2, "X"],
|
7647
7643
|
[0x2CEB0, "V"],
|
7648
7644
|
[0x2EBE1, "X"],
|
7645
|
+
[0x2EBF0, "V"],
|
7646
|
+
[0x2EE5E, "X"],
|
7649
7647
|
[0x2F800, "M", "丽"],
|
7650
7648
|
[0x2F801, "M", "丸"],
|
7651
7649
|
[0x2F802, "M", "乁"],
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
|
4
|
+
module IDNA
|
5
|
+
module IDNA2008
|
6
|
+
class Options
|
7
|
+
attr_reader :flags
|
8
|
+
|
9
|
+
CHECK_HYPHENS = 1 << 0
|
10
|
+
LEADING_COMBINING = 1 << 1
|
11
|
+
CHECK_JOINERS = 1 << 2
|
12
|
+
CHECK_OTHERS = 1 << 3
|
13
|
+
CHECK_BIDI = 1 << 4
|
14
|
+
VERIFY_DNS_LENGTH = 1 << 5
|
15
|
+
|
16
|
+
def initialize(
|
17
|
+
check_hyphens: true,
|
18
|
+
leading_combining: true,
|
19
|
+
check_joiners: true,
|
20
|
+
check_others: true,
|
21
|
+
check_bidi: true,
|
22
|
+
verify_dns_length: true
|
23
|
+
)
|
24
|
+
@flags = 0
|
25
|
+
@flags |= CHECK_HYPHENS if check_hyphens
|
26
|
+
@flags |= LEADING_COMBINING if leading_combining
|
27
|
+
@flags |= CHECK_JOINERS if check_joiners
|
28
|
+
@flags |= CHECK_OTHERS if check_others
|
29
|
+
@flags |= CHECK_BIDI if check_bidi
|
30
|
+
@flags |= VERIFY_DNS_LENGTH if verify_dns_length
|
31
|
+
end
|
32
|
+
|
33
|
+
def check_hyphens?
|
34
|
+
(flags & CHECK_HYPHENS) != 0
|
35
|
+
end
|
36
|
+
|
37
|
+
def leading_combining?
|
38
|
+
(flags & LEADING_COMBINING) != 0
|
39
|
+
end
|
40
|
+
|
41
|
+
def check_joiners?
|
42
|
+
(flags & CHECK_JOINERS) != 0
|
43
|
+
end
|
44
|
+
|
45
|
+
def check_others?
|
46
|
+
(flags & CHECK_OTHERS) != 0
|
47
|
+
end
|
48
|
+
|
49
|
+
def check_bidi?
|
50
|
+
(flags & CHECK_BIDI) != 0
|
51
|
+
end
|
52
|
+
|
53
|
+
def verify_dns_length?
|
54
|
+
(flags & VERIFY_DNS_LENGTH) != 0
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|