@formatjs/intl-getcanonicallocales 1.9.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BUILD +116 -0
- package/CHANGELOG.md +290 -0
- package/LICENSE.md +0 -0
- package/README.md +0 -0
- package/index.ts +43 -0
- package/package.json +3 -3
- package/polyfill.ts +23 -0
- package/scripts/aliases.ts +70 -0
- package/scripts/likely-subtags.ts +20 -0
- package/should-polyfill.ts +10 -0
- package/src/aliases.generated.ts +1137 -0
- package/src/canonicalizer.ts +272 -0
- package/src/emitter.ts +39 -0
- package/src/likelySubtags.generated.ts +1870 -0
- package/src/parser.ts +259 -0
- package/src/types.ts +63 -0
- package/tests/index.test.ts +24 -0
- package/tests/parser.test.ts +204 -0
- package/tsconfig.json +5 -0
- package/index.d.ts +0 -6
- package/index.d.ts.map +0 -1
- package/index.js +0 -42
- package/lib/index.d.ts +0 -6
- package/lib/index.d.ts.map +0 -1
- package/lib/index.js +0 -31
- package/lib/polyfill.d.ts +0 -2
- package/lib/polyfill.d.ts.map +0 -1
- package/lib/polyfill.js +0 -24
- package/lib/should-polyfill.d.ts +0 -2
- package/lib/should-polyfill.d.ts.map +0 -1
- package/lib/should-polyfill.js +0 -8
- package/lib/src/aliases.generated.d.ts +0 -5
- package/lib/src/aliases.generated.d.ts.map +0 -1
- package/lib/src/aliases.generated.js +0 -1137
- package/lib/src/canonicalizer.d.ts +0 -20
- package/lib/src/canonicalizer.d.ts.map +0 -1
- package/lib/src/canonicalizer.js +0 -219
- package/lib/src/emitter.d.ts +0 -4
- package/lib/src/emitter.d.ts.map +0 -1
- package/lib/src/emitter.js +0 -28
- package/lib/src/likelySubtags.generated.d.ts +0 -2
- package/lib/src/likelySubtags.generated.d.ts.map +0 -1
- package/lib/src/likelySubtags.generated.js +0 -1870
- package/lib/src/parser.d.ts +0 -10
- package/lib/src/parser.d.ts.map +0 -1
- package/lib/src/parser.js +0 -233
- package/lib/src/types.d.ts +0 -33
- package/lib/src/types.d.ts.map +0 -1
- package/lib/src/types.js +0 -1
- package/polyfill.d.ts +0 -2
- package/polyfill.d.ts.map +0 -1
- package/polyfill.iife.js +0 -3526
- package/polyfill.js +0 -26
- package/should-polyfill.d.ts +0 -2
- package/should-polyfill.d.ts.map +0 -1
- package/should-polyfill.js +0 -12
- package/src/aliases.generated.d.ts +0 -5
- package/src/aliases.generated.d.ts.map +0 -1
- package/src/aliases.generated.js +0 -1140
- package/src/canonicalizer.d.ts +0 -20
- package/src/canonicalizer.d.ts.map +0 -1
- package/src/canonicalizer.js +0 -224
- package/src/emitter.d.ts +0 -4
- package/src/emitter.d.ts.map +0 -1
- package/src/emitter.js +0 -33
- package/src/likelySubtags.generated.d.ts +0 -2
- package/src/likelySubtags.generated.d.ts.map +0 -1
- package/src/likelySubtags.generated.js +0 -1873
- package/src/parser.d.ts +0 -10
- package/src/parser.d.ts.map +0 -1
- package/src/parser.js +0 -243
- package/src/types.d.ts +0 -33
- package/src/types.d.ts.map +0 -1
- package/src/types.js +0 -2
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
import {UnicodeLocaleId, KV, Extension, UnicodeLanguageId} from './types'
|
|
2
|
+
import {
|
|
3
|
+
languageAlias,
|
|
4
|
+
variantAlias,
|
|
5
|
+
scriptAlias,
|
|
6
|
+
territoryAlias,
|
|
7
|
+
} from './aliases.generated'
|
|
8
|
+
import {
|
|
9
|
+
parseUnicodeLanguageId,
|
|
10
|
+
isUnicodeVariantSubtag,
|
|
11
|
+
isUnicodeLanguageSubtag,
|
|
12
|
+
SEPARATOR,
|
|
13
|
+
} from './parser'
|
|
14
|
+
import {likelySubtags} from './likelySubtags.generated'
|
|
15
|
+
import {emitUnicodeLanguageId} from './emitter'
|
|
16
|
+
|
|
17
|
+
/**
 * Normalizes a list of attribute subtags: every entry is lower-cased,
 * duplicates are collapsed and the survivors are returned in the default
 * (code-unit) sort order.
 * @param strs attribute subtags to normalize
 * @returns a new array; `strs` itself is left untouched
 */
function canonicalizeAttrs(strs: string[]): string[] {
  const seen: Record<string, number> = {}
  for (const attr of strs) {
    seen[attr.toLowerCase()] = 1
  }
  const unique = Object.keys(seen)
  unique.sort()
  return unique
}
|
|
25
|
+
|
|
26
|
+
/**
 * Canonicalizes a list of key/value pairs (`u` extension keywords or `t`
 * extension fields):
 *  - keys and values are lower-cased,
 *  - when a key occurs more than once only its first occurrence is kept;
 *    keys are compared case-insensitively so `CA` and `ca` count as the
 *    same key,
 *  - a missing/empty value or the value `true` (in any letter case) is
 *    dropped, leaving a key-only entry,
 *  - the result is sorted by key.
 * @param arr pairs in their original order
 * @returns a new, sorted array of canonical pairs; `arr` is not modified
 */
function canonicalizeKVs(arr: KV[]): KV[] {
  // Null-prototype map: inherited members can never look like a seen key.
  const seen: Record<string, true> = Object.create(null)
  const result: KV[] = []
  for (const kv of arr) {
    // Normalize before the duplicate check so the check matches the output.
    const key = kv[0].toLowerCase()
    if (seen[key]) {
      continue
    }
    seen[key] = true
    const rawValue = kv[1]
    const value = rawValue ? rawValue.toLowerCase() : ''
    if (value === '' || value === 'true') {
      result.push([key])
    } else {
      result.push([key, value])
    }
  }
  return result.sort(compareKV)
}

/**
 * Sort comparator ordering key/value pairs by their key (element 0).
 * @returns -1, 0 or 1 following the `Array.prototype.sort` contract
 */
function compareKV(t1: KV, t2: KV): number {
  if (t1[0] < t2[0]) {
    return -1
  }
  return t1[0] > t2[0] ? 1 : 0
}
|
|
46
|
+
|
|
47
|
+
/**
 * Sort comparator that orders extensions by their `type` value.
 * @returns -1 when `e1` sorts first, 1 when `e2` sorts first, 0 when equal
 */
function compareExtension(e1: Extension, e2: Extension): number {
  if (e1.type < e2.type) {
    return -1
  }
  if (e1.type > e2.type) {
    return 1
  }
  return 0
}
|
|
50
|
+
|
|
51
|
+
/**
 * Appends to `v1` every entry of `v2` that `v1` does not already contain.
 * Membership is tested against `v1` only, so repeated entries inside `v2`
 * are each appended.
 * @param v1 variants that come first in the result
 * @param v2 candidate variants to append
 * @returns a new array; neither input is modified
 */
function mergeVariants(v1: string[], v2: string[]): string[] {
  const additions = v2.filter(candidate => v1.indexOf(candidate) === -1)
  return [...v1, ...additions]
}
|
|
60
|
+
|
|
61
|
+
/**
 * Looks up `key` (emitted as a language tag) in the generated
 * `languageAlias` table.
 * @returns the parsed replacement, or `undefined` when no alias exists
 */
function resolveLanguageAlias(
  key: UnicodeLanguageId
): UnicodeLanguageId | undefined {
  const replacement = languageAlias[emitUnicodeLanguageId(key)]
  return replacement
    ? parseUnicodeLanguageId(replacement.split(SEPARATOR))
    : undefined
}

/**
 * Canonicalizes a parsed language id by applying, in order, the generated
 * language, territory, script and variant alias tables.
 *
 * CAVEAT: the spec step that replaces keys, types, tfields and tvalues by
 * their canonical bcp47 forms (for example `en-u-ms-imperial` ⇒
 * `en-u-ms-uksystem`) is NOT implemented here because that data is not
 * available as JSON.
 *
 * The argument is never modified: the result is always a fresh object with
 * its own `variants` array.
 * @param unicodeLanguageId parsed language id to canonicalize
 * @returns the canonicalized language id
 */
export function canonicalizeUnicodeLanguageId(
  unicodeLanguageId: UnicodeLanguageId
): UnicodeLanguageId {
  /**
   * If the language subtag matches the type attribute of a languageAlias element in Supplemental Data, replace the language subtag with the replacement value.
   *  1. If there are additional subtags in the replacement value, add them to the result, but only if there is no corresponding subtag already in the tag.
   *  2. Five special deprecated grandfathered codes (such as i-default) are in type attributes, and are also replaced.
   */

  // From https://github.com/unicode-org/icu/blob/master/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java#L1246

  // Work on a copy so the caller's object and its variants array stay intact.
  let finalLangAst: UnicodeLanguageId = {
    lang: unicodeLanguageId.lang,
    script: unicodeLanguageId.script,
    region: unicodeLanguageId.region,
    variants: [...unicodeLanguageId.variants],
  }

  // Try language _ variant: the first variant that forms an alias wins.
  for (const variant of unicodeLanguageId.variants) {
    const aliased = resolveLanguageAlias({
      lang: unicodeLanguageId.lang,
      variants: [variant],
    })
    if (aliased) {
      finalLangAst = {
        lang: aliased.lang,
        script: finalLangAst.script || aliased.script,
        region: finalLangAst.region || aliased.region,
        variants: mergeVariants(finalLangAst.variants, aliased.variants),
      }
      break
    }
  }

  // language _ script _ country
  // ug-Arab-CN -> ug-CN
  if (finalLangAst.script && finalLangAst.region) {
    const aliased = resolveLanguageAlias({
      lang: finalLangAst.lang,
      script: finalLangAst.script,
      region: finalLangAst.region,
      variants: [],
    })
    if (aliased) {
      // Script and region both come from the replacement, even when absent.
      finalLangAst = {
        lang: aliased.lang,
        script: aliased.script,
        region: aliased.region,
        variants: finalLangAst.variants,
      }
    }
  }

  // language _ country
  // e.g. az_AZ -> az_Latn_AZ
  if (finalLangAst.region) {
    const aliased = resolveLanguageAlias({
      lang: finalLangAst.lang,
      region: finalLangAst.region,
      variants: [],
    })
    if (aliased) {
      finalLangAst = {
        lang: aliased.lang,
        script: finalLangAst.script || aliased.script,
        region: aliased.region,
        variants: finalLangAst.variants,
      }
    }
  }

  // only language
  // e.g. twi -> ak
  const aliasedLang = resolveLanguageAlias({
    lang: finalLangAst.lang,
    variants: [],
  })
  if (aliasedLang) {
    finalLangAst = {
      lang: aliasedLang.lang,
      script: finalLangAst.script || aliasedLang.script,
      region: finalLangAst.region || aliasedLang.region,
      variants: finalLangAst.variants,
    }
  }

  if (finalLangAst.region) {
    const region = finalLangAst.region.toUpperCase()
    const regionAlias = territoryAlias[region]
    let replacedRegion: string | undefined
    if (regionAlias) {
      // An alias may list several space-separated candidates. Default to the
      // first one, but prefer the likely region of lang(+script) when that
      // region is among the candidates.
      const regions = regionAlias.split(' ')
      replacedRegion = regions[0]
      const likelySubtag =
        likelySubtags[
          emitUnicodeLanguageId({
            lang: finalLangAst.lang,
            script: finalLangAst.script,
            variants: [],
          }) as keyof typeof likelySubtags
        ]
      if (likelySubtag) {
        const {region: likelyRegion} = parseUnicodeLanguageId(
          likelySubtag.split(SEPARATOR)
        )
        if (likelyRegion && regions.indexOf(likelyRegion) > -1) {
          replacedRegion = likelyRegion
        }
      }
    }
    finalLangAst.region = (replacedRegion || region).toUpperCase()
  }

  if (finalLangAst.script) {
    // Title-case the script before looking it up in the alias table.
    const script =
      finalLangAst.script[0].toUpperCase() +
      finalLangAst.script.slice(1).toLowerCase()
    finalLangAst.script = scriptAlias[script] || script
  }

  if (finalLangAst.variants.length) {
    for (let i = 0; i < finalLangAst.variants.length; i++) {
      const alias = variantAlias[finalLangAst.variants[i].toLowerCase()]
      if (!alias) {
        continue
      }
      if (isUnicodeVariantSubtag(alias)) {
        finalLangAst.variants[i] = alias
      } else if (isUnicodeLanguageSubtag(alias)) {
        // Yes this can happen per the spec
        finalLangAst.lang = alias
      }
    }
    finalLangAst.variants.sort()
  }
  return finalLangAst
}
|
|
236
|
+
|
|
237
|
+
/**
 * Canonicalizes a parsed locale id, based on
 * https://www.unicode.org/reports/tr35/tr35.html#Canonical_Unicode_Locale_Identifiers
 * https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
 *
 * IMPORTANT: `locale` is updated in place and the same object is returned.
 * Its `lang` is replaced by the canonicalized language id; each extension
 * is canonicalized according to its type and the extensions are then
 * sorted by type.
 * @param locale parsed locale id to canonicalize
 * @returns the `locale` object that was passed in
 */
export function canonicalizeUnicodeLocaleId(
  locale: UnicodeLocaleId
): UnicodeLocaleId {
  locale.lang = canonicalizeUnicodeLanguageId(locale.lang)

  const extensionList = locale.extensions
  if (!extensionList) {
    return locale
  }

  for (const extension of extensionList) {
    if (extension.type === 'u') {
      extension.keywords = canonicalizeKVs(extension.keywords)
      if (extension.attributes) {
        extension.attributes = canonicalizeAttrs(extension.attributes)
      }
    } else if (extension.type === 't') {
      if (extension.lang) {
        extension.lang = canonicalizeUnicodeLanguageId(extension.lang)
      }
      extension.fields = canonicalizeKVs(extension.fields)
    } else {
      // Every other extension only carries an opaque value.
      extension.value = extension.value.toLowerCase()
    }
  }
  extensionList.sort(compareExtension)

  return locale
}
|
package/src/emitter.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import {UnicodeLanguageId, UnicodeLocaleId} from './types'
|
|
2
|
+
|
|
3
|
+
/**
 * Serializes a parsed language id as `lang[-script][-region][-variant…]`.
 * Absent or empty subtags are skipped.
 * @param lang parsed language id; may be omitted
 * @returns the `-`-joined tag, or `''` when `lang` is not provided
 */
export function emitUnicodeLanguageId(lang?: UnicodeLanguageId): string {
  if (!lang) {
    return ''
  }
  const {lang: language, script, region, variants} = lang
  const subtags = [language, script, region, ...(variants || [])]
  return subtags.filter(Boolean).join('-')
}
|
|
11
|
+
|
|
12
|
+
/**
 * Serializes a parsed locale id: the language id comes first, then each
 * extension in the order given. An extension is written as its type
 * followed by its payload:
 *  - `u`: attributes, then every keyword key/value,
 *  - `t`: the embedded language id (if any), then every field key/value,
 *  - anything else: its `value`.
 * Empty chunks are skipped. A missing `extensions` list or missing `u`
 * `attributes` is treated as empty, matching the guards in the
 * canonicalizer.
 * @returns the `-`-joined locale tag
 */
export function emitUnicodeLocaleId({
  lang,
  extensions,
}: UnicodeLocaleId): string {
  const chunks = [emitUnicodeLanguageId(lang)]
  for (const ext of extensions || []) {
    chunks.push(ext.type)
    switch (ext.type) {
      case 'u':
        chunks.push(
          // Spreading `undefined` would throw, so fall back to an empty list.
          ...(ext.attributes || []),
          ...ext.keywords.reduce((all: string[], kv) => all.concat(kv), [])
        )
        break
      case 't':
        chunks.push(
          emitUnicodeLanguageId(ext.lang),
          ...ext.fields.reduce((all: string[], kv) => all.concat(kv), [])
        )
        break
      default:
        chunks.push(ext.value)
        break
    }
  }

  // Drops the empty string emitted for a `t` extension without a language.
  return chunks.filter(Boolean).join('-')
}
|