cldr-transforms 46.0.0 → 47.0.0-BETA2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/LICENSE +1 -1
  2. package/README.md +1 -1
  3. package/bower.json +2 -2
  4. package/package.json +3 -3
  5. package/transforms/Arabic-Latin-BGN.txt +1 -1
  6. package/transforms/Arabic-Latin.txt +1 -1
  7. package/transforms/Bengali-Latin.txt +1 -1
  8. package/transforms/Cyrillic-Latin.txt +4 -4
  9. package/transforms/Greek-Latin.txt +8 -8
  10. package/transforms/Greek_Latin_UNGEGN.txt +2 -2
  11. package/transforms/Han-Latin-Names.txt +1 -1
  12. package/transforms/Han-Latin.json +2 -2
  13. package/transforms/Han-Latin.txt +1 -1
  14. package/transforms/Han-Spacedhan.txt +4 -4
  15. package/transforms/Hant-Latin.json +8 -0
  16. package/transforms/Hant-Latin.txt +100 -0
  17. package/transforms/Hiragana-Katakana.txt +2 -2
  18. package/transforms/Latin-Jamo.txt +2 -2
  19. package/transforms/Latin-Katakana.txt +3 -3
  20. package/transforms/Latin-NumericPinyin.txt +1 -1
  21. package/transforms/Maldivian-Latin-BGN.txt +1 -1
  22. package/transforms/Persian-Latin-BGN.txt +1 -1
  23. package/transforms/Thai-Latin.txt +1 -1
  24. package/transforms/Thai-ThaiLogical.txt +1 -1
  25. package/transforms/Thai-ThaiSemi.txt +1 -1
  26. package/transforms/ThaiLogical-Latin.txt +2 -2
  27. package/transforms/am-Ethi-t-d0-morse.txt +1 -1
  28. package/transforms/az-Title.txt +3 -3
  29. package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.txt +3 -3
  30. package/transforms/chr-chr_FONIPA.txt +1 -1
  31. package/transforms/de-ASCII.txt +1 -1
  32. package/transforms/el-Lower.txt +2 -2
  33. package/transforms/el-Title.txt +3 -3
  34. package/transforms/it-am.txt +1 -1
  35. package/transforms/it-ja.txt +1 -1
  36. package/transforms/lt-Title.txt +7 -7
  37. package/transforms/tr-Title.txt +3 -3
  38. package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.txt +1 -1
  39. package/transforms.json +1 -0
package/LICENSE CHANGED
@@ -2,7 +2,7 @@ UNICODE LICENSE V3
2
2
 
3
3
  COPYRIGHT AND PERMISSION NOTICE
4
4
 
5
- Copyright © 2004-2024 Unicode, Inc.
5
+ Copyright © 2004-2025 Unicode, Inc.
6
6
 
7
7
  NOTICE TO USER: Carefully read the following legal agreement. BY
8
8
  DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
package/README.md CHANGED
@@ -19,7 +19,7 @@ the data contained here, please file a new ticket at [Unicode Jira](https://unic
19
19
 
20
20
  ## License
21
21
 
22
- Copyright © 1991-2024 Unicode, Inc.
22
+ Copyright © 1991-2025 Unicode, Inc.
23
23
  [Terms of Use](http://www.unicode.org/copyright.html)
24
24
 
25
25
  SPDX-License-Identifier: Unicode-3.0
package/bower.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "cldr-transforms",
3
- "version": "46.0.0",
3
+ "version": "47.0.0-BETA2",
4
4
  "dependencies": {
5
- "cldr-core": "46.0.0"
5
+ "cldr-core": "47.0.0-BETA2"
6
6
  },
7
7
  "main": "transforms/**/*.json",
8
8
  "ignore": [
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "cldr-transforms",
3
- "version": "46.0.0",
3
+ "version": "47.0.0-BETA2",
4
4
  "peerDependencies": {
5
- "cldr-core": "46.0.0"
5
+ "cldr-core": "47.0.0-BETA2"
6
6
  },
7
7
  "description": "Transform data",
8
8
  "homepage": "https://cldr.unicode.org",
@@ -24,6 +24,6 @@
24
24
  },
25
25
  "license": "Unicode-3.0",
26
26
  "bugs": "https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket",
27
- "cldrVersion": "46",
27
+ "cldrVersion": "47",
28
28
  "unicodeVersion": "16.0.0"
29
29
  }
package/transforms/Arabic-Latin-BGN.txt CHANGED
@@ -14,7 +14,7 @@
14
14
  #
15
15
  # MINIMAL FILTER: Arabic-Latin
16
16
  #
17
- :: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;
17
+ :: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;
18
18
  :: NFKD (NFC) ;
19
19
  #
20
20
  #
package/transforms/Arabic-Latin.txt CHANGED
@@ -10,7 +10,7 @@
10
10
  # Does *not* do assimilation of "al", nor hyphenation.
11
11
  # While it could be done, we need to determine whether a prefix "al" could
12
12
  # occur other than as the definite article (since no space is used).
13
- :: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
13
+ :: [[:Arabic:][:Block=Arabic:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
14
14
  :: NFKD (NFC);
15
15
  $disambig = ̱ ;
16
16
  $disambig2 = ̰ ;
package/transforms/Bengali-Latin.txt CHANGED
@@ -1,4 +1,4 @@
1
- ::[[:script=bengali:][।-॥ঁ-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ়-ৄে-ৈো-্ৗড়-ঢ়য়-ৣ০-৺ৎ]];
1
+ ::[[:Script=Bengali:][।-॥ঁ-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ়-ৄে-ৈো-্ৗড়-ঢ়য়-ৣ০-৺ৎ]];
2
2
  ::NFD;
3
3
  ::Bengali-InterIndic;
4
4
  ::InterIndic-Latin;
package/transforms/Cyrillic-Latin.txt CHANGED
@@ -2,7 +2,7 @@
2
2
  # Should add variants for Russian-English, Russian-German
3
3
  # Those can use this as a base, and then remap cases
4
4
  # like a $hat to ya or ja.
5
- # :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
5
+ # :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:]] ;
6
6
  ### WARNING, ̈ must be added to the generated filters, in both directions ###
7
7
  # MINIMAL FILTER
8
8
  # Cyrillic-Latin
@@ -267,12 +267,12 @@ $ignore = [[:Mark:]''] * ;
267
267
  | K ← Q ;
268
268
  | u ← w ;
269
269
  | U ← W ;
270
- | KS ← X } $ignore [:UppercaseLetter:] ;
271
- | KS ← [:UppercaseLetter:] $ignore { X ;
270
+ | KS ← X } $ignore [:Uppercase_Letter:] ;
271
+ | KS ← [:Uppercase_Letter:] $ignore { X ;
272
272
  | Ks ← X ;
273
273
  | ks ← x ;
274
274
  :: NFC (NFD) ;
275
275
  # note: a global filter is more efficient, but MUST include all source chars!!
276
- # :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:] ‧]);
276
+ # :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:] ‧]);
277
277
  # MINIMAL FILTER: Latin-Cyrillic
278
278
  :: ( [ḫḪhH‧ˌ̈A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ̀-̂̆-̦̱̇̌̀-́̈́ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ;
package/transforms/Greek-Latin.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # Rules are predicated on running NFD first, and NFC afterwards
2
- # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
2
+ # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
3
3
  # MINIMAL FILTER GENERATED FOR: Greek-Latin
4
4
  :: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
5
5
  :: NFD (NFC) ;
@@ -12,9 +12,9 @@
12
12
  # ὨΣ ὩΣ ὪΣ ὫΣ
13
13
  # Ạ, ạ, Ẹ, ẹ, Ọ, ọ
14
14
  # Useful variables
15
- $lower = [[:latin:][:greek:] & [:Ll:]];
16
- $glower = [[:greek:] & [:Ll:]];
17
- $upper = [[:latin:][:greek:] & [:Lu:]] ;
15
+ $lower = [[:Latin:][:Greek:] & [:Ll:]];
16
+ $glower = [[:Greek:] & [:Ll:]];
17
+ $upper = [[:Latin:][:Greek:] & [:Lu:]] ;
18
18
  $accent = [:M:] ;
19
19
  # NOTE: restrict to just the Greek & Latin accents that we care about
20
20
  # TODO: broaden out once interation is fixed
@@ -220,8 +220,8 @@ $ignore = [[:Mark:]''] * ;
220
220
  | B ← W } $vowel ;
221
221
  | U ← V ;
222
222
  | U ← W ;
223
- $rough } $ignore [:UppercaseLetter:] → H ;
224
- $ignore [:UppercaseLetter:] { $rough → H ;
223
+ $rough } $ignore [:Uppercase_Letter:] → H ;
224
+ $ignore [:Uppercase_Letter:] { $rough → H ;
225
225
  $rough ← H ;
226
226
  $rough ↔ h ;
227
227
  # Completeness for Greek
@@ -243,7 +243,7 @@ $rough ↔ h ;
243
243
  ← [Ππ] { \' } [Ss] ;
244
244
  ← [Νν] { \' } $egammaLike ;
245
245
  ::NFC (NFD) ;
246
- # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
247
- # ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
246
+ # ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
247
+ # ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
248
248
  # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
249
249
  :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
package/transforms/Greek_Latin_UNGEGN.txt CHANGED
@@ -6,8 +6,8 @@
6
6
  :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
7
7
  ::NFD (NFC) ;
8
8
  # Useful variables
9
- $lower = [[:latin:][:greek:] & [:Ll:]] ;
10
- $upper = [[:latin:][:greek:] & [:Lu:]] ;
9
+ $lower = [[:Latin:][:Greek:] & [:Ll:]] ;
10
+ $upper = [[:Latin:][:Greek:] & [:Lu:]] ;
11
11
  $accent = [[:Mn:][:Me:]] ;
12
12
  $macron = ̄ ;
13
13
  $ddot = ̈ ;
package/transforms/Han-Latin-Names.txt CHANGED
@@ -7,7 +7,7 @@
7
7
  # Do this before ::Han-Spacedhan() to catch Han after space in original text,
8
8
  # and to apply before all other rules.
9
9
  $startOfHanMarker = \uFDD1;
10
- [:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1;
10
+ [:^Script=Han:] { ([:Script=Han:]) → $startOfHanMarker $1;
11
11
  # Need Spacedhan so the name transliterations get spaced properly
12
12
  ::Han-Spacedhan();
13
13
  # Convert special name readings that depend on next character
package/transforms/Han-Latin.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "_visibility": "external",
3
- "_alias": "Han-Latin",
4
- "_aliasBcp47": "und-Latn-t-und-hani",
3
+ "_alias": "Hans-Latn Han-Latin",
4
+ "_aliasBcp47": "und-Latn-t-und-hans und-Latn-t-und-hani",
5
5
  "_source": "Hani",
6
6
  "_direction": "forward",
7
7
  "_target": "Latn",
package/transforms/Han-Latin.txt CHANGED
@@ -5,7 +5,7 @@
5
5
  # Note that Han-Spacedhan() has already been applied, so there should be spaces between Han characters.
6
6
  藏 } \u0020? 文 →zàng;# 藏 is zàng (not cáng) if followed by 文 wén: 藏文 language Zàngwén = Tibetan
7
7
  重 } \u0020? 庆 →chóng;# 重 is chóng (not zhòng) if followed by 庆 qìng: 重庆 city Chóngqìng
8
- 沈 } \u0020? 阳 →shěn;# is shěn (not chén) if followed by yáng: 沈阳 city Shěnyáng
8
+ # "沈 } \u0020? 阳 →shěn" is obsolete for Hans, the kMandarin entry for 沈 changed from "chén" to "shěn chén" in Unicode 14
9
9
  秘 } \u0020? 鲁 →bì;# 秘 is bì (not mì) if followed by 鲁 lǔ: 秘鲁 country Bìlǔ = Peru
10
10
  # START AUTOGENERATED Han-Latin.xml ( Unihan kMandarin)
11
11
  [吖錒锕阿𠼞𥥩𨉚𱚱]→ā;
package/transforms/Han-Spacedhan.txt CHANGED
@@ -1,8 +1,8 @@
1
1
  # Only intended for internal use
2
2
  # Make sure Han are normalized, including characters that contain them.
3
- # The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
4
- # Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
5
- :: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
3
+ # The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
4
+ # Where XXX is the resolved [:Ideographic:][:sc=Han:]. It needs updating with each Unicode release!
5
+ :: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;
6
6
  :: fullwidth-halfwidth;
7
7
  。 → '.';
8
8
  。→ '.';
@@ -23,7 +23,7 @@
23
23
  々→ '⓶';
24
24
  〜→ '~';
25
25
  $terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
26
- $initialPunct = [:Ps:][:Pi:];
26
+ $initialPunct = [[:Ps:][:Pi:]];
27
27
  # add space between any Han or terminal punctuation and letters, and
28
28
  # between letters and Han or initial punct
29
29
  [[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
package/transforms/Hant-Latin.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "_visibility": "external",
3
+ "_aliasBcp47": "und-Latn-t-und-hant",
4
+ "_source": "Hant",
5
+ "_direction": "forward",
6
+ "_target": "Latn",
7
+ "_rulesFile": "Hant-Latin.txt"
8
+ }
package/transforms/Hant-Latin.txt ADDED
@@ -0,0 +1,100 @@
1
+ # Warning: does not do round-trip mapping!!
2
+ # Convert compounds; these are added individually, not derived from Unihan kMandarin.
3
+ # Here Han-Spacedhan() has not yet been applied.
4
+ # The following was moved from Hans-Latn; in a Hant/Taiwan context, the simplified-form city name 沈阳 should still transform to shěnyáng.
5
+ 沈 } 阳 →shěn;# 沈 is shěn (not chén) if followed by 阳 yáng: 沈阳 city Shěnyáng
6
+ # START From Unicode 17, the following should be autogenerated:
7
+ [棓]→bàng; # U+68D3
8
+ [繃]→bēng; # U+7E43
9
+ [俾]→bì; # U+4FFE
10
+ [萹]→biǎn; # U+8439
11
+ [摽脿蔈麃]→biāo; # U+647D,813F,8508,9E83
12
+ [啵]→bō; # U+5575
13
+ [柏薄]→bó; # U+67CF,8584
14
+ [卜]→bǔ; # U+535C
15
+ [差]→chā; # U+5DEE
16
+ [沈]→chén; # U+6C88
17
+ [牚]→chēng; # U+725A
18
+ [埫]→chǒng; # U+57EB
19
+ [槭]→cù; # U+69ED
20
+ [噠]→dá; # U+5660
21
+ [蹬]→dèng; # U+8E6C
22
+ [地]→dì; # U+5730
23
+ [嗲]→diē; # U+55F2
24
+ [䏲跌]→dié; # U+43F2,8DCC
25
+ [町]→dīng; # U+753A
26
+ [斗]→dǒu; # U+6597
27
+ [都]→dū; # U+90FD
28
+ [碡]→dú; # U+78A1
29
+ [柁]→duò; # U+67C1
30
+ [嗯]→en; # U+55EF
31
+ [髪髮]→fǎ; # U+9AEA,9AEE
32
+ [蕃]→fān; # U+8543
33
+ [帆]→fán; # U+5E06
34
+ [氾]→fàn; # U+6C3E
35
+ [彷]→fǎng; # U+5F77
36
+ [坋]→fèn; # U+574B
37
+ [諷讽]→fèng; # U+8AF7,8BBD
38
+ [乾]→gān; # U+4E7E
39
+ [㪅]→gēng; # U+3A85
40
+ [蓇]→gǔ; # U+84C7
41
+ [聒]→guā; # U+8052
42
+ [氿]→guǐ; # U+6C3F
43
+ [炔]→guì; # U+7094
44
+ [欻]→hū; # U+6B3B
45
+ [砉]→huò; # U+7809
46
+ [𪟝]→jī; # U+2A7DD
47
+ [蓻]→jí; # U+84FB
48
+ [袷]→jiá; # U+88B7
49
+ [叚]→jiǎ; # U+53DA
50
+ [菹]→jū; # U+83F9
51
+ [剋]→kè; # U+524B
52
+ [框]→kuāng; # U+6846
53
+ [适]→kuò; # U+9002
54
+ [肋]→lè; # U+808B
55
+ [釐]→lí; # U+91D0
56
+ [峛]→lǐ; # U+5CDB
57
+ [𩷕]→liáng; # U+29DD5
58
+ [瞭]→liǎo; # U+77AD
59
+ [蹣]→mán; # U+8E63
60
+ [眄]→miǎn; # U+7704
61
+ [碈]→mín; # U+7888
62
+ [万]→mò; # U+4E07
63
+ [伲]→nǐ; # U+4F32
64
+ [耙]→pá; # U+8019
65
+ [芘]→pí; # U+8298
66
+ [諞]→pián; # U+8ADE
67
+ [剽]→piào; # U+527D
68
+ [剖頗]→pǒ; # U+5256,9817
69
+ [醱]→pò; # U+91B1
70
+ [呇]→qǐ; # U+5447
71
+ [癿]→qié; # U+767F
72
+ [芎]→qiōng; # U+828E
73
+ [杣]→shān; # U+6763
74
+ [杓]→sháo; # U+6753
75
+ [舍]→shè; # U+820D
76
+ [誰]→shéi; # U+8AB0
77
+ [識识]→shì; # U+8B58,8BC6
78
+ [楯]→shǔn; # U+696F
79
+ [洓]→suǒ; # U+6D13
80
+ [沓]→tà; # U+6C93
81
+ [堤隄]→tí; # U+5824,9684
82
+ [萎]→wēi; # U+840E
83
+ [硊]→wěi; # U+784A
84
+ [筽]→wú; # U+7B7D
85
+ [嘸]→wǔ; # U+5638
86
+ [㴔]→xī; # U+3D14
87
+ [𲆰]→xí; # U+321B0
88
+ [𲆦]→xì; # U+321A6
89
+ [呷]→xiá; # U+5477
90
+ [硍]→xiàn; # U+784D
91
+ [崾]→yǎo; # U+5D3E
92
+ [畬]→yú; # U+756C
93
+ [薁]→yù; # U+8581
94
+ [嶦]→zhān; # U+5DA6
95
+ [著]→zhe; # U+8457
96
+ [徵]→zhēng; # U+5FB5
97
+ [苧]→zhù; # U+82E7
98
+ # END From Unicode 17, the above should be autogenerated:
99
+ # Then run the normal Hani-Latn transform for the rest
100
+ ::Hani-Latn();
package/transforms/Hiragana-Katakana.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # note: a global filter is more efficient, but MUST include all source chars
2
- :: [[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]];
2
+ :: [[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]];
3
3
  :: NFKC (NFC);
4
4
  # Hiragana-Katakana
5
5
  # This is largely a one-to-one mapping, but it has a
@@ -173,5 +173,5 @@ $xo = [
173
173
  お ← $xo {ー};
174
174
  :: NFC (NFKC) ;
175
175
  # note: a global filter is more efficient, but MUST include all source chars!!
176
- :: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]);
176
+ :: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]]);
177
177
  # eof
package/transforms/Latin-Jamo.txt CHANGED
@@ -1,5 +1,5 @@
1
- ::[[:script=Latin:][:M:]-];
1
+ ::[[:Script=Latin:][:M:]-];
2
2
  ::NFD;
3
3
  ::Lower;
4
4
  ::Latin-ConjoiningJamo;
5
- ::[[:script=Latin:][:M:]] NFC;
5
+ ::[[:Script=Latin:][:M:]] NFC;
package/transforms/Latin-Katakana.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  # note: a global filter is more efficient, but MUST include all source chars
2
- #:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
2
+ #:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]] ;
3
3
  # MINIMAL FILTER GENERATED FOR: Latin-Katakana
4
4
  ### WARNING -- must add width filter, both here and below!!! ###
5
5
  :: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
@@ -370,11 +370,11 @@ x → | ks ;
370
370
  # Final cleanup
371
371
  '~' → ; # delete stray tildes between letters
372
372
  [:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
373
- # [ʾ[:Nonspacing Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
373
+ # [ʾ[:Nonspacing_Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
374
374
  :: NFC (NFD) ;
375
375
  :: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
376
376
  # note: a global filter is more efficient, but MUST include all source chars!!
377
- #:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
377
+ #:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]]);
378
378
  # MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
379
379
  :: ( [[\ -~¢-£¥-¦¬̄₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
380
380
  # eof
package/transforms/Latin-NumericPinyin.txt CHANGED
@@ -23,5 +23,5 @@ $digit = [1-5];
23
23
  $1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);
24
24
  $1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
25
25
  $1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);
26
- &NumericPinyin-Pinyin($1) ← [:letter:] {($digit)};
26
+ &NumericPinyin-Pinyin($1) ← [:Letter:] {($digit)};
27
27
  ::NFC (NFD);
package/transforms/Maldivian-Latin-BGN.txt CHANGED
@@ -10,7 +10,7 @@
10
10
  # In our rules, we also convert Arabic punctuation characters to Latin.
11
11
  # These appear to be used in Maldivian text, for example in the Universal
12
12
  # Declaration of Human Rights.
13
- ::[[:block=thaana:][،؛؟٪٫٬]\uFDF2] ;
13
+ ::[[:Block=Thaana:][،؛؟٪٫٬]\uFDF2] ;
14
14
  ::NFD;
15
15
  $wordBoundary = [^[:L:][:M:][:N:]] ;
16
16
  $vowel = [\u07A6-\u07AF] ;
package/transforms/Persian-Latin-BGN.txt CHANGED
@@ -11,7 +11,7 @@
11
11
  #
12
12
  # MINIMAL FILTER: Persian-Latin
13
13
  #
14
- :: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
14
+ :: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
15
15
  :: NFKD (NFC) ;
16
16
  #
17
17
  #
package/transforms/Thai-Latin.txt CHANGED
@@ -1,4 +1,4 @@
1
- ::[[:thai:] ก-ฺเ-๛];
1
+ ::[[:Thai:] ก-ฺเ-๛];
2
2
  ::NFD;
3
3
  ::Thai-ThaiSemi;
4
4
  ::Any-BreakInternal;
package/transforms/Thai-ThaiLogical.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  # This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
2
2
  # The rules that convert space into semicolon are in another file;
3
3
  # since they have to come BEFORE the break iterator
4
- $thai = [[:thai:] ก-ฺเ-๛] ;
4
+ $thai = [[:Thai:] ก-ฺเ-๛] ;
5
5
  # First convert the semicolon back
6
6
  ' ' ← $thai { '; ' } $thai;
7
7
  # Remove any other spaces between thai letters
package/transforms/Thai-ThaiSemi.txt CHANGED
@@ -1,4 +1,4 @@
1
1
  # The rules that convert space into semicolon are in this file;
2
2
  # since they have to come BEFORE the break iterator.
3
- $thai = [[:thai:] ก-ฺเ-๛] ;
3
+ $thai = [[:Thai:] ก-ฺเ-๛] ;
4
4
  $thai { ' ' } $thai → '; ' ;
package/transforms/ThaiLogical-Latin.txt CHANGED
@@ -18,8 +18,8 @@
18
18
  #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
19
19
  #\uE000 → ọ ;
20
20
  # ← ọ ;
21
- $notAbove = [^\p{ccc=0}\p{ccc=above}] ;
22
- $notBelow = [^\p{ccc=0}\p{ccc=below}] ;
21
+ $notAbove = [^\p{ccc=0}\p{ccc=Above}] ;
22
+ $notBelow = [^\p{ccc=0}\p{ccc=Below}] ;
23
23
  # Consonants
24
24
  # Warning: the 'h's need to be handled carefully!
25
25
  # What we really want to say is the following, but we can't
package/transforms/am-Ethi-t-d0-morse.txt CHANGED
@@ -12,7 +12,7 @@
12
12
  #
13
13
  # MINIMAL FILTER: Ethiopic-Morse Code
14
14
  #
15
- :: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"$=\-[:Ethiopic:]] ;
15
+ :: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"\$=\-[:Ethiopic:]] ;
16
16
  ([:Lo:])([:Zs:]+)([:Lo:]) → | $1⁄⁂⁄$2$3 ; # ⁄⁂⁄ is assumed to be a sufficiently weird sequence that won't naturally appear in any normal content
17
17
  #
18
18
  ########################################################################
package/transforms/az-Title.txt CHANGED
@@ -1,8 +1,8 @@
1
1
  # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
2
2
  # Make any string of letters after a cased letter be lower, with rules for i
3
- [:cased:] [:case-ignorable:]* { İ → i;
4
- [:cased:] [:case-ignorable:]* { I → ı;
5
- [:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
3
+ [:Cased:] [:Case_Ignorable:]* { İ → i;
4
+ [:Cased:] [:Case_Ignorable:]* { I → ı;
5
+ [:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
6
6
  # Otherwise all lowercase go to upper (titlecase stay as is)
7
7
  i→İ ;
8
8
  ([:Lowercase:]) → &Any-Upper($1) ;
package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.txt CHANGED
@@ -747,16 +747,16 @@ $wordBoundary{ኦ → $ኦ ; # ETHIOPIC SYLLABLE GLOTTAL O
747
747
  # Convert to dot to dot if dot is followed by a number, ellipsis, or another dot.
748
748
  \. $1 ← \.([0-9….]) ;
749
749
  # Convert to Ethiopic Fullstop if dot is not followed by a number or another dot.
750
- ። $1 \.([^0-9.]) ;
750
+ ። $1 \.([^0-9.]) ;
751
751
  \, $1 ← \,([0-9]) ;
752
- # ፣ $1 \,([^0-9]) ;
752
+ # ፣ $1 \,([^0-9]) ;
753
753
  ፤ ↔ \; ;
754
754
  ፦ ↔ \:\- ;
755
755
  # ፥ ↔ \: ;
756
756
  # ፨ → "#" ;
757
757
  # ፠ → \+ ;
758
758
  ፧ → \? ;
759
- ፡ $1 \,([^0-9]) ;
759
+ ፡ $1 \,([^0-9]) ;
760
760
  ::Null ;
761
761
  $1 $ጥበቅ ← $ጥበቅ ([ሀ-ፖ]) ;
762
762
  ########################################################################
package/transforms/chr-chr_FONIPA.txt CHANGED
@@ -106,4 +106,4 @@ e e+ → eː;
106
106
  i i+ → iː;
107
107
  o o+ → oː;
108
108
  u u+ → uː;
109
- ə̃ {ə̃}+ → ə̃;
109
+ ə̃ ə̃+ → ə̃;
package/transforms/de-ASCII.txt CHANGED
@@ -10,4 +10,4 @@ $UE = [Ü {U \u0308}];
10
10
  $AE → AE;
11
11
  $OE → OE;
12
12
  $UE → UE;
13
- ::Any-ASCII;
13
+ ::Latin-ASCII;
package/transforms/el-Lower.txt CHANGED
@@ -4,7 +4,7 @@
4
4
  # and C is not followed by a sequence consisting of zero or more case-ignorable characters and then a cased letter.
5
5
  # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
6
6
  # With translit rules, easiest is to handle the negative condition first, mapping in that case to the regular sigma.
7
- Σ } [:case-ignorable:]* [:cased:] → σ;
8
- [:cased:] [:case-ignorable:]* { Σ → ς;
7
+ Σ } [:Case_Ignorable:]* [:Cased:] → σ;
8
+ [:Cased:] [:Case_Ignorable:]* { Σ → ς;
9
9
  ::Any-Lower;
10
10
  ::NFC();
package/transforms/el-Title.txt CHANGED
@@ -2,9 +2,9 @@
2
2
  # Remove \0301 following Greek, with possible intervening 0308 marks.
3
3
  # [[:Greek:] & [:Ll:]] [\u0308]? { \u0301 → ;
4
4
  # Make any string of letters after a cased letter be lower, with rules for sigma
5
- [:cased:] [:case-ignorable:]* { Σ } [:case-ignorable:]* [:cased:] → σ;
6
- [:cased:] [:case-ignorable:]* { Σ → ς;
7
- [:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
5
+ [:Cased:] [:Case_Ignorable:]* { Σ } [:Case_Ignorable:]* [:Cased:] → σ;
6
+ [:Cased:] [:Case_Ignorable:]* { Σ → ς;
7
+ [:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
8
8
  # Otherwise all lowercase go to upper (titlecase stay as is)
9
9
  ([:Lowercase:]) → &Any-Title($1) ;
10
10
  ::NFC();
package/transforms/it-am.txt CHANGED
@@ -251,5 +251,5 @@ y → | i;
251
251
  z → ዝ;
252
252
  #
253
253
  #
254
- [:nonspacing mark:] → ;
254
+ [:Nonspacing_Mark:] → ;
255
255
  ::NFC(NFD);
package/transforms/it-ja.txt CHANGED
@@ -253,5 +253,5 @@ z → ツ;
253
253
  \- → =;
254
254
  #
255
255
  #
256
- [:nonspacing mark:] → ;
256
+ [:Nonspacing_Mark:] → ;
257
257
  ::NFC(NFD);
package/transforms/lt-Title.txt CHANGED
@@ -1,12 +1,12 @@
1
1
  # Make any string of letters after a cased letter be lower
2
2
  ::NFD();
3
- [:cased:] [:case-ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
4
- [:cased:] [:case-ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
5
- [:cased:] [:case-ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
6
- [:cased:] [:case-ignorable:]* {I \u0300 → i \u0307 \u0300;
7
- [:cased:] [:case-ignorable:]* {I \u0301 → i \u0307 \u0301;
8
- [:cased:] [:case-ignorable:]* {I \u0303 → i \u0307 \u0303;
9
- [:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
3
+ [:Cased:] [:Case_Ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
4
+ [:Cased:] [:Case_Ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
5
+ [:Cased:] [:Case_Ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
6
+ [:Cased:] [:Case_Ignorable:]* {I \u0300 → i \u0307 \u0300;
7
+ [:Cased:] [:Case_Ignorable:]* {I \u0301 → i \u0307 \u0301;
8
+ [:Cased:] [:Case_Ignorable:]* {I \u0303 → i \u0307 \u0303;
9
+ [:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
10
10
  # Otherwise all lowercase go to upper (titlecase stay as is)
11
11
  [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
12
12
  ([:Lowercase:]) → &Any-Upper($1) ;
package/transforms/tr-Title.txt CHANGED
@@ -1,8 +1,8 @@
1
1
  # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
2
2
  # Make any string of letters after a cased letter be lower, with rules for i
3
- [:cased:] [:case-ignorable:]* { İ → i;
4
- [:cased:] [:case-ignorable:]* { I → ı;
5
- [:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
3
+ [:Cased:] [:Case_Ignorable:]* { İ → i;
4
+ [:Cased:] [:Case_Ignorable:]* { I → ı;
5
+ [:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
6
6
  # Otherwise all lowercase go to upper (titlecase stay as is)
7
7
  i→İ ;
8
8
  ([:Lowercase:]) → &Any-Upper($1) ;
package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.txt CHANGED
@@ -12,6 +12,6 @@
12
12
  ########################################################################
13
13
  #
14
14
  :: Amharic-Amharic/Geminate ;
15
- :: Ethiopic-Latin/BetaMetsehaf ;
15
+ :: Ethiopic-Latin/Beta_Metsehaf ;
16
16
  #
17
17
  ########################################################################
package/transforms.json CHANGED
@@ -71,6 +71,7 @@
71
71
  "Han-Latin-Names",
72
72
  "Han-Spacedhan",
73
73
  "Hangul-Latin",
74
+ "Hant-Latin",
74
75
  "Hebrew-Latin",
75
76
  "Hebrew-Latin-BGN",
76
77
  "Hiragana-Katakana",