npm - cldr-transforms - Versions diffs - 46.0.0 → 47.0.0-BETA2 - Mend

cldr-transforms 46.0.0 → 47.0.0-BETA2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/LICENSE +1 -1
package/README.md +1 -1
package/bower.json +2 -2
package/package.json +3 -3
package/transforms/Arabic-Latin-BGN.txt +1 -1
package/transforms/Arabic-Latin.txt +1 -1
package/transforms/Bengali-Latin.txt +1 -1
package/transforms/Cyrillic-Latin.txt +4 -4
package/transforms/Greek-Latin.txt +8 -8
package/transforms/Greek_Latin_UNGEGN.txt +2 -2
package/transforms/Han-Latin-Names.txt +1 -1
package/transforms/Han-Latin.json +2 -2
package/transforms/Han-Latin.txt +1 -1
package/transforms/Han-Spacedhan.txt +4 -4
package/transforms/Hant-Latin.json +8 -0
package/transforms/Hant-Latin.txt +100 -0
package/transforms/Hiragana-Katakana.txt +2 -2
package/transforms/Latin-Jamo.txt +2 -2
package/transforms/Latin-Katakana.txt +3 -3
package/transforms/Latin-NumericPinyin.txt +1 -1
package/transforms/Maldivian-Latin-BGN.txt +1 -1
package/transforms/Persian-Latin-BGN.txt +1 -1
package/transforms/Thai-Latin.txt +1 -1
package/transforms/Thai-ThaiLogical.txt +1 -1
package/transforms/Thai-ThaiSemi.txt +1 -1
package/transforms/ThaiLogical-Latin.txt +2 -2
package/transforms/am-Ethi-t-d0-morse.txt +1 -1
package/transforms/az-Title.txt +3 -3
package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.txt +3 -3
package/transforms/chr-chr_FONIPA.txt +1 -1
package/transforms/de-ASCII.txt +1 -1
package/transforms/el-Lower.txt +2 -2
package/transforms/el-Title.txt +3 -3
package/transforms/it-am.txt +1 -1
package/transforms/it-ja.txt +1 -1
package/transforms/lt-Title.txt +7 -7
package/transforms/tr-Title.txt +3 -3
package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.txt +1 -1
package/transforms.json +1 -0

package/LICENSE CHANGED Viewed

@@ -2,7 +2,7 @@ UNICODE LICENSE V3
 COPYRIGHT AND PERMISSION NOTICE
-Copyright © 2004-2024 Unicode, Inc.
+Copyright © 2004-2025 Unicode, Inc.
 NOTICE TO USER: Carefully read the following legal agreement. BY
 DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR

package/README.md CHANGED Viewed

@@ -19,7 +19,7 @@ the data contained here, please file a new ticket at [Unicode Jira](https://unic
 ## License
-Copyright © 1991-2024 Unicode, Inc.
+Copyright © 1991-2025 Unicode, Inc.
 [Terms of Use](http://www.unicode.org/copyright.html)
 SPDX-License-Identifier: Unicode-3.0

package/bower.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "name": "cldr-transforms",
-  "version": "46.0.0",
+  "version": "47.0.0-BETA2",
   "dependencies": {
-    "cldr-core": "46.0.0"
+    "cldr-core": "47.0.0-BETA2"
   },
   "main": "transforms/**/*.json",
   "ignore": [

package/package.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "name": "cldr-transforms",
-  "version": "46.0.0",
+  "version": "47.0.0-BETA2",
   "peerDependencies": {
-    "cldr-core": "46.0.0"
+    "cldr-core": "47.0.0-BETA2"
   },
   "description": "Transform data",
   "homepage": "https://cldr.unicode.org",
@@ -24,6 +24,6 @@
   },
   "license": "Unicode-3.0",
   "bugs": "https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket",
-  "cldrVersion": "46",
+  "cldrVersion": "47",
   "unicodeVersion": "16.0.0"
 }

package/transforms/Arabic-Latin-BGN.txt CHANGED Viewed

@@ -14,7 +14,7 @@
 #
 # MINIMAL FILTER: Arabic-Latin
 #
-:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;
+:: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;
 :: NFKD (NFC) ;
 #
 #

package/transforms/Arabic-Latin.txt CHANGED Viewed

@@ -10,7 +10,7 @@
 # Does *not* do assimilation of "al", nor hyphenation.
 # While it could be done, we need to determine whether a prefix "al" could
 # occur other than as the definite article (since no space is used).
-:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
+:: [[:Arabic:][:Block=Arabic:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
 :: NFKD (NFC);
 $disambig =  ̱ ;
 $disambig2 =  ̰ ;

package/transforms/Bengali-Latin.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-::[[:script=bengali:][।-॥ঁ-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ়-ৄে-ৈো-্ৗড়-ঢ়য়-ৣ০-৺ৎ]];
+::[[:Script=Bengali:][।-॥ঁ-ঃঅ-ঌএ-ঐও-নপ-রলশ-হ়-ৄে-ৈো-্ৗড়-ঢ়য়-ৣ০-৺ৎ]];
 ::NFD;
 ::Bengali-InterIndic;
 ::InterIndic-Latin;

package/transforms/Cyrillic-Latin.txt CHANGED Viewed

@@ -2,7 +2,7 @@
 # Should add variants for Russian-English, Russian-German
 # Those can use this as a base, and then remap cases
 # like a $hat to ya or ja.
-# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
+# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:]] ;
 ### WARNING, ̈ must be added to the generated filters, in both directions ###
 # MINIMAL FILTER
 # Cyrillic-Latin
@@ -267,12 +267,12 @@ $ignore = [[:Mark:]''] * ;
 | K ← Q ;
 | u ← w ;
 | U ← W ;
-| KS ← X } $ignore [:UppercaseLetter:] ;
-| KS ← [:UppercaseLetter:] $ignore { X ;
+| KS ← X } $ignore [:Uppercase_Letter:] ;
+| KS ← [:Uppercase_Letter:] $ignore { X ;
 | Ks ← X ;
 | ks ← x ;
 :: NFC (NFD) ;
 # note: a global filter is more efficient, but MUST include all source chars!!
-# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:nonspacing mark:] ‧]);
+# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:] ‧]);
 # MINIMAL FILTER: Latin-Cyrillic
 :: ( [ḫḪhH‧ˌ̈A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ̀-̂̆-̦̱̇̌̀-́̈́ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ;

package/transforms/Greek-Latin.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 # Rules are predicated on running NFD first, and NFC afterwards
-# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
+# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
 # MINIMAL FILTER GENERATED FOR: Greek-Latin
 :: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
 :: NFD (NFC) ;
@@ -12,9 +12,9 @@
 # ὨΣ ὩΣ ὪΣ ὫΣ
 # Ạ, ạ, Ẹ, ẹ, Ọ, ọ
 # Useful variables
-$lower = [[:latin:][:greek:] & [:Ll:]];
-$glower = [[:greek:] & [:Ll:]];
-$upper = [[:latin:][:greek:] & [:Lu:]] ;
+$lower = [[:Latin:][:Greek:] & [:Ll:]];
+$glower = [[:Greek:] & [:Ll:]];
+$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
 $accent = [:M:] ;
 # NOTE: restrict to just the Greek & Latin accents that we care about
 # TODO: broaden out once interation is fixed
@@ -220,8 +220,8 @@ $ignore = [[:Mark:]''] * ;
 | B ← W  } $vowel ;
 | U ← V ;
 | U ← W ;
-$rough } $ignore [:UppercaseLetter:] → H ;
-$ignore [:UppercaseLetter:] { $rough → H ;
+$rough } $ignore [:Uppercase_Letter:] → H ;
+$ignore [:Uppercase_Letter:] { $rough → H ;
 $rough ← H ;
 $rough ↔ h ;
 # Completeness for Greek
@@ -243,7 +243,7 @@ $rough ↔ h ;
 ← [Ππ] { \' } [Ss] ;
 ← [Νν] { \' } $egammaLike ;
 ::NFC (NFD) ;
-# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
-# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
+# ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
+# ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
 # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
 :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;

package/transforms/Greek_Latin_UNGEGN.txt CHANGED Viewed

@@ -6,8 +6,8 @@
 :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
 ::NFD (NFC) ;
 # Useful variables
-$lower = [[:latin:][:greek:] & [:Ll:]] ;
-$upper = [[:latin:][:greek:] & [:Lu:]] ;
+$lower = [[:Latin:][:Greek:] & [:Ll:]] ;
+$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
 $accent = [[:Mn:][:Me:]] ;
 $macron = ̄ ;
 $ddot = ̈ ;

package/transforms/Han-Latin-Names.txt CHANGED Viewed

@@ -7,7 +7,7 @@
 # Do this before ::Han-Spacedhan() to catch Han after space in original text,
 # and to apply before all other rules.
 $startOfHanMarker = \uFDD1;
-[:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1;
+[:^Script=Han:] { ([:Script=Han:]) → $startOfHanMarker $1;
 # Need Spacedhan so the name transliterations get spaced properly
 ::Han-Spacedhan();
 # Convert special name readings that depend on next character

package/transforms/Han-Latin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_visibility": "external",
-  "_alias": "Han-Latin",
-  "_aliasBcp47": "und-Latn-t-und-hani",
+  "_alias": "Hans-Latn Han-Latin",
+  "_aliasBcp47": "und-Latn-t-und-hans und-Latn-t-und-hani",
   "_source": "Hani",
   "_direction": "forward",
   "_target": "Latn",

package/transforms/Han-Latin.txt CHANGED Viewed

@@ -5,7 +5,7 @@
 # Note that Han-Spacedhan() has already been applied, so there should be spaces between Han characters.
 藏 } \u0020? 文 →zàng;# 藏 is zàng (not cáng) if followed by 文 wén: 藏文 language Zàngwén = Tibetan
 重 } \u0020? 庆 →chóng;# 重 is chóng (not zhòng) if followed by 庆 qìng: 重庆 city Chóngqìng
-沈 } \u0020? 阳 →shěn;# 沈 is shěn (not chén) if followed by 阳 yáng: 沈阳 city Shěnyáng
+# "沈 } \u0020? 阳 →shěn" is obsolete for Hans, the kMandarin entry for 沈 changed from "chén" to "shěn chén" in Unicode 14
 秘 } \u0020? 鲁 →bì;# 秘 is bì (not mì) if followed by 鲁 lǔ: 秘鲁 country Bìlǔ = Peru
 # START AUTOGENERATED Han-Latin.xml ( Unihan kMandarin)
 [吖錒锕阿𠼞𥥩𨉚𱚱]→ā;

package/transforms/Han-Spacedhan.txt CHANGED Viewed

@@ -1,8 +1,8 @@
 # Only intended for internal use
 # Make sure Han are normalized, including characters that contain them.
-# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
-# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
-:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
+# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
+# Where XXX is the resolved [:Ideographic:][:sc=Han:]. It needs updating with each Unicode release!
+:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;
 :: fullwidth-halfwidth;
 ｡ → '.';
 。→ '.';
@@ -23,7 +23,7 @@
 々→ '⓶';
 〜→ '~';
 $terminalPunct = [\.\,\:\;\?\!．，：？！｡、；[:Pe:][:Pf:]];
-$initialPunct = [:Ps:][:Pi:];
+$initialPunct = [[:Ps:][:Pi:]];
 # add space between any Han or terminal punctuation and letters, and
 # between letters and Han or initial punct
 [[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;

package/transforms/Hant-Latin.json ADDED Viewed

@@ -0,0 +1,8 @@
+{
+  "_visibility": "external",
+  "_aliasBcp47": "und-Latn-t-und-hant",
+  "_source": "Hant",
+  "_direction": "forward",
+  "_target": "Latn",
+  "_rulesFile": "Hant-Latin.txt"
+}

package/transforms/Hant-Latin.txt ADDED Viewed

@@ -0,0 +1,100 @@
+# Warning: does not do round-trip mapping!!
+# Convert compounds; these are added individually, not derived from Unihan kMandarin.
+# Here Han-Spacedhan() has not yet been applied.
+# The following was moved from Hans-Latn; in a Hant/Taiwan context, the simplified-form city name 沈阳 should still transform to shěnyáng.
+沈 } 阳 →shěn;# 沈 is shěn (not chén) if followed by 阳 yáng: 沈阳 city Shěnyáng
+# START From Unicode 17, the following should be autogenerated:
+[棓]→bàng;  # U+68D3
+[繃]→bēng;  # U+7E43
+[俾]→bì;    # U+4FFE
+[萹]→biǎn;  # U+8439
+[摽脿蔈麃]→biāo; # U+647D,813F,8508,9E83
+[啵]→bō;    # U+5575
+[柏薄]→bó;   # U+67CF,8584
+[卜]→bǔ;    # U+535C
+[差]→chā;   # U+5DEE
+[沈]→chén;  # U+6C88
+[牚]→chēng; # U+725A
+[埫]→chǒng; # U+57EB
+[槭]→cù;    # U+69ED
+[噠]→dá;    # U+5660
+[蹬]→dèng;  # U+8E6C
+[地]→dì;    # U+5730
+[嗲]→diē;   # U+55F2
+[䏲跌]→dié;  # U+43F2,8DCC
+[町]→dīng;  # U+753A
+[斗]→dǒu;   # U+6597
+[都]→dū;    # U+90FD
+[碡]→dú;    # U+78A1
+[柁]→duò;   # U+67C1
+[嗯]→en;    # U+55EF
+[髪髮]→fǎ;   # U+9AEA,9AEE
+[蕃]→fān;   # U+8543
+[帆]→fán;   # U+5E06
+[氾]→fàn;   # U+6C3E
+[彷]→fǎng;  # U+5F77
+[坋]→fèn;   # U+574B
+[諷讽]→fèng; # U+8AF7,8BBD
+[乾]→gān;   # U+4E7E
+[㪅]→gēng;  # U+3A85
+[蓇]→gǔ;    # U+84C7
+[聒]→guā;   # U+8052
+[氿]→guǐ;   # U+6C3F
+[炔]→guì;   # U+7094
+[欻]→hū;    # U+6B3B
+[砉]→huò;   # U+7809
+[𪟝]→jī;    # U+2A7DD
+[蓻]→jí;    # U+84FB
+[袷]→jiá;   # U+88B7
+[叚]→jiǎ;   # U+53DA
+[菹]→jū;    # U+83F9
+[剋]→kè;    # U+524B
+[框]→kuāng; # U+6846
+[适]→kuò;   # U+9002
+[肋]→lè;    # U+808B
+[釐]→lí;    # U+91D0
+[峛]→lǐ;    # U+5CDB
+[𩷕]→liáng; # U+29DD5
+[瞭]→liǎo;  # U+77AD
+[蹣]→mán;   # U+8E63
+[眄]→miǎn;  # U+7704
+[碈]→mín;   # U+7888
+[万]→mò;    # U+4E07
+[伲]→nǐ;    # U+4F32
+[耙]→pá;    # U+8019
+[芘]→pí;    # U+8298
+[諞]→pián;  # U+8ADE
+[剽]→piào;  # U+527D
+[剖頗]→pǒ;   # U+5256,9817
+[醱]→pò;    # U+91B1
+[呇]→qǐ;    # U+5447
+[癿]→qié;   # U+767F
+[芎]→qiōng; # U+828E
+[杣]→shān;  # U+6763
+[杓]→sháo;  # U+6753
+[舍]→shè;   # U+820D
+[誰]→shéi;  # U+8AB0
+[識识]→shì; # U+8B58,8BC6
+[楯]→shǔn;  # U+696F
+[洓]→suǒ;   # U+6D13
+[沓]→tà;    # U+6C93
+[堤隄]→tí;   # U+5824,9684
+[萎]→wēi;   # U+840E
+[硊]→wěi;   # U+784A
+[筽]→wú;    # U+7B7D
+[嘸]→wǔ;    # U+5638
+[㴔]→xī;    # U+3D14
+[𲆰]→xí;    # U+321B0
+[𲆦]→xì;    # U+321A6
+[呷]→xiá;   # U+5477
+[硍]→xiàn;   # U+784D
+[崾]→yǎo;    # U+5D3E
+[畬]→yú;   # U+756C
+[薁]→yù;   # U+8581
+[嶦]→zhān;   # U+5DA6
+[著]→zhe;   # U+8457
+[徵]→zhēng;   # U+5FB5
+[苧]→zhù;   # U+82E7
+# END From Unicode 17, the above should be autogenerated:
+# Then run the normal Hani-Latn transform for the rest
+::Hani-Latn();

package/transforms/Hiragana-Katakana.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 # note: a global filter is more efficient, but MUST include all source chars
-:: [[\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]];
+:: [[\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]];
 :: NFKC (NFC);
 # Hiragana-Katakana
 # This is largely a one-to-one mapping, but it has a
@@ -173,5 +173,5 @@ $xo = [
 お ← $xo {ー};
 :: NFC (NFKC) ;
 # note: a global filter is more efficient, but MUST include all source chars!!
-:: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]);
+:: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]]);
 # eof

package/transforms/Latin-Jamo.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-::[[:script=Latin:][:M:]-];
+::[[:Script=Latin:][:M:]-];
 ::NFD;
 ::Lower;
 ::Latin-ConjoiningJamo;
-::[[:script=Latin:][:M:]] NFC;
+::[[:Script=Latin:][:M:]] NFC;

package/transforms/Latin-Katakana.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 # note: a global filter is more efficient, but MUST include all source chars
-#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
+#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟ [:Latin:][:Katakana:] [:Nonspacing_Mark:]] ;
 # MINIMAL FILTER GENERATED FOR: Latin-Katakana
 ### WARNING -- must add width filter, both here and below!!! ###
 :: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー！-～￠-￦][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
@@ -370,11 +370,11 @@ x → | ks ;
 # Final cleanup
 '~' → ; # delete stray tildes between letters
 [:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
-# [ʾ[:Nonspacing Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
+# [ʾ[:Nonspacing_Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
 :: NFC (NFD) ;
 :: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
 # note: a global filter is more efficient, but MUST include all source chars!!
-#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟ [:Latin:][:Katakana:] [:nonspacing mark:]]);
+#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー ｡-ﾟ [:Latin:][:Katakana:] [:Nonspacing_Mark:]]);
 # MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
 :: ( [[\ -~¢-£¥-¦¬̄₩｡-ﾾￂ-ￇￊ-ￏￒ-ￗￚ-ￜ￨-￮][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
 # eof

package/transforms/Latin-NumericPinyin.txt CHANGED Viewed

@@ -23,5 +23,5 @@ $digit = [1-5];
 $1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);
 $1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
 $1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);
-&NumericPinyin-Pinyin($1) ← [:letter:] {($digit)};
+&NumericPinyin-Pinyin($1) ← [:Letter:] {($digit)};
 ::NFC (NFD);

package/transforms/Maldivian-Latin-BGN.txt CHANGED Viewed

@@ -10,7 +10,7 @@
 # In our rules, we also convert Arabic punctuation characters to Latin.
 # These appears to be used in Maldivian text, for example in the Universal
 # Declaration of Human Rights.
-::[[:block=thaana:][،؛؟٪٫٬]\uFDF2] ;
+::[[:Block=Thaana:][،؛؟٪٫٬]\uFDF2] ;
 ::NFD;
 $wordBoundary = [^[:L:][:M:][:N:]] ;
 $vowel = [\u07A6-\u07AF] ;

package/transforms/Persian-Latin-BGN.txt CHANGED Viewed

@@ -11,7 +11,7 @@
 #
 # MINIMAL FILTER: Persian-Latin
 #
-:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
+:: [[:Arabic:][:Block=Arabic:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
 :: NFKD (NFC) ;
 #
 #

package/transforms/Thai-Latin.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-::[[:thai:] ก-ฺเ-๛];
+::[[:Thai:] ก-ฺเ-๛];
 ::NFD;
 ::Thai-ThaiSemi;
 ::Any-BreakInternal;

package/transforms/Thai-ThaiLogical.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 # This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
 # The rules that convert space into semicolon are in another file;
 # since they have to come BEFORE the break iterator
-$thai = [[:thai:] ก-ฺเ-๛] ;
+$thai = [[:Thai:] ก-ฺเ-๛] ;
 # First convert the semicolon back
 ' '  ← $thai { '; '  } $thai;
 # Remove any other spaces between thai letters

package/transforms/Thai-ThaiSemi.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 # The rules that convert space into semicolon are in this file;
 # since they have to come BEFORE the break iterator.
-$thai = [[:thai:] ก-ฺเ-๛] ;
+$thai = [[:Thai:] ก-ฺเ-๛] ;
 $thai { ' ' } $thai → '; ' ;

package/transforms/ThaiLogical-Latin.txt CHANGED Viewed

@@ -18,8 +18,8 @@
 #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
 #\uE000 → ọ ;
 # ← ọ ;
-$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
-$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
+$notAbove = [^\p{ccc=0}\p{ccc=Above}] ;
+$notBelow = [^\p{ccc=0}\p{ccc=Below}] ;
 # Consonants
 # Warning: the 'h's need to be handled carefully!
 # What we really want to say is the following, but we can't

package/transforms/am-Ethi-t-d0-morse.txt CHANGED Viewed

@@ -12,7 +12,7 @@
 #
 # MINIMAL FILTER: Ethiopic-Morse Code
 #
-:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"$=\-[:Ethiopic:]] ;
+:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"\$=\-[:Ethiopic:]] ;
 ([:Lo:])([:Zs:]+)([:Lo:]) → | $1⁄⁂⁄$2$3 ;  # ⁄⁂⁄ is assumed to be a sufficiently weird enough sequence that won't naturally appear in any normal content
 #
 ########################################################################

package/transforms/az-Title.txt CHANGED Viewed

@@ -1,8 +1,8 @@
 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 # Make any string of letters after a cased letter be lower, with rules for i
-[:cased:] [:case-ignorable:]* { İ → i;
-[:cased:] [:case-ignorable:]* { I → ı;
-[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
+[:Cased:] [:Case_Ignorable:]* { İ → i;
+[:Cased:] [:Case_Ignorable:]* { I → ı;
+[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
 # Otherwise all lowercase go to upper (titlecase stay as is)
 i→İ ;
 ([:Lowercase:]) → &Any-Upper($1) ;

package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.txt CHANGED Viewed

@@ -747,16 +747,16 @@ $wordBoundary{ኦ → $ኦ ; # ETHIOPIC SYLLABLE GLOTTAL O
 # Convert to dot to dot if dot is followed by a number, ellipsis, or another dot.
 \. $1 ← \.([0-9….]) ;
 # Convert to Ethiopic Fullstop if dot is not followed by a number or another dot.
-። $1 ↔ \.([^0-9.]) ;
+። $1 ← \.([^0-9.]) ;
 \, $1 ← \,([0-9]) ;
-# ፣ $1 ↔ \,([^0-9]) ;
+# ፣ $1 ← \,([^0-9]) ;
 ፤ ↔ \;    ;
 ፦ ↔ \:\-  ;
 # ፥ ↔ \:  ;
 # ፨ → "#" ;
 # ፠ → \+  ;
 ፧ → \?  ;
-፡ $1 ↔ \,([^0-9])   ;
+፡ $1 ← \,([^0-9])   ;
 ::Null ;
 $1 $ጥበቅ ← $ጥበቅ ([ሀ-ፖ]) ;
 ########################################################################

package/transforms/chr-chr_FONIPA.txt CHANGED Viewed

@@ -106,4 +106,4 @@ e e+ → eː;
 i i+ → iː;
 o o+ → oː;
 u u+ → uː;
-ə̃ {ə̃}+ → ə̃;
+ə̃ ə̃+ → ə̃;

package/transforms/de-ASCII.txt CHANGED Viewed

@@ -10,4 +10,4 @@ $UE = [Ü {U \u0308}];
 $AE → AE;
 $OE → OE;
 $UE → UE;
-::Any-ASCII;
+::Latin-ASCII;

package/transforms/el-Lower.txt CHANGED Viewed

@@ -4,7 +4,7 @@
 # and C is not followed by a sequence consisting of zero or more case-ignorable characters and then a cased letter.
 # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
 # With translit rules, easiest is to handle the negative condition first, mapping in that case to the regular sigma.
-Σ } [:case-ignorable:]* [:cased:] → σ;
-[:cased:] [:case-ignorable:]* { Σ → ς;
+Σ } [:Case_Ignorable:]* [:Cased:] → σ;
+[:Cased:] [:Case_Ignorable:]* { Σ → ς;
 ::Any-Lower;
 ::NFC();

package/transforms/el-Title.txt CHANGED Viewed

@@ -2,9 +2,9 @@
 # Remove \0301 following Greek, with possible intervening 0308 marks.
 # [[:Greek:] & [:Ll:]] [\u0308]? { \u0301 → ;
 # Make any string of letters after a cased letter be lower, with rules for sigma
-[:cased:] [:case-ignorable:]* { Σ } [:case-ignorable:]* [:cased:] → σ;
-[:cased:] [:case-ignorable:]* { Σ → ς;
-[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
+[:Cased:] [:Case_Ignorable:]* { Σ } [:Case_Ignorable:]* [:Cased:] → σ;
+[:Cased:] [:Case_Ignorable:]* { Σ → ς;
+[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
 # Otherwise all lowercase go to upper (titlecase stay as is)
 ([:Lowercase:]) → &Any-Title($1) ;
 ::NFC();

package/transforms/it-am.txt CHANGED Viewed

@@ -251,5 +251,5 @@ y → | i;
 z → ዝ;
 #
 #
-[:nonspacing mark:] → ;
+[:Nonspacing_Mark:] → ;
 ::NFC(NFD);

package/transforms/it-ja.txt CHANGED Viewed

@@ -253,5 +253,5 @@ z → ツ;
 \- → ＝;
 #
 #
-[:nonspacing mark:] → ;
+[:Nonspacing_Mark:] → ;
 ::NFC(NFD);

package/transforms/lt-Title.txt CHANGED Viewed

@@ -1,12 +1,12 @@
 # Make any string of letters after a cased letter be lower
 ::NFD();
-[:cased:] [:case-ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
-[:cased:] [:case-ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
-[:cased:] [:case-ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
-[:cased:] [:case-ignorable:]* {I \u0300 → i \u0307 \u0300;
-[:cased:] [:case-ignorable:]* {I \u0301 → i \u0307 \u0301;
-[:cased:] [:case-ignorable:]* {I \u0303 → i \u0307 \u0303;
-[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
+[:Cased:] [:Case_Ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
+[:Cased:] [:Case_Ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
+[:Cased:] [:Case_Ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
+[:Cased:] [:Case_Ignorable:]* {I \u0300 → i \u0307 \u0300;
+[:Cased:] [:Case_Ignorable:]* {I \u0301 → i \u0307 \u0301;
+[:Cased:] [:Case_Ignorable:]* {I \u0303 → i \u0307 \u0303;
+[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
 # Otherwise all lowercase go to upper (titlecase stay as is)
 [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
 ([:Lowercase:]) → &Any-Upper($1) ;

package/transforms/tr-Title.txt CHANGED Viewed

@@ -1,8 +1,8 @@
 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 # Make any string of letters after a cased letter be lower, with rules for i
-[:cased:] [:case-ignorable:]* { İ → i;
-[:cased:] [:case-ignorable:]* { I → ı;
-[:cased:] [:case-ignorable:]* { (.) → &Any-Lower($1) ;
+[:Cased:] [:Case_Ignorable:]* { İ → i;
+[:Cased:] [:Case_Ignorable:]* { I → ı;
+[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
 # Otherwise all lowercase go to upper (titlecase stay as is)
 i→İ ;
 ([:Lowercase:]) → &Any-Upper($1) ;

package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.txt CHANGED Viewed

@@ -12,6 +12,6 @@
 ########################################################################
 #
 :: Amharic-Amharic/Geminate ;
-:: Ethiopic-Latin/BetaMetsehaf ;
+:: Ethiopic-Latin/Beta_Metsehaf ;
 #
 ########################################################################

package/transforms.json CHANGED Viewed

@@ -71,6 +71,7 @@
       "Han-Latin-Names",
       "Han-Spacedhan",
       "Hangul-Latin",
+      "Hant-Latin",
       "Hebrew-Latin",
       "Hebrew-Latin-BGN",
       "Hiragana-Katakana",