cldr-transforms 46.0.0 → 46.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bower.json +2 -2
- package/package.json +3 -3
- package/transforms/Arabic-Latin-BGN.txt +1 -1
- package/transforms/Arabic-Latin.txt +1 -1
- package/transforms/Bengali-Latin.txt +1 -1
- package/transforms/Cyrillic-Latin.txt +4 -4
- package/transforms/Greek-Latin.txt +8 -8
- package/transforms/Greek_Latin_UNGEGN.txt +2 -2
- package/transforms/Han-Latin-Names.txt +1 -1
- package/transforms/Han-Spacedhan.txt +4 -4
- package/transforms/Hiragana-Katakana.txt +2 -2
- package/transforms/Latin-Jamo.txt +2 -2
- package/transforms/Latin-Katakana.txt +3 -3
- package/transforms/Latin-NumericPinyin.txt +1 -1
- package/transforms/Maldivian-Latin-BGN.txt +1 -1
- package/transforms/Persian-Latin-BGN.txt +1 -1
- package/transforms/Thai-Latin.txt +1 -1
- package/transforms/Thai-ThaiLogical.txt +1 -1
- package/transforms/Thai-ThaiSemi.txt +1 -1
- package/transforms/ThaiLogical-Latin.txt +2 -2
- package/transforms/am-Ethi-t-d0-morse.txt +1 -1
- package/transforms/az-Title.txt +3 -3
- package/transforms/de-ASCII.txt +1 -1
- package/transforms/el-Lower.txt +2 -2
- package/transforms/el-Title.txt +3 -3
- package/transforms/it-am.txt +1 -1
- package/transforms/it-ja.txt +1 -1
- package/transforms/lt-Title.txt +7 -7
- package/transforms/tr-Title.txt +3 -3
package/bower.json
CHANGED
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cldr-transforms",
|
|
3
|
-
"version": "46.0.0",
|
|
3
|
+
"version": "46.1.0",
|
|
4
4
|
"peerDependencies": {
|
|
5
|
-
"cldr-core": "46.0.0"
|
|
5
|
+
"cldr-core": "46.1.0"
|
|
6
6
|
},
|
|
7
7
|
"description": "Transform data",
|
|
8
8
|
"homepage": "https://cldr.unicode.org",
|
|
@@ -24,6 +24,6 @@
|
|
|
24
24
|
},
|
|
25
25
|
"license": "Unicode-3.0",
|
|
26
26
|
"bugs": "https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket",
|
|
27
|
-
"cldrVersion": "46",
|
|
27
|
+
"cldrVersion": "46.1",
|
|
28
28
|
"unicodeVersion": "16.0.0"
|
|
29
29
|
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
# Does *not* do assimilation of "al", nor hyphenation.
|
|
11
11
|
# While it could be done, we need to determine whether a prefix "al" could
|
|
12
12
|
# occur other than as the definite article (since no space is used).
|
|
13
|
-
:: [[:Arabic:][:
|
|
13
|
+
:: [[:Arabic:][:Block=Arabic:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
|
|
14
14
|
:: NFKD (NFC);
|
|
15
15
|
$disambig = ̱ ;
|
|
16
16
|
$disambig2 = ̰ ;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# Should add variants for Russian-English, Russian-German
|
|
3
3
|
# Those can use this as a base, and then remap cases
|
|
4
4
|
# like a $hat to ya or ja.
|
|
5
|
-
# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:
|
|
5
|
+
# :: [\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:]] ;
|
|
6
6
|
### WARNING, ̈ must be added to the generated filters, in both directions ###
|
|
7
7
|
# MINIMAL FILTER
|
|
8
8
|
# Cyrillic-Latin
|
|
@@ -267,12 +267,12 @@ $ignore = [[:Mark:]''] * ;
|
|
|
267
267
|
| K ← Q ;
|
|
268
268
|
| u ← w ;
|
|
269
269
|
| U ← W ;
|
|
270
|
-
| KS ← X } $ignore [:
|
|
271
|
-
| KS ← [:
|
|
270
|
+
| KS ← X } $ignore [:Uppercase_Letter:] ;
|
|
271
|
+
| KS ← [:Uppercase_Letter:] $ignore { X ;
|
|
272
272
|
| Ks ← X ;
|
|
273
273
|
| ks ← x ;
|
|
274
274
|
:: NFC (NFD) ;
|
|
275
275
|
# note: a global filter is more efficient, but MUST include all source chars!!
|
|
276
|
-
# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:
|
|
276
|
+
# :: ([\u0000-\u007E ʹ ʺ [:Cyrillic:] [:Latin:] [:Nonspacing_Mark:] ‧]);
|
|
277
277
|
# MINIMAL FILTER: Latin-Cyrillic
|
|
278
278
|
:: ( [ḫḪhH‧ˌ̈A-Za-zÀ-ÏÑ-ÖÙ-Ýà-ïñ-öù-ýÿ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƏƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳəʹ-ʺ̀-̂̆-̦̱̇̌̀-́̈́ʹ΅-ΆΈ-ΊΌΎ-ΐά-ΰό-ώϓЀЃЌ-ЎЙйѐѓќ-ўӁ-ӂӐ-ӑӖ-ӗḀ-ẙẛẠ-ỹἂ-ἅἊ-Ἅἒ-ἕἚ-Ἕἢ-ἥἪ-Ἥἲ-ἵἺ-Ἵὂ-ὅὊ-Ὅὒ-ὕὛὝὢ-ὥὪ-Ὥὰ-ώᾂ-ᾅᾊ-ᾍᾒ-ᾕᾚ-ᾝᾢ-ᾥᾪ-ᾭᾰᾲᾴᾸᾺ-ΆῂῄῈ-Ή῍-῎ῐῒ-ΐῘῚ-Ί῝-῞ῠῢ-ΰῨῪ-Ύ῭-΅ῲῴῸ-ΏK-Å] ) ;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# Rules are predicated on running NFD first, and NFC afterwards
|
|
2
|
-
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:
|
|
2
|
+
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
|
|
3
3
|
# MINIMAL FILTER GENERATED FOR: Greek-Latin
|
|
4
4
|
:: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
|
|
5
5
|
:: NFD (NFC) ;
|
|
@@ -12,9 +12,9 @@
|
|
|
12
12
|
# ὨΣ ὩΣ ὪΣ ὫΣ
|
|
13
13
|
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
|
14
14
|
# Useful variables
|
|
15
|
-
$lower = [[:
|
|
16
|
-
$glower = [[:
|
|
17
|
-
$upper = [[:
|
|
15
|
+
$lower = [[:Latin:][:Greek:] & [:Ll:]];
|
|
16
|
+
$glower = [[:Greek:] & [:Ll:]];
|
|
17
|
+
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
|
|
18
18
|
$accent = [:M:] ;
|
|
19
19
|
# NOTE: restrict to just the Greek & Latin accents that we care about
|
|
20
20
|
# TODO: broaden out once interation is fixed
|
|
@@ -220,8 +220,8 @@ $ignore = [[:Mark:]''] * ;
|
|
|
220
220
|
| B ← W } $vowel ;
|
|
221
221
|
| U ← V ;
|
|
222
222
|
| U ← W ;
|
|
223
|
-
$rough } $ignore [:
|
|
224
|
-
$ignore [:
|
|
223
|
+
$rough } $ignore [:Uppercase_Letter:] → H ;
|
|
224
|
+
$ignore [:Uppercase_Letter:] { $rough → H ;
|
|
225
225
|
$rough ← H ;
|
|
226
226
|
$rough ↔ h ;
|
|
227
227
|
# Completeness for Greek
|
|
@@ -243,7 +243,7 @@ $rough ↔ h ;
|
|
|
243
243
|
← [Ππ] { \' } [Ss] ;
|
|
244
244
|
← [Νν] { \' } $egammaLike ;
|
|
245
245
|
::NFC (NFD) ;
|
|
246
|
-
# ([\u0000-\u007F [:Latin:] [:Greek:] [:
|
|
247
|
-
# ([\u0000-\u007F · [:Latin:] [:
|
|
246
|
+
# ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
|
|
247
|
+
# ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
|
|
248
248
|
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
|
249
249
|
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
|
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
|
|
7
7
|
::NFD (NFC) ;
|
|
8
8
|
# Useful variables
|
|
9
|
-
$lower = [[:
|
|
10
|
-
$upper = [[:
|
|
9
|
+
$lower = [[:Latin:][:Greek:] & [:Ll:]] ;
|
|
10
|
+
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
|
|
11
11
|
$accent = [[:Mn:][:Me:]] ;
|
|
12
12
|
$macron = ̄ ;
|
|
13
13
|
$ddot = ̈ ;
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# Do this before ::Han-Spacedhan() to catch Han after space in original text,
|
|
8
8
|
# and to apply before all other rules.
|
|
9
9
|
$startOfHanMarker = \uFDD1;
|
|
10
|
-
[:^
|
|
10
|
+
[:^Script=Han:] { ([:Script=Han:]) → $startOfHanMarker $1;
|
|
11
11
|
# Need Spacedhan so the name transliterations get spaced properly
|
|
12
12
|
::Han-Spacedhan();
|
|
13
13
|
# Convert special name readings that depend on next character
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Only intended for internal use
|
|
2
2
|
# Make sure Han are normalized, including characters that contain them.
|
|
3
|
-
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:
|
|
4
|
-
# Where XXX is the resolved [:
|
|
5
|
-
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:
|
|
3
|
+
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:Ideographic:]-[:sc=Han:]
|
|
4
|
+
# Where XXX is the resolved [:Ideographic:][:sc=Han:]. It needs updating with each Unicode release!
|
|
5
|
+
:: [[、。々《-』〜・㆒-㆟㈠-㉇㊀-㊰㋀-㋋ ㍘-㍰㍻-㍿㏠-㏾🈐-🈒🈔-🈺🉀-🉈🉐🉑][:Ideographic:][:sc=Han:]] nfkc;
|
|
6
6
|
:: fullwidth-halfwidth;
|
|
7
7
|
。 → '.';
|
|
8
8
|
。→ '.';
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
々→ '⓶';
|
|
24
24
|
〜→ '~';
|
|
25
25
|
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
|
|
26
|
-
$initialPunct = [:Ps:][:Pi:];
|
|
26
|
+
$initialPunct = [[:Ps:][:Pi:]];
|
|
27
27
|
# add space between any Han or terminal punctuation and letters, and
|
|
28
28
|
# between letters and Han or initial punct
|
|
29
29
|
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# note: a global filter is more efficient, but MUST include all source chars
|
|
2
|
-
:: [[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:
|
|
2
|
+
:: [[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]];
|
|
3
3
|
:: NFKC (NFC);
|
|
4
4
|
# Hiragana-Katakana
|
|
5
5
|
# This is largely a one-to-one mapping, but it has a
|
|
@@ -173,5 +173,5 @@ $xo = [
|
|
|
173
173
|
お ← $xo {ー};
|
|
174
174
|
:: NFC (NFKC) ;
|
|
175
175
|
# note: a global filter is more efficient, but MUST include all source chars!!
|
|
176
|
-
:: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:
|
|
176
|
+
:: ([[\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:Nonspacing_Mark:]]-[\u309B \u309C]]);
|
|
177
177
|
# eof
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# note: a global filter is more efficient, but MUST include all source chars
|
|
2
|
-
#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:
|
|
2
|
+
#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]] ;
|
|
3
3
|
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
|
|
4
4
|
### WARNING -- must add width filter, both here and below!!! ###
|
|
5
5
|
:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
|
|
@@ -370,11 +370,11 @@ x → | ks ;
|
|
|
370
370
|
# Final cleanup
|
|
371
371
|
'~' → ; # delete stray tildes between letters
|
|
372
372
|
[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
|
|
373
|
-
# [ʾ[:
|
|
373
|
+
# [ʾ[:Nonspacing_Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
|
|
374
374
|
:: NFC (NFD) ;
|
|
375
375
|
:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
|
|
376
376
|
# note: a global filter is more efficient, but MUST include all source chars!!
|
|
377
|
-
#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:
|
|
377
|
+
#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:Nonspacing_Mark:]]);
|
|
378
378
|
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
|
|
379
379
|
:: ( [[\ -~¢-£¥-¦¬̄₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
|
|
380
380
|
# eof
|
|
@@ -23,5 +23,5 @@ $digit = [1-5];
|
|
|
23
23
|
$1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);
|
|
24
24
|
$1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
|
|
25
25
|
$1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);
|
|
26
|
-
&NumericPinyin-Pinyin($1) ← [:
|
|
26
|
+
&NumericPinyin-Pinyin($1) ← [:Letter:] {($digit)};
|
|
27
27
|
::NFC (NFD);
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
# In our rules, we also convert Arabic punctuation characters to Latin.
|
|
11
11
|
# These appears to be used in Maldivian text, for example in the Universal
|
|
12
12
|
# Declaration of Human Rights.
|
|
13
|
-
::[[:
|
|
13
|
+
::[[:Block=Thaana:][،؛؟٪٫٬]\uFDF2] ;
|
|
14
14
|
::NFD;
|
|
15
15
|
$wordBoundary = [^[:L:][:M:][:N:]] ;
|
|
16
16
|
$vowel = [\u07A6-\u07AF] ;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
|
|
2
2
|
# The rules that convert space into semicolon are in another file;
|
|
3
3
|
# since they have to come BEFORE the break iterator
|
|
4
|
-
$thai = [[:
|
|
4
|
+
$thai = [[:Thai:] ก-ฺเ-๛] ;
|
|
5
5
|
# First convert the semicolon back
|
|
6
6
|
' ' ← $thai { '; ' } $thai;
|
|
7
7
|
# Remove any other spaces between thai letters
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
|
|
19
19
|
#\uE000 → ọ ;
|
|
20
20
|
# ← ọ ;
|
|
21
|
-
$notAbove = [^\p{ccc=0}\p{ccc=
|
|
22
|
-
$notBelow = [^\p{ccc=0}\p{ccc=
|
|
21
|
+
$notAbove = [^\p{ccc=0}\p{ccc=Above}] ;
|
|
22
|
+
$notBelow = [^\p{ccc=0}\p{ccc=Below}] ;
|
|
23
23
|
# Consonants
|
|
24
24
|
# Warning: the 'h's need to be handled carefully!
|
|
25
25
|
# What we really want to say is the following, but we can't
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
#
|
|
13
13
|
# MINIMAL FILTER: Ethiopic-Morse Code
|
|
14
14
|
#
|
|
15
|
-
:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"
|
|
15
|
+
:: [[:Zs:]0-9!\?\+/@()\[\]_:;,\.'"\$=\-[:Ethiopic:]] ;
|
|
16
16
|
([:Lo:])([:Zs:]+)([:Lo:]) → | $1⁄⁂⁄$2$3 ; # ⁄⁂⁄ is assumed to be a sufficiently weird enough sequence that won't naturally appear in any normal content
|
|
17
17
|
#
|
|
18
18
|
########################################################################
|
package/transforms/az-Title.txt
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
|
2
2
|
# Make any string of letters after a cased letter be lower, with rules for i
|
|
3
|
-
[:
|
|
4
|
-
[:
|
|
5
|
-
[:
|
|
3
|
+
[:Cased:] [:Case_Ignorable:]* { İ → i;
|
|
4
|
+
[:Cased:] [:Case_Ignorable:]* { I → ı;
|
|
5
|
+
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
|
|
6
6
|
# Otherwise all lowercase go to upper (titlecase stay as is)
|
|
7
7
|
i→İ ;
|
|
8
8
|
([:Lowercase:]) → &Any-Upper($1) ;
|
package/transforms/de-ASCII.txt
CHANGED
package/transforms/el-Lower.txt
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# and C is not followed by a sequence consisting of zero or more case-ignorable characters and then a cased letter.
|
|
5
5
|
# 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
|
6
6
|
# With translit rules, easiest is to handle the negative condition first, mapping in that case to the regular sigma.
|
|
7
|
-
Σ } [:
|
|
8
|
-
[:
|
|
7
|
+
Σ } [:Case_Ignorable:]* [:Cased:] → σ;
|
|
8
|
+
[:Cased:] [:Case_Ignorable:]* { Σ → ς;
|
|
9
9
|
::Any-Lower;
|
|
10
10
|
::NFC();
|
package/transforms/el-Title.txt
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
# Remove \0301 following Greek, with possible intervening 0308 marks.
|
|
3
3
|
# [[:Greek:] & [:Ll:]] [\u0308]? { \u0301 → ;
|
|
4
4
|
# Make any string of letters after a cased letter be lower, with rules for sigma
|
|
5
|
-
[:
|
|
6
|
-
[:
|
|
7
|
-
[:
|
|
5
|
+
[:Cased:] [:Case_Ignorable:]* { Σ } [:Case_Ignorable:]* [:Cased:] → σ;
|
|
6
|
+
[:Cased:] [:Case_Ignorable:]* { Σ → ς;
|
|
7
|
+
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
|
|
8
8
|
# Otherwise all lowercase go to upper (titlecase stay as is)
|
|
9
9
|
([:Lowercase:]) → &Any-Title($1) ;
|
|
10
10
|
::NFC();
|
package/transforms/it-am.txt
CHANGED
package/transforms/it-ja.txt
CHANGED
package/transforms/lt-Title.txt
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# Make any string of letters after a cased letter be lower
|
|
2
2
|
::NFD();
|
|
3
|
-
[:
|
|
4
|
-
[:
|
|
5
|
-
[:
|
|
6
|
-
[:
|
|
7
|
-
[:
|
|
8
|
-
[:
|
|
9
|
-
[:
|
|
3
|
+
[:Cased:] [:Case_Ignorable:]* {I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
|
|
4
|
+
[:Cased:] [:Case_Ignorable:]* {J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
|
|
5
|
+
[:Cased:] [:Case_Ignorable:]* {I \u0328 } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
|
|
6
|
+
[:Cased:] [:Case_Ignorable:]* {I \u0300 → i \u0307 \u0300;
|
|
7
|
+
[:Cased:] [:Case_Ignorable:]* {I \u0301 → i \u0307 \u0301;
|
|
8
|
+
[:Cased:] [:Case_Ignorable:]* {I \u0303 → i \u0307 \u0303;
|
|
9
|
+
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
|
|
10
10
|
# Otherwise all lowercase go to upper (titlecase stay as is)
|
|
11
11
|
[:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
|
|
12
12
|
([:Lowercase:]) → &Any-Upper($1) ;
|
package/transforms/tr-Title.txt
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
|
2
2
|
# Make any string of letters after a cased letter be lower, with rules for i
|
|
3
|
-
[:
|
|
4
|
-
[:
|
|
5
|
-
[:
|
|
3
|
+
[:Cased:] [:Case_Ignorable:]* { İ → i;
|
|
4
|
+
[:Cased:] [:Case_Ignorable:]* { I → ı;
|
|
5
|
+
[:Cased:] [:Case_Ignorable:]* { (.) → &Any-Lower($1) ;
|
|
6
6
|
# Otherwise all lowercase go to upper (titlecase stay as is)
|
|
7
7
|
i→İ ;
|
|
8
8
|
([:Lowercase:]) → &Any-Upper($1) ;
|