cldr-transforms 46.0.0-BETA2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +41 -0
- package/README.md +27 -0
- package/bower.json +13 -0
- package/package.json +29 -0
- package/transforms/Amharic-Latin-BGN.json +9 -0
- package/transforms/Amharic-Latin-BGN.txt +441 -0
- package/transforms/Any-Accents.json +9 -0
- package/transforms/Any-Accents.txt +270 -0
- package/transforms/Any-Publishing.json +9 -0
- package/transforms/Any-Publishing.txt +63 -0
- package/transforms/Arabic-Latin-BGN.json +9 -0
- package/transforms/Arabic-Latin-BGN.txt +205 -0
- package/transforms/Arabic-Latin.json +11 -0
- package/transforms/Arabic-Latin.txt +181 -0
- package/transforms/Armenian-Latin-BGN.json +9 -0
- package/transforms/Armenian-Latin-BGN.txt +165 -0
- package/transforms/Azerbaijani-Latin-BGN.json +9 -0
- package/transforms/Azerbaijani-Latin-BGN.txt +183 -0
- package/transforms/Belarusian-Latin-BGN.json +9 -0
- package/transforms/Belarusian-Latin-BGN.txt +194 -0
- package/transforms/Bengali-Arabic.json +9 -0
- package/transforms/Bengali-Arabic.txt +7 -0
- package/transforms/Bengali-Devanagari.json +9 -0
- package/transforms/Bengali-Devanagari.txt +7 -0
- package/transforms/Bengali-Gujarati.json +9 -0
- package/transforms/Bengali-Gujarati.txt +7 -0
- package/transforms/Bengali-Gurmukhi.json +9 -0
- package/transforms/Bengali-Gurmukhi.txt +7 -0
- package/transforms/Bengali-InterIndic.json +7 -0
- package/transforms/Bengali-InterIndic.txt +99 -0
- package/transforms/Bengali-Kannada.json +9 -0
- package/transforms/Bengali-Kannada.txt +7 -0
- package/transforms/Bengali-Latin.json +9 -0
- package/transforms/Bengali-Latin.txt +7 -0
- package/transforms/Bengali-Malayalam.json +9 -0
- package/transforms/Bengali-Malayalam.txt +7 -0
- package/transforms/Bengali-Oriya.json +9 -0
- package/transforms/Bengali-Oriya.txt +7 -0
- package/transforms/Bengali-Tamil.json +9 -0
- package/transforms/Bengali-Tamil.txt +7 -0
- package/transforms/Bengali-Telugu.json +9 -0
- package/transforms/Bengali-Telugu.txt +7 -0
- package/transforms/Bengali-ur.json +8 -0
- package/transforms/Bengali-ur.txt +7 -0
- package/transforms/Bulgarian-Latin-BGN.json +9 -0
- package/transforms/Bulgarian-Latin-BGN.txt +237 -0
- package/transforms/CanadianAboriginal-Latin.json +11 -0
- package/transforms/CanadianAboriginal-Latin.txt +184 -0
- package/transforms/Cyrillic-Latin.json +11 -0
- package/transforms/Cyrillic-Latin.txt +280 -0
- package/transforms/Devanagari-Arabic.json +9 -0
- package/transforms/Devanagari-Arabic.txt +7 -0
- package/transforms/Devanagari-Bengali.json +9 -0
- package/transforms/Devanagari-Bengali.txt +7 -0
- package/transforms/Devanagari-Gujarati.json +9 -0
- package/transforms/Devanagari-Gujarati.txt +7 -0
- package/transforms/Devanagari-Gurmukhi.json +9 -0
- package/transforms/Devanagari-Gurmukhi.txt +7 -0
- package/transforms/Devanagari-InterIndic.json +7 -0
- package/transforms/Devanagari-InterIndic.txt +113 -0
- package/transforms/Devanagari-Kannada.json +9 -0
- package/transforms/Devanagari-Kannada.txt +7 -0
- package/transforms/Devanagari-Latin.json +9 -0
- package/transforms/Devanagari-Latin.txt +7 -0
- package/transforms/Devanagari-Malayalam.json +9 -0
- package/transforms/Devanagari-Malayalam.txt +7 -0
- package/transforms/Devanagari-Oriya.json +9 -0
- package/transforms/Devanagari-Oriya.txt +7 -0
- package/transforms/Devanagari-Tamil.json +9 -0
- package/transforms/Devanagari-Tamil.txt +7 -0
- package/transforms/Devanagari-Telugu.json +9 -0
- package/transforms/Devanagari-Telugu.txt +7 -0
- package/transforms/Devanagari-ur.json +8 -0
- package/transforms/Devanagari-ur.txt +7 -0
- package/transforms/Fullwidth-Halfwidth.json +9 -0
- package/transforms/Fullwidth-Halfwidth.txt +261 -0
- package/transforms/Georgian-Latin-BGN.json +9 -0
- package/transforms/Georgian-Latin-BGN.txt +43 -0
- package/transforms/Georgian-Latin-BGN_1981.json +8 -0
- package/transforms/Georgian-Latin-BGN_1981.txt +54 -0
- package/transforms/Georgian-Latin.json +11 -0
- package/transforms/Georgian-Latin.txt +53 -0
- package/transforms/Greek-Latin-BGN.json +9 -0
- package/transforms/Greek-Latin-BGN.txt +419 -0
- package/transforms/Greek-Latin.json +11 -0
- package/transforms/Greek-Latin.txt +251 -0
- package/transforms/Greek_Latin_UNGEGN.json +11 -0
- package/transforms/Greek_Latin_UNGEGN.txt +179 -0
- package/transforms/Gujarati-Arabic.json +9 -0
- package/transforms/Gujarati-Arabic.txt +7 -0
- package/transforms/Gujarati-Bengali.json +9 -0
- package/transforms/Gujarati-Bengali.txt +7 -0
- package/transforms/Gujarati-Devanagari.json +9 -0
- package/transforms/Gujarati-Devanagari.txt +7 -0
- package/transforms/Gujarati-Gurmukhi.json +9 -0
- package/transforms/Gujarati-Gurmukhi.txt +7 -0
- package/transforms/Gujarati-InterIndic.json +7 -0
- package/transforms/Gujarati-InterIndic.txt +89 -0
- package/transforms/Gujarati-Kannada.json +9 -0
- package/transforms/Gujarati-Kannada.txt +7 -0
- package/transforms/Gujarati-Latin.json +9 -0
- package/transforms/Gujarati-Latin.txt +7 -0
- package/transforms/Gujarati-Malayalam.json +9 -0
- package/transforms/Gujarati-Malayalam.txt +7 -0
- package/transforms/Gujarati-Oriya.json +9 -0
- package/transforms/Gujarati-Oriya.txt +7 -0
- package/transforms/Gujarati-Tamil.json +9 -0
- package/transforms/Gujarati-Tamil.txt +7 -0
- package/transforms/Gujarati-Telugu.json +9 -0
- package/transforms/Gujarati-Telugu.txt +7 -0
- package/transforms/Gujarati-ur.json +8 -0
- package/transforms/Gujarati-ur.txt +7 -0
- package/transforms/Gurmukhi-Arabic.json +9 -0
- package/transforms/Gurmukhi-Arabic.txt +7 -0
- package/transforms/Gurmukhi-Bengali.json +9 -0
- package/transforms/Gurmukhi-Bengali.txt +7 -0
- package/transforms/Gurmukhi-Devanagari.json +9 -0
- package/transforms/Gurmukhi-Devanagari.txt +7 -0
- package/transforms/Gurmukhi-Gujarati.json +9 -0
- package/transforms/Gurmukhi-Gujarati.txt +7 -0
- package/transforms/Gurmukhi-InterIndic.json +7 -0
- package/transforms/Gurmukhi-InterIndic.txt +88 -0
- package/transforms/Gurmukhi-Kannada.json +9 -0
- package/transforms/Gurmukhi-Kannada.txt +7 -0
- package/transforms/Gurmukhi-Latin.json +9 -0
- package/transforms/Gurmukhi-Latin.txt +7 -0
- package/transforms/Gurmukhi-Malayalam.json +9 -0
- package/transforms/Gurmukhi-Malayalam.txt +7 -0
- package/transforms/Gurmukhi-Oriya.json +9 -0
- package/transforms/Gurmukhi-Oriya.txt +7 -0
- package/transforms/Gurmukhi-Tamil.json +9 -0
- package/transforms/Gurmukhi-Tamil.txt +7 -0
- package/transforms/Gurmukhi-Telugu.json +9 -0
- package/transforms/Gurmukhi-Telugu.txt +7 -0
- package/transforms/Gurmukhi-ur.json +8 -0
- package/transforms/Gurmukhi-ur.txt +7 -0
- package/transforms/Han-Latin-Names.json +8 -0
- package/transforms/Han-Latin-Names.txt +65 -0
- package/transforms/Han-Latin.json +9 -0
- package/transforms/Han-Latin.txt +1501 -0
- package/transforms/Han-Spacedhan.json +7 -0
- package/transforms/Han-Spacedhan.txt +35 -0
- package/transforms/Hangul-Latin.json +9 -0
- package/transforms/Hangul-Latin.txt +6 -0
- package/transforms/Hebrew-Latin-BGN.json +9 -0
- package/transforms/Hebrew-Latin-BGN.txt +113 -0
- package/transforms/Hebrew-Latin.json +11 -0
- package/transforms/Hebrew-Latin.txt +91 -0
- package/transforms/Hiragana-Katakana.json +11 -0
- package/transforms/Hiragana-Katakana.txt +179 -0
- package/transforms/Hiragana-Latin.json +11 -0
- package/transforms/Hiragana-Latin.txt +9 -0
- package/transforms/IPA-XSampa.json +11 -0
- package/transforms/IPA-XSampa.txt +228 -0
- package/transforms/InterIndic-Arabic.json +7 -0
- package/transforms/InterIndic-Arabic.txt +128 -0
- package/transforms/InterIndic-Bengali.json +7 -0
- package/transforms/InterIndic-Bengali.txt +135 -0
- package/transforms/InterIndic-Devanagari.json +7 -0
- package/transforms/InterIndic-Devanagari.txt +151 -0
- package/transforms/InterIndic-Gujarati.json +7 -0
- package/transforms/InterIndic-Gujarati.txt +136 -0
- package/transforms/InterIndic-Gurmukhi.json +7 -0
- package/transforms/InterIndic-Gurmukhi.txt +142 -0
- package/transforms/InterIndic-Kannada.json +7 -0
- package/transforms/InterIndic-Kannada.txt +137 -0
- package/transforms/InterIndic-Latin.json +7 -0
- package/transforms/InterIndic-Latin.txt +492 -0
- package/transforms/InterIndic-Malayalam.json +7 -0
- package/transforms/InterIndic-Malayalam.txt +137 -0
- package/transforms/InterIndic-Oriya.json +7 -0
- package/transforms/InterIndic-Oriya.txt +135 -0
- package/transforms/InterIndic-Tamil.json +7 -0
- package/transforms/InterIndic-Tamil.txt +136 -0
- package/transforms/InterIndic-Telugu.json +7 -0
- package/transforms/InterIndic-Telugu.txt +136 -0
- package/transforms/InterIndic-ur.json +7 -0
- package/transforms/InterIndic-ur.txt +119 -0
- package/transforms/Jamo-Latin.json +9 -0
- package/transforms/Jamo-Latin.txt +6 -0
- package/transforms/Kannada-Arabic.json +9 -0
- package/transforms/Kannada-Arabic.txt +7 -0
- package/transforms/Kannada-Bengali.json +9 -0
- package/transforms/Kannada-Bengali.txt +7 -0
- package/transforms/Kannada-Devanagari.json +9 -0
- package/transforms/Kannada-Devanagari.txt +7 -0
- package/transforms/Kannada-Gujarati.json +9 -0
- package/transforms/Kannada-Gujarati.txt +7 -0
- package/transforms/Kannada-Gurmukhi.json +9 -0
- package/transforms/Kannada-Gurmukhi.txt +7 -0
- package/transforms/Kannada-InterIndic.json +7 -0
- package/transforms/Kannada-InterIndic.txt +87 -0
- package/transforms/Kannada-Latin.json +9 -0
- package/transforms/Kannada-Latin.txt +7 -0
- package/transforms/Kannada-Malayalam.json +9 -0
- package/transforms/Kannada-Malayalam.txt +7 -0
- package/transforms/Kannada-Oriya.json +9 -0
- package/transforms/Kannada-Oriya.txt +7 -0
- package/transforms/Kannada-Tamil.json +9 -0
- package/transforms/Kannada-Tamil.txt +7 -0
- package/transforms/Kannada-Telugu.json +9 -0
- package/transforms/Kannada-Telugu.txt +7 -0
- package/transforms/Kannada-ur.json +8 -0
- package/transforms/Kannada-ur.txt +7 -0
- package/transforms/Katakana-Latin-BGN.json +9 -0
- package/transforms/Katakana-Latin-BGN.txt +335 -0
- package/transforms/Kazakh-Latin-BGN.json +9 -0
- package/transforms/Kazakh-Latin-BGN.txt +332 -0
- package/transforms/Kirghiz-Latin-BGN.json +9 -0
- package/transforms/Kirghiz-Latin-BGN.txt +211 -0
- package/transforms/Korean-Latin-BGN.json +9 -0
- package/transforms/Korean-Latin-BGN.txt +345 -0
- package/transforms/Latin-ASCII.json +9 -0
- package/transforms/Latin-ASCII.txt +909 -0
- package/transforms/Latin-Armenian.json +11 -0
- package/transforms/Latin-Armenian.txt +88 -0
- package/transforms/Latin-Bengali.json +9 -0
- package/transforms/Latin-Bengali.txt +8 -0
- package/transforms/Latin-Bopomofo.json +11 -0
- package/transforms/Latin-Bopomofo.txt +1445 -0
- package/transforms/Latin-ConjoiningJamo.json +7 -0
- package/transforms/Latin-ConjoiningJamo.txt +477 -0
- package/transforms/Latin-Devanagari.json +9 -0
- package/transforms/Latin-Devanagari.txt +8 -0
- package/transforms/Latin-Ethiopic.json +11 -0
- package/transforms/Latin-Ethiopic.txt +277 -0
- package/transforms/Latin-Gujarati.json +9 -0
- package/transforms/Latin-Gujarati.txt +8 -0
- package/transforms/Latin-Gurmukhi.json +9 -0
- package/transforms/Latin-Gurmukhi.txt +8 -0
- package/transforms/Latin-Hangul.json +9 -0
- package/transforms/Latin-Hangul.txt +7 -0
- package/transforms/Latin-InterIndic.json +7 -0
- package/transforms/Latin-InterIndic.txt +377 -0
- package/transforms/Latin-Jamo.json +9 -0
- package/transforms/Latin-Jamo.txt +7 -0
- package/transforms/Latin-Kannada.json +9 -0
- package/transforms/Latin-Kannada.txt +8 -0
- package/transforms/Latin-Katakana.json +11 -0
- package/transforms/Latin-Katakana.txt +382 -0
- package/transforms/Latin-Malayalam.json +9 -0
- package/transforms/Latin-Malayalam.txt +8 -0
- package/transforms/Latin-NumericPinyin.json +9 -0
- package/transforms/Latin-NumericPinyin.txt +29 -0
- package/transforms/Latin-Oriya.json +9 -0
- package/transforms/Latin-Oriya.txt +8 -0
- package/transforms/Latin-Tamil.json +9 -0
- package/transforms/Latin-Tamil.txt +8 -0
- package/transforms/Latin-Telugu.json +9 -0
- package/transforms/Latin-Telugu.txt +8 -0
- package/transforms/Latin-Thai.json +9 -0
- package/transforms/Latin-Thai.txt +7 -0
- package/transforms/Macedonian-Latin-BGN.json +9 -0
- package/transforms/Macedonian-Latin-BGN.txt +176 -0
- package/transforms/Malayalam-Arabic.json +9 -0
- package/transforms/Malayalam-Arabic.txt +7 -0
- package/transforms/Malayalam-Bengali.json +9 -0
- package/transforms/Malayalam-Bengali.txt +7 -0
- package/transforms/Malayalam-Devanagari.json +9 -0
- package/transforms/Malayalam-Devanagari.txt +7 -0
- package/transforms/Malayalam-Gujarati.json +9 -0
- package/transforms/Malayalam-Gujarati.txt +7 -0
- package/transforms/Malayalam-Gurmukhi.json +9 -0
- package/transforms/Malayalam-Gurmukhi.txt +7 -0
- package/transforms/Malayalam-InterIndic.json +7 -0
- package/transforms/Malayalam-InterIndic.txt +87 -0
- package/transforms/Malayalam-Kannada.json +9 -0
- package/transforms/Malayalam-Kannada.txt +7 -0
- package/transforms/Malayalam-Latin.json +9 -0
- package/transforms/Malayalam-Latin.txt +7 -0
- package/transforms/Malayalam-Oriya.json +9 -0
- package/transforms/Malayalam-Oriya.txt +7 -0
- package/transforms/Malayalam-Tamil.json +9 -0
- package/transforms/Malayalam-Tamil.txt +7 -0
- package/transforms/Malayalam-Telugu.json +9 -0
- package/transforms/Malayalam-Telugu.txt +7 -0
- package/transforms/Malayalam-ur.json +8 -0
- package/transforms/Malayalam-ur.txt +7 -0
- package/transforms/Maldivian-Latin-BGN.json +9 -0
- package/transforms/Maldivian-Latin-BGN.txt +171 -0
- package/transforms/Mongolian-Latin-BGN.json +9 -0
- package/transforms/Mongolian-Latin-BGN.txt +151 -0
- package/transforms/Myanmar-Latin.json +9 -0
- package/transforms/Myanmar-Latin.txt +367 -0
- package/transforms/Oriya-Arabic.json +9 -0
- package/transforms/Oriya-Arabic.txt +7 -0
- package/transforms/Oriya-Bengali.json +9 -0
- package/transforms/Oriya-Bengali.txt +7 -0
- package/transforms/Oriya-Devanagari.json +9 -0
- package/transforms/Oriya-Devanagari.txt +7 -0
- package/transforms/Oriya-Gujarati.json +9 -0
- package/transforms/Oriya-Gujarati.txt +7 -0
- package/transforms/Oriya-Gurmukhi.json +9 -0
- package/transforms/Oriya-Gurmukhi.txt +7 -0
- package/transforms/Oriya-InterIndic.json +7 -0
- package/transforms/Oriya-InterIndic.txt +91 -0
- package/transforms/Oriya-Kannada.json +9 -0
- package/transforms/Oriya-Kannada.txt +7 -0
- package/transforms/Oriya-Latin.json +9 -0
- package/transforms/Oriya-Latin.txt +7 -0
- package/transforms/Oriya-Malayalam.json +9 -0
- package/transforms/Oriya-Malayalam.txt +7 -0
- package/transforms/Oriya-Tamil.json +9 -0
- package/transforms/Oriya-Tamil.txt +7 -0
- package/transforms/Oriya-Telugu.json +9 -0
- package/transforms/Oriya-Telugu.txt +7 -0
- package/transforms/Oriya-ur.json +8 -0
- package/transforms/Oriya-ur.txt +7 -0
- package/transforms/Pashto-Latin-BGN.json +9 -0
- package/transforms/Pashto-Latin-BGN.txt +231 -0
- package/transforms/Persian-Latin-BGN.json +9 -0
- package/transforms/Persian-Latin-BGN.txt +203 -0
- package/transforms/Pinyin-NumericPinyin.json +7 -0
- package/transforms/Pinyin-NumericPinyin.txt +8 -0
- package/transforms/Russian-Latin-BGN.json +9 -0
- package/transforms/Russian-Latin-BGN.txt +235 -0
- package/transforms/Serbian-Latin-BGN.json +9 -0
- package/transforms/Serbian-Latin-BGN.txt +119 -0
- package/transforms/Simplified-Traditional.json +11 -0
- package/transforms/Simplified-Traditional.txt +4393 -0
- package/transforms/Syriac-Latin.json +11 -0
- package/transforms/Syriac-Latin.txt +52 -0
- package/transforms/Tamil-Arabic.json +9 -0
- package/transforms/Tamil-Arabic.txt +7 -0
- package/transforms/Tamil-Bengali.json +9 -0
- package/transforms/Tamil-Bengali.txt +7 -0
- package/transforms/Tamil-Devanagari.json +9 -0
- package/transforms/Tamil-Devanagari.txt +7 -0
- package/transforms/Tamil-Gujarati.json +9 -0
- package/transforms/Tamil-Gujarati.txt +7 -0
- package/transforms/Tamil-Gurmukhi.json +9 -0
- package/transforms/Tamil-Gurmukhi.txt +7 -0
- package/transforms/Tamil-InterIndic.json +7 -0
- package/transforms/Tamil-InterIndic.txt +70 -0
- package/transforms/Tamil-Kannada.json +9 -0
- package/transforms/Tamil-Kannada.txt +7 -0
- package/transforms/Tamil-Latin.json +9 -0
- package/transforms/Tamil-Latin.txt +7 -0
- package/transforms/Tamil-Malayalam.json +9 -0
- package/transforms/Tamil-Malayalam.txt +7 -0
- package/transforms/Tamil-Oriya.json +9 -0
- package/transforms/Tamil-Oriya.txt +7 -0
- package/transforms/Tamil-Telugu.json +9 -0
- package/transforms/Tamil-Telugu.txt +7 -0
- package/transforms/Tamil-ur.json +8 -0
- package/transforms/Tamil-ur.txt +7 -0
- package/transforms/Telugu-Arabic.json +9 -0
- package/transforms/Telugu-Arabic.txt +7 -0
- package/transforms/Telugu-Bengali.json +9 -0
- package/transforms/Telugu-Bengali.txt +7 -0
- package/transforms/Telugu-Devanagari.json +9 -0
- package/transforms/Telugu-Devanagari.txt +7 -0
- package/transforms/Telugu-Gujarati.json +9 -0
- package/transforms/Telugu-Gujarati.txt +7 -0
- package/transforms/Telugu-Gurmukhi.json +9 -0
- package/transforms/Telugu-Gurmukhi.txt +7 -0
- package/transforms/Telugu-InterIndic.json +7 -0
- package/transforms/Telugu-InterIndic.txt +87 -0
- package/transforms/Telugu-Kannada.json +9 -0
- package/transforms/Telugu-Kannada.txt +7 -0
- package/transforms/Telugu-Latin.json +9 -0
- package/transforms/Telugu-Latin.txt +7 -0
- package/transforms/Telugu-Malayalam.json +9 -0
- package/transforms/Telugu-Malayalam.txt +7 -0
- package/transforms/Telugu-Oriya.json +9 -0
- package/transforms/Telugu-Oriya.txt +7 -0
- package/transforms/Telugu-Tamil.json +9 -0
- package/transforms/Telugu-Tamil.txt +7 -0
- package/transforms/Telugu-ur.json +8 -0
- package/transforms/Telugu-ur.txt +7 -0
- package/transforms/Thaana-Latin.json +11 -0
- package/transforms/Thaana-Latin.txt +433 -0
- package/transforms/Thai-Latin.json +9 -0
- package/transforms/Thai-Latin.txt +9 -0
- package/transforms/Thai-ThaiLogical.json +7 -0
- package/transforms/Thai-ThaiLogical.txt +16 -0
- package/transforms/Thai-ThaiSemi.json +7 -0
- package/transforms/Thai-ThaiSemi.txt +6 -0
- package/transforms/ThaiLogical-Latin.json +7 -0
- package/transforms/ThaiLogical-Latin.txt +147 -0
- package/transforms/Turkmen-Latin-BGN.json +9 -0
- package/transforms/Turkmen-Latin-BGN.txt +302 -0
- package/transforms/Ukrainian-Latin-BGN.json +9 -0
- package/transforms/Ukrainian-Latin-BGN.txt +283 -0
- package/transforms/Uzbek-Latin-BGN.json +9 -0
- package/transforms/Uzbek-Latin-BGN.txt +261 -0
- package/transforms/am-Ethi-t-am-brai.json +11 -0
- package/transforms/am-Ethi-t-am-brai.txt +652 -0
- package/transforms/am-Ethi-t-am-ethi-m0-geminate.json +9 -0
- package/transforms/am-Ethi-t-am-ethi-m0-geminate.txt +6925 -0
- package/transforms/am-Ethi-t-d0-morse.json +9 -0
- package/transforms/am-Ethi-t-d0-morse.txt +657 -0
- package/transforms/am-am_FONIPA.json +9 -0
- package/transforms/am-am_FONIPA.txt +694 -0
- package/transforms/am-ar.json +8 -0
- package/transforms/am-ar.txt +5 -0
- package/transforms/am-chr.json +8 -0
- package/transforms/am-chr.txt +4 -0
- package/transforms/am-fa.json +8 -0
- package/transforms/am-fa.txt +4 -0
- package/transforms/az-Lower.json +8 -0
- package/transforms/az-Lower.txt +13 -0
- package/transforms/az-Title.json +8 -0
- package/transforms/az-Title.txt +11 -0
- package/transforms/az-Upper.json +8 -0
- package/transforms/az-Upper.txt +5 -0
- package/transforms/blt-fonipa-t-blt.json +8 -0
- package/transforms/blt-fonipa-t-blt.txt +132 -0
- package/transforms/byn-Ethi-t-byn-latn-m0-tekie-alibekit.json +11 -0
- package/transforms/byn-Ethi-t-byn-latn-m0-tekie-alibekit.txt +537 -0
- package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.json +11 -0
- package/transforms/byn-Ethi-t-byn-latn-m0-xaleget.txt +773 -0
- package/transforms/ch-am.json +8 -0
- package/transforms/ch-am.txt +4 -0
- package/transforms/ch-ar.json +8 -0
- package/transforms/ch-ar.txt +4 -0
- package/transforms/ch-ch_FONIPA.json +8 -0
- package/transforms/ch-ch_FONIPA.txt +79 -0
- package/transforms/ch-chr.json +8 -0
- package/transforms/ch-chr.txt +4 -0
- package/transforms/ch-fa.json +8 -0
- package/transforms/ch-fa.txt +4 -0
- package/transforms/chr-chr_FONIPA.json +8 -0
- package/transforms/chr-chr_FONIPA.txt +111 -0
- package/transforms/cs-am.json +8 -0
- package/transforms/cs-am.txt +4 -0
- package/transforms/cs-ar.json +8 -0
- package/transforms/cs-ar.txt +4 -0
- package/transforms/cs-chr.json +8 -0
- package/transforms/cs-chr.txt +4 -0
- package/transforms/cs-cs_FONIPA.json +8 -0
- package/transforms/cs-cs_FONIPA.txt +74 -0
- package/transforms/cs-fa.json +8 -0
- package/transforms/cs-fa.txt +4 -0
- package/transforms/cs-ja.json +8 -0
- package/transforms/cs-ja.txt +4 -0
- package/transforms/cs-ko.json +8 -0
- package/transforms/cs-ko.txt +4 -0
- package/transforms/cs_FONIPA-ja.json +8 -0
- package/transforms/cs_FONIPA-ja.txt +228 -0
- package/transforms/cs_FONIPA-ko.json +8 -0
- package/transforms/cs_FONIPA-ko.txt +88 -0
- package/transforms/cy-fonipa-t-cy.json +8 -0
- package/transforms/cy-fonipa-t-cy.txt +189 -0
- package/transforms/de-ASCII.json +8 -0
- package/transforms/de-ASCII.txt +15 -0
- package/transforms/dsb-dsb_FONIPA.json +8 -0
- package/transforms/dsb-dsb_FONIPA.txt +67 -0
- package/transforms/el-Lower.json +8 -0
- package/transforms/el-Lower.txt +12 -0
- package/transforms/el-Title.json +8 -0
- package/transforms/el-Title.txt +12 -0
- package/transforms/el-Upper.json +8 -0
- package/transforms/el-Upper.txt +11 -0
- package/transforms/eo-am.json +8 -0
- package/transforms/eo-am.txt +4 -0
- package/transforms/eo-ar.json +8 -0
- package/transforms/eo-ar.txt +4 -0
- package/transforms/eo-chr.json +8 -0
- package/transforms/eo-chr.txt +4 -0
- package/transforms/eo-eo_FONIPA.json +8 -0
- package/transforms/eo-eo_FONIPA.txt +47 -0
- package/transforms/eo-fa.json +8 -0
- package/transforms/eo-fa.txt +4 -0
- package/transforms/es-am.json +8 -0
- package/transforms/es-am.txt +4 -0
- package/transforms/es-ar.json +8 -0
- package/transforms/es-ar.txt +13 -0
- package/transforms/es-chr.json +8 -0
- package/transforms/es-chr.txt +4 -0
- package/transforms/es-es_FONIPA.json +8 -0
- package/transforms/es-es_FONIPA.txt +139 -0
- package/transforms/es-fa.json +8 -0
- package/transforms/es-fa.txt +13 -0
- package/transforms/es-ja.json +8 -0
- package/transforms/es-ja.txt +4 -0
- package/transforms/es-zh.json +8 -0
- package/transforms/es-zh.txt +4 -0
- package/transforms/es_419-am.json +8 -0
- package/transforms/es_419-am.txt +5 -0
- package/transforms/es_419-ar.json +8 -0
- package/transforms/es_419-ar.txt +14 -0
- package/transforms/es_419-chr.json +8 -0
- package/transforms/es_419-chr.txt +5 -0
- package/transforms/es_419-fa.json +8 -0
- package/transforms/es_419-fa.txt +14 -0
- package/transforms/es_419-ja.json +8 -0
- package/transforms/es_419-ja.txt +5 -0
- package/transforms/es_419-zh.json +8 -0
- package/transforms/es_419-zh.txt +5 -0
- package/transforms/es_FONIPA-am.json +8 -0
- package/transforms/es_FONIPA-am.txt +230 -0
- package/transforms/es_FONIPA-es_419_FONIPA.json +8 -0
- package/transforms/es_FONIPA-es_419_FONIPA.txt +6 -0
- package/transforms/es_FONIPA-ja.json +8 -0
- package/transforms/es_FONIPA-ja.txt +155 -0
- package/transforms/es_FONIPA-zh.json +8 -0
- package/transforms/es_FONIPA-zh.txt +522 -0
- package/transforms/fa-fa_FONIPA.json +8 -0
- package/transforms/fa-fa_FONIPA.txt +107 -0
- package/transforms/gz-Ethi-t-und-sarb.json +11 -0
- package/transforms/gz-Ethi-t-und-sarb.txt +180 -0
- package/transforms/ha-ha_NE.json +8 -0
- package/transforms/ha-ha_NE.txt +6 -0
- package/transforms/hy-am.json +8 -0
- package/transforms/hy-am.txt +4 -0
- package/transforms/hy-ar.json +8 -0
- package/transforms/hy-ar.txt +4 -0
- package/transforms/hy-chr.json +8 -0
- package/transforms/hy-chr.txt +4 -0
- package/transforms/hy-fa.json +8 -0
- package/transforms/hy-fa.txt +4 -0
- package/transforms/hy-hy_FONIPA.json +8 -0
- package/transforms/hy-hy_FONIPA.txt +52 -0
- package/transforms/hy_AREVMDA-am.json +8 -0
- package/transforms/hy_AREVMDA-am.txt +4 -0
- package/transforms/hy_AREVMDA-ar.json +8 -0
- package/transforms/hy_AREVMDA-ar.txt +4 -0
- package/transforms/hy_AREVMDA-chr.json +8 -0
- package/transforms/hy_AREVMDA-chr.txt +4 -0
- package/transforms/hy_AREVMDA-fa.json +8 -0
- package/transforms/hy_AREVMDA-fa.txt +4 -0
- package/transforms/hy_AREVMDA-hy_AREVMDA_FONIPA.json +8 -0
- package/transforms/hy_AREVMDA-hy_AREVMDA_FONIPA.txt +80 -0
- package/transforms/ia-am.json +8 -0
- package/transforms/ia-am.txt +4 -0
- package/transforms/ia-ar.json +8 -0
- package/transforms/ia-ar.txt +4 -0
- package/transforms/ia-chr.json +8 -0
- package/transforms/ia-chr.txt +4 -0
- package/transforms/ia-fa.json +8 -0
- package/transforms/ia-fa.txt +4 -0
- package/transforms/ia-ia_FONIPA.json +8 -0
- package/transforms/ia-ia_FONIPA.txt +71 -0
- package/transforms/it-am.json +8 -0
- package/transforms/it-am.txt +257 -0
- package/transforms/it-ja.json +8 -0
- package/transforms/it-ja.txt +259 -0
- package/transforms/ja_Latn-ko.json +8 -0
- package/transforms/ja_Latn-ko.txt +141 -0
- package/transforms/ja_Latn-ru.json +8 -0
- package/transforms/ja_Latn-ru.txt +123 -0
- package/transforms/kk-am.json +8 -0
- package/transforms/kk-am.txt +4 -0
- package/transforms/kk-ar.json +8 -0
- package/transforms/kk-ar.txt +4 -0
- package/transforms/kk-chr.json +8 -0
- package/transforms/kk-chr.txt +4 -0
- package/transforms/kk-fa.json +8 -0
- package/transforms/kk-fa.txt +4 -0
- package/transforms/kk-kk_FONIPA.json +8 -0
- package/transforms/kk-kk_FONIPA.txt +59 -0
- package/transforms/ky-am.json +8 -0
- package/transforms/ky-am.txt +4 -0
- package/transforms/ky-ar.json +8 -0
- package/transforms/ky-ar.txt +4 -0
- package/transforms/ky-chr.json +8 -0
- package/transforms/ky-chr.txt +4 -0
- package/transforms/ky-fa.json +8 -0
- package/transforms/ky-fa.txt +4 -0
- package/transforms/ky-ky_FONIPA.json +8 -0
- package/transforms/ky-ky_FONIPA.txt +71 -0
- package/transforms/la-la_FONIPA.json +8 -0
- package/transforms/la-la_FONIPA.txt +79 -0
- package/transforms/lt-Lower.json +8 -0
- package/transforms/lt-Lower.txt +20 -0
- package/transforms/lt-Title.json +8 -0
- package/transforms/lt-Title.txt +15 -0
- package/transforms/lt-Upper.json +8 -0
- package/transforms/lt-Upper.txt +8 -0
- package/transforms/mn-mn_Latn-MNS.json +8 -0
- package/transforms/mn-mn_Latn-MNS.txt +86 -0
- package/transforms/my-am.json +8 -0
- package/transforms/my-am.txt +4 -0
- package/transforms/my-ar.json +8 -0
- package/transforms/my-ar.txt +4 -0
- package/transforms/my-chr.json +8 -0
- package/transforms/my-chr.txt +4 -0
- package/transforms/my-fa.json +8 -0
- package/transforms/my-fa.txt +4 -0
- package/transforms/my-my_FONIPA.json +8 -0
- package/transforms/my-my_FONIPA.txt +325 -0
- package/transforms/my-t-my-d0-zawgyi.json +8 -0
- package/transforms/my-t-my-d0-zawgyi.txt +222 -0
- package/transforms/my-t-my-s0-zawgyi.json +8 -0
- package/transforms/my-t-my-s0-zawgyi.txt +231 -0
- package/transforms/nl-Title.json +8 -0
- package/transforms/nl-Title.txt +7 -0
- package/transforms/nv-nv_FONIPA.json +8 -0
- package/transforms/nv-nv_FONIPA.txt +74 -0
- package/transforms/pl-am.json +8 -0
- package/transforms/pl-am.txt +4 -0
- package/transforms/pl-ar.json +8 -0
- package/transforms/pl-ar.txt +4 -0
- package/transforms/pl-chr.json +8 -0
- package/transforms/pl-chr.txt +4 -0
- package/transforms/pl-fa.json +8 -0
- package/transforms/pl-fa.txt +4 -0
- package/transforms/pl-ja.json +8 -0
- package/transforms/pl-ja.txt +4 -0
- package/transforms/pl-pl_FONIPA.json +8 -0
- package/transforms/pl-pl_FONIPA.txt +113 -0
- package/transforms/pl_FONIPA-ja.json +8 -0
- package/transforms/pl_FONIPA-ja.txt +301 -0
- package/transforms/rm_SURSILV-am.json +8 -0
- package/transforms/rm_SURSILV-am.txt +4 -0
- package/transforms/rm_SURSILV-ar.json +8 -0
- package/transforms/rm_SURSILV-ar.txt +4 -0
- package/transforms/rm_SURSILV-chr.json +8 -0
- package/transforms/rm_SURSILV-chr.txt +4 -0
- package/transforms/rm_SURSILV-fa.json +8 -0
- package/transforms/rm_SURSILV-fa.txt +4 -0
- package/transforms/rm_SURSILV-rm_FONIPA_SURSILV.json +8 -0
- package/transforms/rm_SURSILV-rm_FONIPA_SURSILV.txt +96 -0
- package/transforms/ro-am.json +8 -0
- package/transforms/ro-am.txt +4 -0
- package/transforms/ro-ar.json +8 -0
- package/transforms/ro-ar.txt +4 -0
- package/transforms/ro-chr.json +8 -0
- package/transforms/ro-chr.txt +4 -0
- package/transforms/ro-fa.json +8 -0
- package/transforms/ro-fa.txt +4 -0
- package/transforms/ro-ja.json +8 -0
- package/transforms/ro-ja.txt +4 -0
- package/transforms/ro-ro_FONIPA.json +8 -0
- package/transforms/ro-ro_FONIPA.txt +121 -0
- package/transforms/ro_FONIPA-ja.json +8 -0
- package/transforms/ro_FONIPA-ja.txt +211 -0
- package/transforms/ru-ja.json +8 -0
- package/transforms/ru-ja.txt +444 -0
- package/transforms/ru-zh.json +8 -0
- package/transforms/ru-zh.txt +996 -0
- package/transforms/ru_Latn-ru-BGN.json +9 -0
- package/transforms/ru_Latn-ru-BGN.txt +97 -0
- package/transforms/sat-am.json +8 -0
- package/transforms/sat-am.txt +6 -0
- package/transforms/sat-ar.json +8 -0
- package/transforms/sat-ar.txt +6 -0
- package/transforms/sat-chr.json +8 -0
- package/transforms/sat-chr.txt +6 -0
- package/transforms/sat-fa.json +8 -0
- package/transforms/sat-fa.txt +6 -0
- package/transforms/sat_Olck-sat_FONIPA.json +8 -0
- package/transforms/sat_Olck-sat_FONIPA.txt +174 -0
- package/transforms/sgw-Ethi-t-und-ethi.json +11 -0
- package/transforms/sgw-Ethi-t-und-ethi.txt +138 -0
- package/transforms/si-am.json +8 -0
- package/transforms/si-am.txt +4 -0
- package/transforms/si-ar.json +8 -0
- package/transforms/si-ar.txt +4 -0
- package/transforms/si-chr.json +8 -0
- package/transforms/si-chr.txt +4 -0
- package/transforms/si-fa.json +8 -0
- package/transforms/si-fa.txt +4 -0
- package/transforms/si-si_FONIPA.json +8 -0
- package/transforms/si-si_FONIPA.txt +157 -0
- package/transforms/si-si_Latn.json +8 -0
- package/transforms/si-si_Latn.txt +94 -0
- package/transforms/sk-am.json +8 -0
- package/transforms/sk-am.txt +4 -0
- package/transforms/sk-ar.json +8 -0
- package/transforms/sk-ar.txt +4 -0
- package/transforms/sk-chr.json +8 -0
- package/transforms/sk-chr.txt +4 -0
- package/transforms/sk-fa.json +8 -0
- package/transforms/sk-fa.txt +4 -0
- package/transforms/sk-ja.json +8 -0
- package/transforms/sk-ja.txt +4 -0
- package/transforms/sk-sk_FONIPA.json +8 -0
- package/transforms/sk-sk_FONIPA.txt +101 -0
- package/transforms/sk_FONIPA-ja.json +8 -0
- package/transforms/sk_FONIPA-ja.txt +252 -0
- package/transforms/ta-ta_FONIPA.json +8 -0
- package/transforms/ta-ta_FONIPA.txt +73 -0
- package/transforms/tlh-am.json +8 -0
- package/transforms/tlh-am.txt +4 -0
- package/transforms/tlh-ar.json +8 -0
- package/transforms/tlh-ar.txt +4 -0
- package/transforms/tlh-chr.json +8 -0
- package/transforms/tlh-chr.txt +4 -0
- package/transforms/tlh-fa.json +8 -0
- package/transforms/tlh-fa.txt +4 -0
- package/transforms/tlh-tlh_FONIPA.json +8 -0
- package/transforms/tlh-tlh_FONIPA.txt +46 -0
- package/transforms/tr-Lower.json +8 -0
- package/transforms/tr-Lower.txt +13 -0
- package/transforms/tr-Title.json +8 -0
- package/transforms/tr-Title.txt +11 -0
- package/transforms/tr-Upper.json +8 -0
- package/transforms/tr-Upper.txt +8 -0
- package/transforms/ug-ug_FONIPA.json +8 -0
- package/transforms/ug-ug_FONIPA.txt +69 -0
- package/transforms/und-Ethi-t-und-cyrl-m0-gutgarts.json +11 -0
- package/transforms/und-Ethi-t-und-cyrl-m0-gutgarts.txt +747 -0
- package/transforms/und-Ethi-t-und-latn-m0-aethiopi-geminate.json +9 -0
- package/transforms/und-Ethi-t-und-latn-m0-aethiopi-geminate.txt +19 -0
- package/transforms/und-Ethi-t-und-latn-m0-aethiopi.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-aethiopi.txt +633 -0
- package/transforms/und-Ethi-t-und-latn-m0-alaloc-geminate.json +9 -0
- package/transforms/und-Ethi-t-und-latn-m0-alaloc-geminate.txt +19 -0
- package/transforms/und-Ethi-t-und-latn-m0-alaloc.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-alaloc.txt +757 -0
- package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.json +9 -0
- package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf-geminate.txt +19 -0
- package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-beta_metsehaf.txt +585 -0
- package/transforms/und-Ethi-t-und-latn-m0-es3842.json +9 -0
- package/transforms/und-Ethi-t-und-latn-m0-es3842.txt +640 -0
- package/transforms/und-Ethi-t-und-latn-m0-ies-jes-1964-geminate.json +9 -0
- package/transforms/und-Ethi-t-und-latn-m0-ies-jes-1964-geminate.txt +19 -0
- package/transforms/und-Ethi-t-und-latn-m0-ies-jes-1964.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-ies-jes-1964.txt +619 -0
- package/transforms/und-Ethi-t-und-latn-m0-lambdin.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-lambdin.txt +627 -0
- package/transforms/und-Ethi-t-und-latn-m0-sera.json +11 -0
- package/transforms/und-Ethi-t-und-latn-m0-sera.txt +695 -0
- package/transforms/und-Ethi-t-und-latn.json +11 -0
- package/transforms/und-Ethi-t-und-latn.txt +16 -0
- package/transforms/und_FONIPA-ar.json +8 -0
- package/transforms/und_FONIPA-ar.txt +114 -0
- package/transforms/und_FONIPA-chr.json +8 -0
- package/transforms/und_FONIPA-chr.txt +172 -0
- package/transforms/und_FONIPA-fa.json +8 -0
- package/transforms/und_FONIPA-fa.txt +109 -0
- package/transforms/uz_Cyrl-uz_Latn.json +9 -0
- package/transforms/uz_Cyrl-uz_Latn.txt +103 -0
- package/transforms/vec-vec_FONIPA.json +8 -0
- package/transforms/vec-vec_FONIPA.txt +85 -0
- package/transforms/xh-am.json +8 -0
- package/transforms/xh-am.txt +4 -0
- package/transforms/xh-ar.json +8 -0
- package/transforms/xh-ar.txt +4 -0
- package/transforms/xh-chr.json +8 -0
- package/transforms/xh-chr.txt +4 -0
- package/transforms/xh-fa.json +8 -0
- package/transforms/xh-fa.txt +4 -0
- package/transforms/xh-xh_FONIPA.json +8 -0
- package/transforms/xh-xh_FONIPA.txt +85 -0
- package/transforms/yo-yo_BJ.json +8 -0
- package/transforms/yo-yo_BJ.txt +36 -0
- package/transforms/zh_Latn_PINYIN-ru.json +8 -0
- package/transforms/zh_Latn_PINYIN-ru.txt +148 -0
- package/transforms/zu-am.json +8 -0
- package/transforms/zu-am.txt +4 -0
- package/transforms/zu-ar.json +8 -0
- package/transforms/zu-ar.txt +4 -0
- package/transforms/zu-chr.json +8 -0
- package/transforms/zu-chr.txt +4 -0
- package/transforms/zu-fa.json +8 -0
- package/transforms/zu-fa.txt +4 -0
- package/transforms/zu-zu_FONIPA.json +8 -0
- package/transforms/zu-zu_FONIPA.txt +72 -0
- package/transforms.json +385 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
|
|
2
|
+
# This transform converts Unicode Burmese text into Zawgyi font encoded
|
|
3
|
+
# form. Zawgyi is a popular, non-standard encoding scheme in Myanmar
|
|
4
|
+
# that uses the same code range as Myanmar Unicode but assigns different
|
|
5
|
+
# characters or glyphs to some codepoints. In addition to character remapping,
|
|
6
|
+
# context-based reordering of codepoints is needed to give readable
|
|
7
|
+
# output when the output is displayed with a Zawgyi font such as
|
|
8
|
+
# ZawgyiOne.ttf or ZawgyiOne2008.ttf.
|
|
9
|
+
#
|
|
10
|
+
# The transform is done in two main stages:
|
|
11
|
+
# (1) Map all Unicode codepoints to their Zawgyi counterparts.
|
|
12
|
+
# (2) Perform reordering.
|
|
13
|
+
# Modern Burmese digits and Unicode code points.
|
|
14
|
+
$nondigits = [^\u1040-\u1049];
|
|
15
|
+
$consonant = [\u1000-\u1021];
|
|
16
|
+
$narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f];
|
|
17
|
+
$wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021];
|
|
18
|
+
$widenya = [\u100a\u106b];
|
|
19
|
+
$othernya = [\u1009\u106a];
|
|
20
|
+
$vowelsign = [\u102B-\u1030\u1032];
|
|
21
|
+
$vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F];
|
|
22
|
+
$ukinzi = [\u1004\u101b\u105a]\u103A\u1039;
|
|
23
|
+
$medialraZ = [\u103b\u107e-\u1084];
|
|
24
|
+
$lowsignZ = [\u102f\u1030\u1037\u103c\u103d\u1087-\u108a];
|
|
25
|
+
$highsignZ = [\u102d\u102e\u1032\u1036\u1039\u1064];
|
|
26
|
+
$subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096];
|
|
27
|
+
$vowelsAndConsonants = [\u1000-\u102a];
|
|
28
|
+
#### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI
|
|
29
|
+
$ukinzi ($consonant) \u103B > $1 \u103A \u1064 ;
|
|
30
|
+
$ukinzi ($consonant) \u102D \u1036 > $1 \u108e ;
|
|
31
|
+
$ukinzi ($consonant) \u102D > $1 \u108b ;
|
|
32
|
+
$ukinzi ($consonant) \u102E > $1 \u108C ;
|
|
33
|
+
$ukinzi ($consonant) \u1036 > $1 \u108D ;
|
|
34
|
+
$ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ;
|
|
35
|
+
$ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ;
|
|
36
|
+
$ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b ;
|
|
37
|
+
$ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ;
|
|
38
|
+
$ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ;
|
|
39
|
+
$ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ;
|
|
40
|
+
$ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra
|
|
41
|
+
$ukinzi \u102D > \u108B ;
|
|
42
|
+
$ukinzi \u102E > \u108C ;
|
|
43
|
+
$ukinzi \u1036 > \u108D ;
|
|
44
|
+
$ukinzi ($consonant) > $1 \u1064 ;
|
|
45
|
+
\u1025 ($vowelsign) \u1038 > \u106A $1 \u1038 ;
|
|
46
|
+
\u1025 \u102f \u1036 > \u1025 \u1036 \u1033 ;
|
|
47
|
+
\u102D \u1036 > \u108E ;
|
|
48
|
+
# Some composed lower output
|
|
49
|
+
\u103d \u103e > \u108a ;
|
|
50
|
+
\u103e \u102f > \u1088 ;
|
|
51
|
+
\u103E \u1030 > \u1089 ;
|
|
52
|
+
\u103A > \u1039 ;
|
|
53
|
+
\u103B > \u103A ;
|
|
54
|
+
\u103C > \u103B ;
|
|
55
|
+
\u103D > \u103C ;
|
|
56
|
+
\u103E > \u103D ;
|
|
57
|
+
\u103F > \u1086 ;
|
|
58
|
+
([\u1019]) \u103e \u1030 > $1 \u103d \u1034; # A special case with signs.
|
|
59
|
+
\u102B \u103A > \u105A ;
|
|
60
|
+
\u1039 \u1010 \u103d > \u1096 ; # Very special case
|
|
61
|
+
\u1039 \u1000 > \u1060 ;
|
|
62
|
+
\u1039 \u1001 > \u1061 ;
|
|
63
|
+
\u1039 \u1002 > \u1062 ;
|
|
64
|
+
\u1039 \u1003 > \u1063 ;
|
|
65
|
+
\u1039 \u1005 > \u1065 ;
|
|
66
|
+
\u1039 \u1006 > \u1067 ;
|
|
67
|
+
\u1039 \u1007 > \u1068 ;
|
|
68
|
+
\u1039 \u1008 > \u1069 ;
|
|
69
|
+
\u1039 \u100B > \u106C ;
|
|
70
|
+
\u1039 \u100C > \u106D ;
|
|
71
|
+
\u1039 \u100F > \u1070 ;
|
|
72
|
+
\u1039 \u1010 > \u1072 ;
|
|
73
|
+
\u1039 \u1011 > \u1074 ;
|
|
74
|
+
\u1039 \u1012 > \u1075 ;
|
|
75
|
+
\u1039 \u1013 > \u1076 ;
|
|
76
|
+
\u1039 \u1014 > \u1077 ;
|
|
77
|
+
\u1039 \u1015 > \u1078 ;
|
|
78
|
+
\u1039 \u1016 > \u1079 ;
|
|
79
|
+
\u1039 \u1017 > \u107A ;
|
|
80
|
+
\u1039 \u1018 > \u1093 ;
|
|
81
|
+
\u1039 \u1019 > \u107C ;
|
|
82
|
+
\u1039 \u101C > \u1085 ;
|
|
83
|
+
\u100d\u1039\u100D > \u106E ;
|
|
84
|
+
\u100d\u1039\u100E > \u106F ;
|
|
85
|
+
\u100F\u1039\u100D > \u1091 ;
|
|
86
|
+
\u100B\u1039\u100C > \u1092 ;
|
|
87
|
+
\u100B\u1039\u100B > \u1097 ;
|
|
88
|
+
\u104E\u1004\u103A\u1038 > \u104E ;
|
|
89
|
+
#### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES.
|
|
90
|
+
::Null;
|
|
91
|
+
# Handle Na with lower modifiers, medial ra.
|
|
92
|
+
\u1014 ($subscriptitem) ($highsignZ*) \u103b > \u103b \u108f $1 $2;
|
|
93
|
+
\u1014 \u103b ([\u103c\u103d]*) (\u1031*) > $2 \u103b \u108f $1;
|
|
94
|
+
# E Vowel + medial ra. Move the e vowel
|
|
95
|
+
($consonant) \u103b ([\u103c\u103d]*) \u1031 > \u1031 \u103b $1 $2;
|
|
96
|
+
($consonant) \u103b > \u103b $1 ;
|
|
97
|
+
($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ;
|
|
98
|
+
($consonant) (\u108a) \u1031 > \u1031 $1 $2 ;
|
|
99
|
+
# Ra + kinzi
|
|
100
|
+
($consonant) \u1064 \u103b > \u103b $1 \u1064 ;
|
|
101
|
+
# E vowel plus medials
|
|
102
|
+
($consonant) ([\u103a\u103c-\u103d]+) \u1031 > \u1031 $1 $2 ;
|
|
103
|
+
# Handle consonant, subscripted consonant, medial ra
|
|
104
|
+
($consonant) ($subscriptitem) ($highsignZ*) \u103b > \u103b $1 $2 $3 ;
|
|
105
|
+
# No medials intervening.
|
|
106
|
+
($vowelsAndConsonants) \u1031 > \u1031 $1 ;
|
|
107
|
+
# Handle Na with lower modifiers.
|
|
108
|
+
\u1014 ($subscriptitem) > \u108f $1 ;
|
|
109
|
+
\u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094;
|
|
110
|
+
\u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094;
|
|
111
|
+
\u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094;
|
|
112
|
+
# a special case
|
|
113
|
+
\u1014 \u1032 \u1037 > \u1014 \u1032 \u1094;
|
|
114
|
+
\u1014 \u1037 > \u1014 \u1094;
|
|
115
|
+
\u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094;
|
|
116
|
+
\u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2;
|
|
117
|
+
\u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2;
|
|
118
|
+
\u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094;
|
|
119
|
+
\u1014 ($lowsignZ) > \u108f $1;
|
|
120
|
+
# Move 1037 dot to right with other descenders.
|
|
121
|
+
($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094;
|
|
122
|
+
($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2;
|
|
123
|
+
# Handle lack of 104E ၎ MYANMAR SYMBOL AFOREMENTIONED
|
|
124
|
+
($nondigits) \u104e > $1 \u1044;
|
|
125
|
+
\u1031 \u1040 ($nondigits) > \u1031 \u101D $1;
|
|
126
|
+
\u1009 \u103A > \u1025 \u103A;
|
|
127
|
+
\u1025 \u102E > \u1026;
|
|
128
|
+
\u1037 \u103A > \u103A \u1037;
|
|
129
|
+
([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1;
|
|
130
|
+
## Phase 2: Further adjustments
|
|
131
|
+
::Null;
|
|
132
|
+
# Two medials
|
|
133
|
+
\u103a \u103c > \u103c \u107d;
|
|
134
|
+
\u103c \u1094 > \u103c \u1095 ;
|
|
135
|
+
# Medial ra variations, context dependent
|
|
136
|
+
$medialraZ ($narrowconsonant) ($subscriptitem) ($highsignZ) > \u1083 $1 $2 $3 ;
|
|
137
|
+
$medialraZ ($wideconsonant) ($subscriptitem) ($highsignZ) > \u1084 $1 $2 $3;
|
|
138
|
+
$medialraZ ($narrowconsonant) ($subscriptitem) > \u1081 $1 $2 ;
|
|
139
|
+
$medialraZ ($wideconsonant) ($subscriptitem) > \u1082 $1 $2 ;
|
|
140
|
+
$medialraZ ($narrowconsonant) ([\u103c\u108a]) ($highsignZ) > \u1083 $1 $2 $3 ;
|
|
141
|
+
$medialraZ ($wideconsonant) ([\u103c\u108a]) ($highsignZ) > \u1084 $1 $2 $3 ;
|
|
142
|
+
$medialraZ ($narrowconsonant) \u103d ($highsignZ) > \u107f $1 \u1087 $2;
|
|
143
|
+
$medialraZ ($wideconsonant) \u103d ($highsignZ) > \u1080 $1 \u1087 $2;
|
|
144
|
+
$medialraZ ($narrowconsonant) \u102f ($highsignZ) > \u107f $1 \u1033 $2;
|
|
145
|
+
$medialraZ ($wideconsonant) \u102f ($highsignZ) > \u1080 $1 \u1033 $2;
|
|
146
|
+
$medialraZ ($narrowconsonant) \u1030 ($highsignZ) > \u107f $1 \u1034 $2;
|
|
147
|
+
$medialraZ ($wideconsonant) \u1030 ($highsignZ) > \u1080 $1 \u1034 $2;
|
|
148
|
+
$medialraZ ($narrowconsonant) ($lowsignZ*) ($highsignZ) > \u107f $1 $2 $3;
|
|
149
|
+
$medialraZ ($wideconsonant) ($lowsignZ*) ($highsignZ) > \u1080 $1 $2 $3;
|
|
150
|
+
$medialraZ ($narrowconsonant) ([\u103c\u108a]) > \u1081 $1 $2 ;
|
|
151
|
+
$medialraZ ($wideconsonant) ([\u103c\u108a]) > \u1082 $1 $2 ;
|
|
152
|
+
$medialraZ ($narrowconsonant) \u103d > \u103b $1 \u1087;
|
|
153
|
+
$medialraZ ($wideconsonant) \u103d > \u107e $1 \u1087;
|
|
154
|
+
$medialraZ ($narrowconsonant) \u102f > \u103b $1 \u1033;
|
|
155
|
+
$medialraZ ($wideconsonant) \u102f > \u107e $1 \u1033;
|
|
156
|
+
$medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034;
|
|
157
|
+
$medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034;
|
|
158
|
+
$medialraZ ($widenya) > \u1082 $1 ;
|
|
159
|
+
$medialraZ ($othernya) > \u103b \u106a ;
|
|
160
|
+
$medialraZ ($narrowconsonant) > \u103b $1 ;
|
|
161
|
+
$medialraZ ($wideconsonant) > \u107e $1 ;
|
|
162
|
+
\u1009 ($lowsignZ) > \u106a $1;
|
|
163
|
+
\u100A ($lowsignZ)> \u106B $1 ; ## NYA and NNYA
|
|
164
|
+
\u103d \u102d > \u102d \u103d;
|
|
165
|
+
\u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095;
|
|
166
|
+
\u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095;
|
|
167
|
+
\u103a \u102f > \u103a \u1033;
|
|
168
|
+
# Kinzi combo
|
|
169
|
+
\u1064 \u102e > \u108c ;
|
|
170
|
+
##### Phase 3
|
|
171
|
+
::Null;
|
|
172
|
+
\u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037;
|
|
173
|
+
($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3;
|
|
174
|
+
# Combine vowel and consonant signs
|
|
175
|
+
\u103d \u102f > \u1088;
|
|
176
|
+
\u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot
|
|
177
|
+
($medialraZ) ($narrowconsonant) ($lowsignZ*) ($highsignZ*) \u102f > $1 $2 $3 $4 \u1033;
|
|
178
|
+
($medialraZ) ($wideconsonant) ($lowsignZ*) ($highsignZ*) \u102f > $1 $2 $3 $4 \u1033;
|
|
179
|
+
($medialraZ) ($narrowconsonant) ($lowsignZ*) ($highsignZ*) \u1030 > $1 $2 $3 $4 \u1034;
|
|
180
|
+
($medialraZ) ($wideconsonant) ($lowsignZ*) ($highsignZ*) \u1030 > $1 $2 $3 $4 \u1034;
|
|
181
|
+
##### Phase 4. More reorderings of medials
|
|
182
|
+
::Null;
|
|
183
|
+
([\u103D\u103E]) \u103C > \u103C $1;
|
|
184
|
+
\u103E\u103D > \u103D\u103E ;
|
|
185
|
+
\u1038 ($vowelmedial) > $1 \u1038;
|
|
186
|
+
\u1038 ([\u1036\u1037\u103A]) > $1 \u1038;
|
|
187
|
+
\u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033;
|
|
188
|
+
\u103a \u102d \u102f > \u103a \u102d \u1033;
|
|
189
|
+
#### Phase 5
|
|
190
|
+
::Null;
|
|
191
|
+
($consonant) \u103B \u103A > $1 \u103A \u103B;
|
|
192
|
+
([\u103C\u103D\u103E]) \u103B > \u103B $1;
|
|
193
|
+
([\u103D\u103E]) \u103C > \u103C $1;
|
|
194
|
+
\u103E\u103D > \u103D\u103E ;
|
|
195
|
+
([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A;
|
|
196
|
+
\u102D \u103A > \u102D;
|
|
197
|
+
\u102E \u103A > \u102E;
|
|
198
|
+
\u102F \u103A > \u102F;
|
|
199
|
+
\u102D \u102E > \u102E;
|
|
200
|
+
\u102F \u1030 > \u102F;
|
|
201
|
+
\u102B \u102B+ > \u102B;
|
|
202
|
+
\u102C \u102C+ > \u102C;
|
|
203
|
+
\u102D \u102D+ > \u102D;
|
|
204
|
+
\u102E \u102E+ > \u102E;
|
|
205
|
+
\u102F \u102F+ > \u102F;
|
|
206
|
+
\u1030 \u1030+ > \u1030;
|
|
207
|
+
\u1031 \u1031+ > \u1031;
|
|
208
|
+
\u1032 \u1032+ > \u1032;
|
|
209
|
+
\u1036 \u1036+ > \u1036;
|
|
210
|
+
\u103A \u103A+ > \u103A;
|
|
211
|
+
\u103B \u103B+ > \u103B;
|
|
212
|
+
\u103C \u103C+ > \u103C;
|
|
213
|
+
\u103D \u103D+ > \u103D;
|
|
214
|
+
\u103E \u103E+ > \u103E;
|
|
215
|
+
# Visually identical orderings - standardize
|
|
216
|
+
([\u102f\u1033]) \u102D > \u102D $1 ;
|
|
217
|
+
([\u102f\u1033]) \u1036 > \u1036 $1 ;
|
|
218
|
+
\u1037 \u1039 > \u1039 \u1037;
|
|
219
|
+
\u1032 \u103c > \u103c \u1032 ;
|
|
220
|
+
\u102e \u103c > \u103c \u102e ;
|
|
221
|
+
\u103d \u1088 > \u1088 ;
|
|
222
|
+
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
|
|
2
|
+
# This transform converts Zawgyi "encoded" Burmese into proper
|
|
3
|
+
# unicode. Zawgyi is a popular encoding scheme in Myanmar. It uses
|
|
4
|
+
# the Myanmar unicode range but assigns different characters or
|
|
5
|
+
# glyphs to some codepoints. In addition to the character mapping,
|
|
6
|
+
# there is reordering of codepoints needed to match the expected
|
|
7
|
+
# unicode order. This reordering is context-based.
|
|
8
|
+
#
|
|
9
|
+
# This transform is done in two main stages:
|
|
10
|
+
# (1) Map all Zawgyi codepoints to their Unicode counterpart.
|
|
11
|
+
# (2) Perform reordering.
|
|
12
|
+
# Modern Burmese digits & Unicode code points.
|
|
13
|
+
$nondigits = [^\u1040-\u1049];
|
|
14
|
+
$consonant = [\u1000-\u1021];
|
|
15
|
+
$vowelsign = [\u102B-\u1030\u1032]; # Unicode vowel signs except E (1031)
|
|
16
|
+
$vowelsAndConsonants = [\u1000-\u102a];
|
|
17
|
+
$umedial = [\u103B-\u103E]; # Medial codepoints in Unicode
|
|
18
|
+
$vowelmedial = [\u102B-\u1030\u1032\u1036\u1037\u103A-\u103F]; # Union of vowel signs and medials, plus \u1036 and \u1037 (escape for \u1036 was previously malformed as "\1u36")
|
|
19
|
+
$ukinzi = \u1004\u103A\u1039; # Codepoints representing kinzi in Unicode
|
|
20
|
+
# Zawgyi medial ra has multiple representations
|
|
21
|
+
$zmedialra = [\u103B\u107E-\u1084];
|
|
22
|
+
$wspace = [\u0020\u00a0\u1680\u2000-\u200d\u2060\u202f\u205f\u3000\ufeff];
|
|
23
|
+
####
|
|
24
|
+
#### STAGE 1: CODEPOINT MAPPING FROM ZAWGYI TO UNICODE
|
|
25
|
+
####
|
|
26
|
+
# Kinzi (predefined ligatures)
|
|
27
|
+
# Move base character to the right
|
|
28
|
+
($consonant) \u103A \u1064 → $ukinzi $1 \u103B;
|
|
29
|
+
($consonant) \u1064 → $ukinzi $1;
|
|
30
|
+
\u1064 → $ukinzi;
|
|
31
|
+
# Special cases moving base character to right before vowel signs
|
|
32
|
+
($consonant) \u108B → $ukinzi $1 \u102D;
|
|
33
|
+
($consonant) \u108C → $ukinzi $1 \u102E;
|
|
34
|
+
($consonant) \u108D → $ukinzi $1 \u1036;
|
|
35
|
+
# Special cases moving Kinzi block to left
|
|
36
|
+
($consonant) \u103A \u1033 \u108B → $ukinzi $1 \u103B \u102D \u102F;
|
|
37
|
+
($consonant) \u103A \u108b → $ukinzi $1 \u103B \u102D ;
|
|
38
|
+
($consonant) \u103A \u108C → $ukinzi $1 \u103B \u102E ;
|
|
39
|
+
($consonant) \u103A \u108D → $ukinzi $1 \u103B \u1036 ;
|
|
40
|
+
($consonant) \u103A \u108e → $1 \u103B \u102D \u1036 ;
|
|
41
|
+
\u108B → $ukinzi \u102D ;
|
|
42
|
+
\u108C → $ukinzi \u102E ;
|
|
43
|
+
\u108D → $ukinzi \u1036 ;
|
|
44
|
+
# Consonants (only the ones that have to change)
|
|
45
|
+
\u106A → \u1009 ; # NYA
|
|
46
|
+
\u106B → \u100A ;
|
|
47
|
+
\u108F → \u1014 ;
|
|
48
|
+
\u1090 → \u101B ;
|
|
49
|
+
\u1086 → \u103F ;
|
|
50
|
+
# yapin
|
|
51
|
+
[\u103A\u107d] → \u103B ;
|
|
52
|
+
# yayit
|
|
53
|
+
($zmedialra)+ → \u103C ;
|
|
54
|
+
# wasway
|
|
55
|
+
\u103C* \u108A → \u103D \u103E; # To avoid duplicate medials
|
|
56
|
+
\u103C → \u103D ;
|
|
57
|
+
# hatoh
|
|
58
|
+
[\u103D\u1087] → \u103E ;
|
|
59
|
+
\u1088 → \u103E \u102F ;
|
|
60
|
+
\u1089 → \u103E \u1030 ;
|
|
61
|
+
# Vowels
|
|
62
|
+
\u1033 → \u102F ;
|
|
63
|
+
\u1034 → \u1030 ;
|
|
64
|
+
# asat
|
|
65
|
+
\u1039 → \u103A ;
|
|
66
|
+
# lower dot
|
|
67
|
+
[\u1094\u1095] → \u1037 ;
|
|
68
|
+
# Special cases for 1025 vs 1009;
|
|
69
|
+
\u1025 \u1039 → \u1009 \u103a;
|
|
70
|
+
\u1025 \u1061 → \u1009 \u1039 \u1001;
|
|
71
|
+
\u1025 \u1062 → \u1009 \u1039 \u1002;
|
|
72
|
+
\u1025 \u1065 → \u1009 \u1039 \u1005;
|
|
73
|
+
\u1025 \u1068 → \u1009 \u1039 \u1007;
|
|
74
|
+
\u1025 \u1076 → \u1009 \u1039 \u1013;
|
|
75
|
+
\u1025 \u1078 → \u1009 \u1039 \u1015;
|
|
76
|
+
\u1025 \u107A → \u1009 \u1039 \u1017;
|
|
77
|
+
\u1025 \u1079 → \u1009 \u1039 \u1016;
|
|
78
|
+
# Stacked Consonants
|
|
79
|
+
\u105A → \u102B \u103A ;
|
|
80
|
+
\u1060 → \u1039 \u1000 ;
|
|
81
|
+
\u1061 → \u1039 \u1001 ;
|
|
82
|
+
\u1062 → \u1039 \u1002 ;
|
|
83
|
+
\u1063 → \u1039 \u1003 ;
|
|
84
|
+
\u1065 → \u1039 \u1005 ;
|
|
85
|
+
[\u1066\u1067] → \u1039 \u1006 ;
|
|
86
|
+
\u1068 → \u1039 \u1007 ;
|
|
87
|
+
\u1069 → \u1039 \u1008 ;
|
|
88
|
+
\u106C → \u1039 \u100B ;
|
|
89
|
+
\u106D → \u1039 \u100C ;
|
|
90
|
+
\u1070 → \u1039 \u100F ;
|
|
91
|
+
[\u1071\u1072] → \u1039 \u1010 ;
|
|
92
|
+
\u1096 → \u1039 \u1010 \u103D;
|
|
93
|
+
[\u1073\u1074] → \u1039 \u1011 ;
|
|
94
|
+
\u1075 → \u1039 \u1012 ;
|
|
95
|
+
\u1076 → \u1039 \u1013 ;
|
|
96
|
+
\u1077 → \u1039 \u1014 ;
|
|
97
|
+
\u1078 → \u1039 \u1015 ;
|
|
98
|
+
\u1079 → \u1039 \u1016 ;
|
|
99
|
+
\u107A → \u1039 \u1017 ;
|
|
100
|
+
[\u107B\u1093] → \u1039 \u1018 ;
|
|
101
|
+
\u107C → \u1039 \u1019 ;
|
|
102
|
+
\u1085 → \u1039 \u101C ;
|
|
103
|
+
\u108E → \u102D \u1036 ;
|
|
104
|
+
# Pre-defined ligatures
|
|
105
|
+
\u106E → \u100D\u1039\u100D ;
|
|
106
|
+
\u106F → \u100D\u1039\u100E ;
|
|
107
|
+
\u1091 → \u100F\u1039\u100D ;
|
|
108
|
+
\u1092 → \u100B\u1039\u100C ;
|
|
109
|
+
\u1097 → \u100B\u1039\u100B ;
|
|
110
|
+
\u104E → \u104E\u1004\u103A\u1038 ;
|
|
111
|
+
####
|
|
112
|
+
#### STAGE 1.01: Digits 0 and 4 used instead of letters
|
|
113
|
+
# Case of MYANMAR digit being used instead of a letter
|
|
114
|
+
# Lone digit zero and four at start
|
|
115
|
+
::Null;
|
|
116
|
+
^ \u1040 ($nondigits) → \u101D $1;
|
|
117
|
+
^ \u1044 ($nondigits) → | \u104E $1 ;
|
|
118
|
+
# Lone digit zero or four at end
|
|
119
|
+
($nondigits) \u1040 $ → $1 \u101D;
|
|
120
|
+
($nondigits) \u1044 $ → $1 \u104e;
|
|
121
|
+
# Evowel and dependent vowel signs before 0 or 4 only
|
|
122
|
+
# -> convert to the consonant.
|
|
123
|
+
([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2;
|
|
124
|
+
([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2;
|
|
125
|
+
####
|
|
126
|
+
#### STAGE 1.1: Strip spaces immediately before combining characters.
|
|
127
|
+
#### Move e-vowel after consonants and medials
|
|
128
|
+
#### Now every codepoint is Unicode. This starts conversion
|
|
129
|
+
#### from semi-visual order to logical order.
|
|
130
|
+
####
|
|
131
|
+
::Null;
|
|
132
|
+
# Don't remove spaces before E vowel or medial Ra at this stage
|
|
133
|
+
($wspace) \u1037 > \u1037 $1;
|
|
134
|
+
($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2;
|
|
135
|
+
# Remove a duplicate early
|
|
136
|
+
\u1037+ → \u1037;
|
|
137
|
+
# Move e-vowel after medials and consonants.
|
|
138
|
+
\u1031+ $ukinzi ($consonant) > $ukinzi $1 \u1031;
|
|
139
|
+
\u1031+ \u1037+ ($consonant) > $1 \u1031 \u1037 ;
|
|
140
|
+
\u1031+ \u103c ($consonant) > $1 \u103c \u1031;
|
|
141
|
+
# Move medials other than 103c before the 1031. Leave 103c for
|
|
142
|
+
# the next consonant.
|
|
143
|
+
\u1031+ ($consonant) ([\u103b\u103d\u103e]+) > $1 $2 \u1031;
|
|
144
|
+
\u1031+ ($vowelsAndConsonants) > $1 \u1031;
|
|
145
|
+
####
|
|
146
|
+
#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING
|
|
147
|
+
####
|
|
148
|
+
::Null;
|
|
149
|
+
\u103b \u103a > \u103a \u103b;
|
|
150
|
+
# Simpler replacements for Zawgyi 1025
|
|
151
|
+
\u1025 \u102E → \u1026;
|
|
152
|
+
# Asat and dot below reordering, to Unicode NFC.
|
|
153
|
+
\u103A\u1037 → \u1037\u103A;
|
|
154
|
+
# Reorder some vowel signs
|
|
155
|
+
\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ;
|
|
156
|
+
([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1;
|
|
157
|
+
# Move ra medial which precedes consonant, but not other medials.
|
|
158
|
+
\u103C ($consonant) → $1 \u103C;
|
|
159
|
+
####
|
|
160
|
+
#### Stage 3
|
|
161
|
+
#### Move \u1036 and \u103C after consonants.
|
|
162
|
+
::Null;
|
|
163
|
+
($umedial) \u1039 ($consonant) > \u1039 $2 $1;
|
|
164
|
+
\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C;
|
|
165
|
+
\u1036 ($umedial+) → $1 \u1036;
|
|
166
|
+
####
|
|
167
|
+
#### Stage 4
|
|
168
|
+
#### Reordering medials, dot below, contractions, E sign, and asat.
|
|
169
|
+
::Null;
|
|
170
|
+
# Reorder the medials
|
|
171
|
+
([\u103C\u103D\u103E]+) \u103B → \u103B $1;
|
|
172
|
+
([\u103D\u103E]+) \u103C → \u103C $1;
|
|
173
|
+
\u103E\u103D → \u103D\u103E ;
|
|
174
|
+
# Contractions with vowel signs
|
|
175
|
+
([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2;
|
|
176
|
+
($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1;
|
|
177
|
+
# Move vowel sign E \u1031 after medials, but not across consonants
|
|
178
|
+
($umedial*) ([\u1031]+) ($umedial*) → $1 $3 $2;
|
|
179
|
+
# Reorder dot below after medials and vowel diacritics
|
|
180
|
+
\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037;
|
|
181
|
+
# Move vowel signs after medials
|
|
182
|
+
($vowelsign+) ($umedial+) → $2 $1;
|
|
183
|
+
# Reorder modifiers and asat
|
|
184
|
+
($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3;
|
|
185
|
+
####
|
|
186
|
+
#### Stage 5. More reorderings
|
|
187
|
+
#### Vowel signs after medials, sort medials, move visarga after other signs.
|
|
188
|
+
####
|
|
189
|
+
::Null;
|
|
190
|
+
# Replace CA + YA with JHA after moving other things beyond the medials.
|
|
191
|
+
\u1005 \u103b → \u1008;
|
|
192
|
+
# More moving vowel signs after medials
|
|
193
|
+
([\u102b-\u1032]) ($umedial) → $2 $1;
|
|
194
|
+
# Sort the medials
|
|
195
|
+
([\u103C\u103D\u103E]) \u103B → \u103B $1;
|
|
196
|
+
([\u103D\u103E]) \u103C → \u103C $1;
|
|
197
|
+
\u103E\u103D → \u103D\u103E ;
|
|
198
|
+
# Move visarga after other signs
|
|
199
|
+
\u1038 ($vowelmedial) → $1 \u1038;
|
|
200
|
+
# Reorder
|
|
201
|
+
\u1036 \u102f → \u102f \u1036;
|
|
202
|
+
###
|
|
203
|
+
### Stage 6
|
|
204
|
+
### Finish conflicting and extra diacritics. Remove some white space
|
|
205
|
+
###
|
|
206
|
+
::Null;
|
|
207
|
+
# Fix duplicate combiners
|
|
208
|
+
\u102D \u102D+ → \u102D;
|
|
209
|
+
\u102E \u102E+ → \u102E;
|
|
210
|
+
\u102F \u102F+ → \u102F;
|
|
211
|
+
\u1030 \u1030+ → \u1030;
|
|
212
|
+
\u1032 \u1032+ → \u1032;
|
|
213
|
+
\u1036 \u1036+ → \u1036;
|
|
214
|
+
\u1037 \u1037+ → \u1037;
|
|
215
|
+
\u1039 \u1039+ → \u1039;
|
|
216
|
+
\u103a \u103a+ → \u103a;
|
|
217
|
+
\u103b \u103b+ → \u103b;
|
|
218
|
+
\u103c \u103c+ → \u103c;
|
|
219
|
+
\u103d \u103d+ → \u103d;
|
|
220
|
+
\u103e \u103e+ → \u103e; # http://unicode.org/cldr/trac/ticket/10386
|
|
221
|
+
# Fix overlapping signs
|
|
222
|
+
\u102F [\u1030\u103a] → \u102F;
|
|
223
|
+
\u102D \u102E → \u102E;
|
|
224
|
+
# Remove space directly before diacritics.
|
|
225
|
+
($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2;
|
|
226
|
+
# Remove ZWSP at start and end
|
|
227
|
+
^ \u200b+ → ;
|
|
228
|
+
\u200b+ $ → ;
|
|
229
|
+
# Fix multiple spaces around ZWSP to single ZWSP.
|
|
230
|
+
$wspace* \u200b $wspace* → \u200b;
|
|
231
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
|
|
2
|
+
# Copyright (C) 2011-2013, Apple Inc. and others. All Rights Reserved.
|
|
3
|
+
# Special titlecasing for Dutch initial "ij".
|
|
4
|
+
::Any-Title();
|
|
5
|
+
# Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
|
|
6
|
+
[:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
|
7
|
+
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
|
|
2
|
+
::Lower;
|
|
3
|
+
::NFC;
|
|
4
|
+
# References
|
|
5
|
+
# [1] https://en.wikipedia.org/wiki/Navajo_language#Orthography
|
|
6
|
+
# [2] https://en.wikipedia.org/wiki/Navajo_phonology
|
|
7
|
+
$apostrophe = [’ ʼ \'];
|
|
8
|
+
ą́ą́ → ɑ̃́ː;
|
|
9
|
+
áá → ɑ́ː;
|
|
10
|
+
ąą → ɑ̃ː;
|
|
11
|
+
aa → ɑː;
|
|
12
|
+
ą́ → ɑ̃́;
|
|
13
|
+
á → ɑ́;
|
|
14
|
+
ą → ɑ̃;
|
|
15
|
+
a → ɑ;
|
|
16
|
+
ę́ę́ → ẽ́ː;
|
|
17
|
+
éé → éː;
|
|
18
|
+
ęę → ẽː;
|
|
19
|
+
ee → eː;
|
|
20
|
+
ę́ → ẽ́;
|
|
21
|
+
é → é;
|
|
22
|
+
ę → ẽ;
|
|
23
|
+
e → e;
|
|
24
|
+
į́į́ → ɪ̃́ː;
|
|
25
|
+
íí → ɪ́ː;
|
|
26
|
+
įį → ɪ̃ː;
|
|
27
|
+
ii → ɪː;
|
|
28
|
+
į́ → ɪ̃́;
|
|
29
|
+
í → ɪ́;
|
|
30
|
+
į → ɪ̃;
|
|
31
|
+
i → ɪ;
|
|
32
|
+
ǫ́ǫ́ → ṍː;
|
|
33
|
+
óó → óː;
|
|
34
|
+
ǫǫ → õː;
|
|
35
|
+
oo → oː;
|
|
36
|
+
ǫ́ → ṍ;
|
|
37
|
+
ó → ó;
|
|
38
|
+
ǫ → õ;
|
|
39
|
+
o → o;
|
|
40
|
+
$apostrophe → ʔ;
|
|
41
|
+
b → p;
|
|
42
|
+
ch $apostrophe → t͡ʃʼ;
|
|
43
|
+
ch → t͡ʃʰ;
|
|
44
|
+
dl → tˡ;
|
|
45
|
+
dz → t͡s;
|
|
46
|
+
d → t;
|
|
47
|
+
gh → ɣ;
|
|
48
|
+
g → k;
|
|
49
|
+
hw → xʷ;
|
|
50
|
+
h → h;
|
|
51
|
+
j → t͡ʃ;
|
|
52
|
+
k $apostrophe → kʼ;
|
|
53
|
+
kw → k͡xʷ;
|
|
54
|
+
k → k͡x;
|
|
55
|
+
l → l;
|
|
56
|
+
ł → ɬ;
|
|
57
|
+
m → m;
|
|
58
|
+
n → n;
|
|
59
|
+
sh → ʃ;
|
|
60
|
+
s → s;
|
|
61
|
+
tł $apostrophe → t͡ɬʼ;
|
|
62
|
+
tł → t͡ɬʰ;
|
|
63
|
+
ts $apostrophe → t͡sʼ;
|
|
64
|
+
ts → t͡sʰ;
|
|
65
|
+
t $apostrophe → tʼ;
|
|
66
|
+
t → t͡x;
|
|
67
|
+
w → w;
|
|
68
|
+
x → x;
|
|
69
|
+
y → j;
|
|
70
|
+
zh → ʒ;
|
|
71
|
+
z → z;
|
|
72
|
+
::NULL;
|
|
73
|
+
{ɣ} [{ṍ} {ó} {õ} {o}] → ɣʷ;
|
|
74
|
+
|