interscript 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -0,0 +1,159 @@
|
|
1
|
+
---
|
2
|
+
authority_id: alalc
|
3
|
+
id: 1997
|
4
|
+
language: asm
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: Assamese Romanization, 1997
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
|
9
|
+
creation_date: 1997
|
10
|
+
description: |
|
11
|
+
ALA-LC Romanization table for Assamese
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
16
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
17
|
+
made in transliteration.
|
18
|
+
|
19
|
+
- |
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
21
|
+
transliteration, with the following exceptions:
|
22
|
+
|
23
|
+
a) when another vowel is indicated by its appropriate sign; and
|
24
|
+
b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
|
25
|
+
birāma.
|
26
|
+
|
27
|
+
- Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
|
28
|
+
labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
|
29
|
+
|
30
|
+
- When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
|
31
|
+
|
32
|
+
tests:
|
33
|
+
- source: "অসমীয়া কবিতা"
|
34
|
+
expected: "asamaīẏaā kabaitaā"
|
35
|
+
- source: "কবিৰ আজি জন্মদিন"
|
36
|
+
expected: "kabaira ājai janamadaina"
|
37
|
+
- source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
|
38
|
+
expected: "baerauṭata emaāhara paāchatae paunara bhayaṃkara aganaikaāṇaḍa"
|
39
|
+
- source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
|
40
|
+
expected: "bhaṅaāra bairaudadhae āwaedana daākhaila kaṃganaāra"
|
41
|
+
- source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
|
42
|
+
expected: "āpaunai paṛhai bhaāla paāba paraā baātarai"
|
43
|
+
- source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
|
44
|
+
expected: "śaraīraāmapaurata garaubharatai ṭaraāka jabada, daujanaka āṭaka"
|
45
|
+
- source: "কেনে আছে প্ৰাক্তন"
|
46
|
+
expected: "kaenae āchae paraākatana"
|
47
|
+
- source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
|
48
|
+
expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
|
49
|
+
- source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
|
50
|
+
expected: "ṭauiṭaāraযogae khaoda sadaraī karae ei kathaā"
|
51
|
+
- source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
|
52
|
+
expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
|
53
|
+
|
54
|
+
|
55
|
+
map:
|
56
|
+
|
57
|
+
rules:
|
58
|
+
# note[3]
|
59
|
+
- pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
|
60
|
+
result: n̐
|
61
|
+
|
62
|
+
characters:
|
63
|
+
|
64
|
+
#Vowels and Diphthongs (see Note 1)
|
65
|
+
|
66
|
+
'অ': 'a'
|
67
|
+
'আ': 'ā'
|
68
|
+
'ই': 'i'
|
69
|
+
'ঈ': 'ī'
|
70
|
+
'উ': 'u'
|
71
|
+
'ঊ': 'ū'
|
72
|
+
'ঋ': 'ṛ'
|
73
|
+
'ৠ': 'ṝ'
|
74
|
+
'ঌ': 'ḹ'
|
75
|
+
'এ': 'e'
|
76
|
+
'ঐ': 'ai'
|
77
|
+
'ও': 'o'
|
78
|
+
'ঔ': 'au'
|
79
|
+
|
80
|
+
# Consonant characters
|
81
|
+
|
82
|
+
#Gutturals
|
83
|
+
'ক': 'ka'
|
84
|
+
'খ': 'kha'
|
85
|
+
'গ': 'ga'
|
86
|
+
'ঘ': 'gha'
|
87
|
+
'ঙ': 'ṅa'
|
88
|
+
|
89
|
+
#Palatals
|
90
|
+
'চ': 'ca'
|
91
|
+
'ছ': 'cha'
|
92
|
+
'জ': 'ja'
|
93
|
+
'ঝ': 'jha'
|
94
|
+
'ঞ': 'ña'
|
95
|
+
|
96
|
+
#Cerebrals
|
97
|
+
'ট': 'ṭa'
|
98
|
+
'ঠ': 'ṭha'
|
99
|
+
'ড': 'ḍa'
|
100
|
+
'ড়': 'ṛa'
|
101
|
+
'ঢ': 'ḍha'
|
102
|
+
'ঢ়': 'ṛha'
|
103
|
+
'ণ': 'ṇa'
|
104
|
+
|
105
|
+
#Dentals
|
106
|
+
'ত': 'ta'
|
107
|
+
'ৎ': 'ṭ'
|
108
|
+
'থ': 'tha'
|
109
|
+
'দ': 'da'
|
110
|
+
'ধ': 'dha'
|
111
|
+
'ন': 'na'
|
112
|
+
|
113
|
+
#Labials
|
114
|
+
'প': 'pa'
|
115
|
+
'ফ': 'pha'
|
116
|
+
'ব': 'ba'
|
117
|
+
'ভ': 'bha'
|
118
|
+
'ম': 'ma'
|
119
|
+
|
120
|
+
#Semivowels
|
121
|
+
'য়': 'ya'
|
122
|
+
'য়': 'ẏa'
|
123
|
+
'ৰ': 'ra'
|
124
|
+
'ল': 'la'
|
125
|
+
'ৱ': 'wa'
|
126
|
+
|
127
|
+
#Sibilants
|
128
|
+
'শ': 'śa'
|
129
|
+
'ষ': 'sha'
|
130
|
+
'স': 'sa'
|
131
|
+
|
132
|
+
#Aspirate
|
133
|
+
'হ': 'ha'
|
134
|
+
|
135
|
+
|
136
|
+
# Anusvāra
|
137
|
+
'ং': 'ṃ'
|
138
|
+
|
139
|
+
# Bisarga
|
140
|
+
'ঃ': 'ḥ'
|
141
|
+
|
142
|
+
# Candrabindu (see Note 3)
|
143
|
+
'ঁ': 'm̐'
|
144
|
+
|
145
|
+
# Abagraha (see Note 4)
|
146
|
+
'ঽ': '’' # (apostrophe)
|
147
|
+
|
148
|
+
# Medials # Needed for connecting consonants
|
149
|
+
'\u09be': 'ā'
|
150
|
+
'\u09bf': 'i'
|
151
|
+
'\u09c0': 'ī'
|
152
|
+
'\u09c1': 'u'
|
153
|
+
'\u09c2': 'ū'
|
154
|
+
'\u09c3': 'ṛ'
|
155
|
+
'\u09c7': 'e'
|
156
|
+
'\u09c8': 'ai'
|
157
|
+
'\u09cb': 'o'
|
158
|
+
'\u09cc': 'au'
|
159
|
+
'\u09CD': '' # Used for joining
|
@@ -9,8 +9,8 @@ url: http://catdir.loc.gov/catdir/cpso/romanization/beloruss.pdf
|
|
9
9
|
creation_date: 1997
|
10
10
|
|
11
11
|
notes:
|
12
|
-
- Ґ letter found in Old Belarusian and in modern publications in Tarashkevitsa orthography.
|
13
|
-
- Do not confuse with the digraph кг (also romanized as “kh”). Manual review may be needed when transcribing data in vernacular characters in order to distinguish х from кг.
|
12
|
+
- Ґ letter found in Old Belarusian and in modern publications in Tarashkevitsa orthography.
|
13
|
+
- Do not confuse with the digraph кг (also romanized as “kh”). Manual review may be needed when transcribing data in vernacular characters in order to distinguish х from кг.
|
14
14
|
- The apostrophe (´) is not transliterated.
|
15
15
|
- Soft sign (prime) is USMARC hexadecimal code A7 ~ U+02B9 Unicode character
|
16
16
|
|
@@ -15,7 +15,7 @@ note:
|
|
15
15
|
- This table presupposes monotonic accentuation; rough breathings are accordingly not addressed.
|
16
16
|
- The diphthong definition for upsilon is taken from the 2010 version
|
17
17
|
- Generalised gramma digraph rule to capitals
|
18
|
-
|
18
|
+
|
19
19
|
tests:
|
20
20
|
|
21
21
|
- source: |
|
@@ -25,7 +25,7 @@ tests:
|
|
25
25
|
|
26
26
|
expected: |
|
27
27
|
Ena prama monon me parakinēse ki emena na grapsō oti toutēn tēn patrida tēn echomen oloi mazi, kai sophoi ki amatheis kai plousioi kai phtōchoi kai politikoi kai stratiōtikoi kai oi pleon mikroteroi anthrōpoi; osoi agōnistēkamen, analogōs o katheis, echomen na zēsomen edō. To loipon doulepsamen oloi mazi, na tēn phylamen ki oloi mazi kai na mēn legei oute o dynatos «egō» oute o adynatos. Xerete pote na legei o katheis «egō»? Otan agōnistei monos tou kai phkiasei ē chalasei, na legei «egō»; otan omōs agōnizontai polloi kai phkianoun, tote na lene «emeis». Eimaste eis to «emeis» ki ochi eis to «egō». Kai eis to exēs na mathomen gnōsē, an thelomen na phkiasomen chōrion, na zēsomen oloi mazi.
|
28
|
-
|
28
|
+
|
29
29
|
Giannēs Makrygiannēs.
|
30
30
|
|
31
31
|
|
@@ -622,4 +622,3 @@ map:
|
|
622
622
|
|
623
623
|
"\u0387": ";" # ·
|
624
624
|
"\u00B7": ";" # ·
|
625
|
-
|
@@ -14,7 +14,7 @@ note:
|
|
14
14
|
- Applies to texts after 1453 (Modern Greek)
|
15
15
|
- This table presupposes monotonic accentuation; rough breathings are accordingly not addressed.
|
16
16
|
- Generalised gramma digraph rule to capitals
|
17
|
-
|
17
|
+
|
18
18
|
tests:
|
19
19
|
|
20
20
|
- source: |
|
@@ -24,7 +24,7 @@ tests:
|
|
24
24
|
|
25
25
|
expected: |
|
26
26
|
Ena prama monon me parakinēse ki emena na grapsō oti toutēn tēn patrida tēn echomen oloi mazi, kai sophoi ki amatheis kai plousioi kai phtōchoi kai politikoi kai stratiōtikoi kai oi pleon mikroteroi anthrōpoi; osoi agōnistēkamen, analogōs o katheis, echomen na zēsomen edō. To loipon doulepsamen oloi mazi, na tēn phylamen ki oloi mazi kai na mēn legei oute o dynatos «egō» oute o adynatos. Xerete pote na legei o katheis «egō»? Otan agōnistei monos tou kai phkiasei ē chalasei, na legei «egō»; otan omōs agōnizontai polloi kai phkianoun, tote na lene «emeis». Eimaste eis to «emeis» ki ochi eis to «egō». Kai eis to exēs na mathomen gnōsē, an thelomen na phkiasomen chōrion, na zēsomen oloi mazi.
|
27
|
-
|
27
|
+
|
28
28
|
Giannēs Makrygiannēs.
|
29
29
|
|
30
30
|
|
@@ -625,4 +625,3 @@ map:
|
|
625
625
|
|
626
626
|
"\u0387": ";" # ·
|
627
627
|
"\u00B7": ";" # ·
|
628
|
-
|
@@ -0,0 +1,159 @@
|
|
1
|
+
---
|
2
|
+
authority_id: alalc
|
3
|
+
id: 2020
|
4
|
+
language: hin
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: Hindi Romanization, 2020
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/hindi.pdf
|
9
|
+
creation_date: 2020
|
10
|
+
description: |
|
11
|
+
ALA-LC Romanization table for Hindi
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
16
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
17
|
+
made in transliteration.
|
18
|
+
|
19
|
+
- |
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
21
|
+
transliteration, with the following exceptions:
|
22
|
+
|
23
|
+
a) when another vowel is indicated by its appropriate sign; and
|
24
|
+
b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
|
25
|
+
virāma.
|
26
|
+
|
27
|
+
- |
|
28
|
+
Exception: Anusvāra is transliterated by:
|
29
|
+
|
30
|
+
a) ṅ before gutturals,
|
31
|
+
b) ñ before palatals,
|
32
|
+
c) ṇ before cerebrals,
|
33
|
+
d) n before dentals, and
|
34
|
+
e) m before labials.
|
35
|
+
|
36
|
+
- Anunāsika before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
|
37
|
+
labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
|
38
|
+
|
39
|
+
- When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
|
40
|
+
|
41
|
+
tests:
|
42
|
+
- source: "हम"
|
43
|
+
expected: "hama"
|
44
|
+
- source: "मीन"
|
45
|
+
expected: "maīna"
|
46
|
+
- source: "औसत"
|
47
|
+
expected: "ăusata"
|
48
|
+
- source: "माँऽऽऽ!"
|
49
|
+
expected: "maān̐’’’!"
|
50
|
+
- source: "माँ"
|
51
|
+
expected: "maām̐"
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
map:
|
56
|
+
|
57
|
+
rules:
|
58
|
+
# note[4]
|
59
|
+
- pattern: (?<=)\u0901(?=\b)
|
60
|
+
result: "m̐"
|
61
|
+
|
62
|
+
characters:
|
63
|
+
|
64
|
+
# I. Vowels and Diphthongs (see Note 1)
|
65
|
+
|
66
|
+
'अ': 'a'
|
67
|
+
'आ': 'ā'
|
68
|
+
'इ': 'i'
|
69
|
+
'ई': 'ī'
|
70
|
+
'उ': 'u'
|
71
|
+
'ऊ': 'ū'
|
72
|
+
'ऋ': 'ṛ'
|
73
|
+
'ॠ': 'ṝ'
|
74
|
+
'ऌ': 'ḹ'
|
75
|
+
'ॳ': 'ĕ'
|
76
|
+
'ए': 'e'
|
77
|
+
'ॲ': 'ê'
|
78
|
+
'अै': 'ai'
|
79
|
+
'ऐ': 'ai'
|
80
|
+
'ऒ': 'ŏ'
|
81
|
+
'ओ': 'o'
|
82
|
+
'ऑ': 'ô'
|
83
|
+
'औ': 'ău'
|
84
|
+
|
85
|
+
# II. Consonants (see Note 2)
|
86
|
+
# Gutturals
|
87
|
+
'क': 'ka'
|
88
|
+
'ख': 'kha'
|
89
|
+
'ग': 'ga'
|
90
|
+
'घ': 'gha'
|
91
|
+
'ङ': 'ṅa'
|
92
|
+
|
93
|
+
# Palatals
|
94
|
+
'च': 'ca'
|
95
|
+
'छ': 'cha'
|
96
|
+
'ज': 'ja'
|
97
|
+
'झ': 'jha'
|
98
|
+
'ञ': 'ña'
|
99
|
+
|
100
|
+
# Cerebrals
|
101
|
+
'ट': 'ṭa'
|
102
|
+
'ठ': 'ṭha'
|
103
|
+
'ड': 'ḍa'
|
104
|
+
'ड़': 'ṛa'
|
105
|
+
'ढ': 'ḍha'
|
106
|
+
'ढ़': 'ṛha'
|
107
|
+
'ण': 'ṇa'
|
108
|
+
|
109
|
+
# Dentals
|
110
|
+
'त': 'ta'
|
111
|
+
'थ': 'tha'
|
112
|
+
'द': 'da'
|
113
|
+
'ध': 'dha'
|
114
|
+
'न': 'na'
|
115
|
+
|
116
|
+
# Labials
|
117
|
+
'प': 'pa'
|
118
|
+
'फ': 'pha'
|
119
|
+
'ब': 'ba'
|
120
|
+
'भ': 'bha'
|
121
|
+
'म': 'ma'
|
122
|
+
|
123
|
+
# Semivowels
|
124
|
+
'य': 'ya'
|
125
|
+
'र': 'ra'
|
126
|
+
'ल': 'la'
|
127
|
+
'व': 'va'
|
128
|
+
|
129
|
+
# Sibilants
|
130
|
+
'श': 'śa'
|
131
|
+
'ष': 'sha'
|
132
|
+
'स': 'sa'
|
133
|
+
|
134
|
+
# Aspirate
|
135
|
+
'ह': 'ha'
|
136
|
+
|
137
|
+
# Anusvāra
|
138
|
+
'ं': 'ṃ'
|
139
|
+
|
140
|
+
# Bisarga
|
141
|
+
'ः': 'ḥ'
|
142
|
+
|
143
|
+
# Anunāsika
|
144
|
+
'ँ': 'n̐' # ঁ : n̐, m̐
|
145
|
+
|
146
|
+
# Abagraha
|
147
|
+
'ऽ': '’' # (apostrophe)
|
148
|
+
|
149
|
+
# Medials # Needed for connecting consonants
|
150
|
+
'ा': "ā"
|
151
|
+
'ि': "i"
|
152
|
+
'ी': "ī"
|
153
|
+
'ु': "u"
|
154
|
+
'ू': "ū"
|
155
|
+
'ृ': "ṛ"
|
156
|
+
'ॄ': "ṝ"
|
157
|
+
'े': "e"
|
158
|
+
'ो': "o"
|
159
|
+
'ौ': "au"
|
@@ -7,7 +7,7 @@ destination_script: Latn
|
|
7
7
|
name: ALA-LC Romanization Table -- Korean (1997)
|
8
8
|
url: http://catdir.loc.gov/catdir/cpso/romanization/korean.pdf
|
9
9
|
creation_date: 1997
|
10
|
-
adoption_date:
|
10
|
+
adoption_date:
|
11
11
|
description:
|
12
12
|
"1. General Practice
|
13
13
|
The Library of Congress will continue to follow the McCune-Reischauer system
|
@@ -0,0 +1,170 @@
|
|
1
|
+
---
|
2
|
+
authority_id: alalc
|
3
|
+
id: 1997
|
4
|
+
language: mar
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: Marathi Romanization, 1997
|
8
|
+
url: https://www.loc.gov/catdir/cpso/romanization/marathi.pdf
|
9
|
+
creation_date: 1997
|
10
|
+
description: |
|
11
|
+
ALA-LC Romanization table for Marathi
|
12
|
+
|
13
|
+
notes:
|
14
|
+
|
15
|
+
- Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
16
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
17
|
+
made in transliteration.
|
18
|
+
|
19
|
+
- |
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
21
|
+
transliteration, with the following exceptions:
|
22
|
+
|
23
|
+
a) when another vowel is indicated by its appropriate sign; and
|
24
|
+
b) when the absence of any vowel is indicated by the subscript sign ( ् ) called halanta or
|
25
|
+
virāma.
|
26
|
+
|
27
|
+
- |
|
28
|
+
Exception: Anusvāra is transliterated by:
|
29
|
+
|
30
|
+
a) ṅ before gutturals,
|
31
|
+
b) ñ before palatals,
|
32
|
+
c) ṇ before cerebrals,
|
33
|
+
d) n before dentals, and
|
34
|
+
e) m before labials.
|
35
|
+
In other circumstances it is transliterated by a tilde (~) over the vowel.
|
36
|
+
|
37
|
+
- When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
|
38
|
+
|
39
|
+
tests:
|
40
|
+
- source: "ठाणे - जिल्ह्यात बुधवारी एक हजार रुग्णांची वाढ, तर जणांच्या मृत्यूची नोंद"
|
41
|
+
expected: "ṭhaāṇae - jailahayaāta baudhavaāraī eka hajaāra raugaṇaāñcaī vaāḍha, tara jaṇaāñcayaā maṛitayaūcaī naonda"
|
42
|
+
- source: "एकता कपूर पुन्हा अडकली वादात, वेबसीरिजमधल्या 'त्या' सीनमुळे जमावाची घरावर दगडफेक"
|
43
|
+
expected: "ekataā kapaūra paunahaā aḍakalaī vaādaāta, vaebasaīraijamadhalayaā 'tayaā' saīnamaulae jamaāvaācaī gharaāvara dagaḍaphaeka"
|
44
|
+
- source: "जाणून घ्या, बीएमसीच्या अधिकाऱ्यांनी कंगना राणौतच्या ऑफिसमधले नक्की काय- काय तोडलं"
|
45
|
+
expected: "jaāṇaūna ghayaā, baīemasaīcayaā adhaikaāऱyaānnaī kaṅganaā raāṇaautacayaā ôphaisamadhalae nakakaī kaāya- kaāya taoḍalam"
|
46
|
+
- source: "कंगना मुंबईत दाखल होण्यापूर्वी 'मातोश्री'वरून फर्मान सुटले; प्रवक्त्यांना सक्त आदेश"
|
47
|
+
expected: "kaṅganaā maumbaīta daākhala haoṇayaāpaūravaī 'maātaośaraī'varaūna pharamaāna sauṭalae; paravakatayaānnaā sakata ādaeśa"
|
48
|
+
- source: "मराठा आरक्षणास तात्पुरती स्थगिती; सर्वोच्च न्यायालयाचा निर्णय"
|
49
|
+
expected: "maraāṭhaā ārakashaṇaāsa taātapaurataī sathagaitaī; saravaocaca nayaāyaālayaācaā nairaṇaya"
|
50
|
+
- source: "भारताच्या तिन्ही लशींचा पहिला टप्पा यशस्वी, वाचा कधी येणार बाजारात"
|
51
|
+
expected: "bhaārataācayaā tainahaī laśaīñcaā pahailaā ṭapapaā yaśasavaī, vaācaā kadhaī yaeṇaāra baājaāraāta"
|
52
|
+
- source: "रुग्णवाढीमुळे खाटांची चणचण"
|
53
|
+
expected: "raugaṇavaāḍhaīmaulae khaāṭaāñcaī caṇacaṇa"
|
54
|
+
- source: "पीएम स्वनिधी कर्ज योजनेला मुंबईतून अल्प प्रतिसाद"
|
55
|
+
expected: "paīema savanaidhaī karaja yaojanaelaā maumbaītaūna alapa parataisaāda"
|
56
|
+
- source: "सांताक्रूझ-चेंबूर लिंक रोडवरील उन्नत मार्गाला स्थगिती"
|
57
|
+
expected: "saāntaākaraūjha-caembaūra laiṅka raoḍavaraīla unanata maāragaālaā sathagaitaī"
|
58
|
+
- source: "संपादक अर्णब गोस्वामी यांच्याविरूद्ध खडक पोलिस ठाण्यात तक्रार"
|
59
|
+
expected: "sampaādaka araṇaba gaosavaāmaī yaāñcayaāvairaūdadha khaḍaka paolaisa ṭhaāṇayaāta takaraāra"
|
60
|
+
|
61
|
+
map:
|
62
|
+
|
63
|
+
rules:
|
64
|
+
# note[3]
|
65
|
+
- pattern: \u0902(?=[कखगघङ])
|
66
|
+
result: ṅ
|
67
|
+
- pattern: \u0902(?=[चछजझञ])
|
68
|
+
result: ñ
|
69
|
+
- pattern: \u0902(?=[टठडढण])
|
70
|
+
result: ṇ
|
71
|
+
- pattern: \u0902(?=[तथदधन])
|
72
|
+
result: n
|
73
|
+
|
74
|
+
characters:
|
75
|
+
|
76
|
+
# I. Vowels and Diphthongs (see Note 1)
|
77
|
+
|
78
|
+
'अ': 'a'
|
79
|
+
'आ': 'ā'
|
80
|
+
'इ': 'i'
|
81
|
+
'ई': 'ī'
|
82
|
+
'उ': 'u'
|
83
|
+
'ऊ': 'ū'
|
84
|
+
'ऋ': 'ṛ'
|
85
|
+
'ॠ': 'ṝ'
|
86
|
+
'ऌ': 'ḹ'
|
87
|
+
#'ॳ': 'ĕ'
|
88
|
+
'ए': 'e'
|
89
|
+
'ॲ': 'ê'
|
90
|
+
#'अै': 'ai'
|
91
|
+
'ऐ': 'ai'
|
92
|
+
#'ऒ': 'ŏ'
|
93
|
+
'ओ': 'o'
|
94
|
+
'ऑ': 'ô'
|
95
|
+
'औ': 'ău'
|
96
|
+
|
97
|
+
# II. Consonants
|
98
|
+
# Gutturals
|
99
|
+
'क': 'ka'
|
100
|
+
'ख': 'kha'
|
101
|
+
'ग': 'ga'
|
102
|
+
'घ': 'gha'
|
103
|
+
'ङ': 'ṅa'
|
104
|
+
|
105
|
+
# Palatals
|
106
|
+
'च': 'ca'
|
107
|
+
'छ': 'cha'
|
108
|
+
'ज': 'ja'
|
109
|
+
'झ': 'jha'
|
110
|
+
'ञ': 'ña'
|
111
|
+
|
112
|
+
# Cerebrals
|
113
|
+
'ट': 'ṭa'
|
114
|
+
'ठ': 'ṭha'
|
115
|
+
'ड': 'ḍa'
|
116
|
+
#'ड़': 'ṛa'
|
117
|
+
'ढ': 'ḍha'
|
118
|
+
#'ढ़': 'ṛha'
|
119
|
+
'ण': 'ṇa'
|
120
|
+
|
121
|
+
# Dentals
|
122
|
+
'त': 'ta'
|
123
|
+
'थ': 'tha'
|
124
|
+
'द': 'da'
|
125
|
+
'ध': 'dha'
|
126
|
+
'न': 'na'
|
127
|
+
|
128
|
+
# Labials
|
129
|
+
'प': 'pa'
|
130
|
+
'फ': 'pha'
|
131
|
+
'ब': 'ba'
|
132
|
+
'भ': 'bha'
|
133
|
+
'म': 'ma'
|
134
|
+
|
135
|
+
# Semivowels
|
136
|
+
'य': 'ya'
|
137
|
+
'र': 'ra'
|
138
|
+
'ल': 'la'
|
139
|
+
'ळ': 'la'
|
140
|
+
'व': 'va'
|
141
|
+
|
142
|
+
# Sibilants
|
143
|
+
'श': 'śa'
|
144
|
+
'ष': 'sha'
|
145
|
+
'स': 'sa'
|
146
|
+
|
147
|
+
# Aspirate
|
148
|
+
'ह': 'ha'
|
149
|
+
|
150
|
+
# Anusvāra
|
151
|
+
'ं': 'm'
|
152
|
+
|
153
|
+
# Bisarga
|
154
|
+
'ः': 'ḥ'
|
155
|
+
|
156
|
+
# Abagraha
|
157
|
+
'ऽ': '’' # (apostrophe)
|
158
|
+
|
159
|
+
# Medials # Needed for connecting consonants
|
160
|
+
'ा': "ā"
|
161
|
+
'ि': "i"
|
162
|
+
'ी': "ī"
|
163
|
+
'ु': "u"
|
164
|
+
'ू': "ū"
|
165
|
+
'ृ': "ṛi"
|
166
|
+
'ॄ': "rī"
|
167
|
+
'े': "e"
|
168
|
+
'ो': "o"
|
169
|
+
'ौ': "au"
|
170
|
+
'्': ''
|