interscript 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  3. data/lib/interscript.rb +5 -1
  4. data/lib/interscript/fs.rb +3 -1
  5. data/lib/interscript/mapping.rb +2 -2
  6. data/lib/interscript/opal.rb +5 -1
  7. data/lib/interscript/opal/maps.js.erb +7 -4
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  14. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
  15. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
  18. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  19. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
  21. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  22. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
  23. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  24. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
  25. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
  26. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
  27. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
  28. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
  29. data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
  30. data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
  31. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  32. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  33. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
  34. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
  35. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
  36. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
  37. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
  38. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
  39. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
  40. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
  41. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  42. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  43. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
  44. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
  45. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  46. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  47. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  48. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  49. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  50. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  51. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  52. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  53. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  54. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
  57. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
  59. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
  60. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
  61. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
  62. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
  63. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
  64. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
  65. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
  68. data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
  69. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
  70. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
  71. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  72. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
  73. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
  74. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
  75. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  76. data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
  77. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
  78. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
  79. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
  80. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
  81. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
  82. data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
  83. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  84. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  85. data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
  86. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  87. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
  88. data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
  89. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  90. data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
  91. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  92. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
  93. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
  94. data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
  95. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
  96. data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
  97. data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
  98. data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
  99. data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
  100. data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
  101. data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
  102. data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
  103. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
  104. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
  105. data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
  106. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  107. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  108. data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
  109. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  110. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  111. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  112. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
  113. data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
  114. data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
  115. data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
  116. metadata +41 -15
@@ -0,0 +1,159 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: asm
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Assamese Romanization, 1997
8
+ url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Assamese
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
25
+ birāma.
26
+
27
+ - Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
28
+ labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
29
+
30
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
31
+
32
+ tests:
33
+ - source: "অসমীয়া কবিতা"
34
+ expected: "asamaīẏaā kabaitaā"
35
+ - source: "কবিৰ আজি জন্মদিন"
36
+ expected: "kabaira ājai janamadaina"
37
+ - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
38
+ expected: "baerauṭata emaāhara paāchatae paunara bhayaṃkara aganaikaāṇaḍa"
39
+ - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
40
+ expected: "bhaṅaāra bairaudadhae āwaedana daākhaila kaṃganaāra"
41
+ - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
42
+ expected: "āpaunai paṛhai bhaāla paāba paraā baātarai"
43
+ - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
44
+ expected: "śaraīraāmapaurata garaubharatai ṭaraāka jabada, daujanaka āṭaka"
45
+ - source: "কেনে আছে প্ৰাক্তন"
46
+ expected: "kaenae āchae paraākatana"
47
+ - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
48
+ expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
49
+ - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
50
+ expected: "ṭauiṭaāraযogae khaoda sadaraī karae ei kathaā"
51
+ - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
52
+ expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
53
+
54
+
55
+ map:
56
+
57
+ rules:
58
+ # note[3]
59
+ - pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
60
+ result: n̐
61
+
62
+ characters:
63
+
64
+ #Vowels and Diphthongs (see Note 1)
65
+
66
+ 'অ': 'a'
67
+ 'আ': 'ā'
68
+ 'ই': 'i'
69
+ 'ঈ': 'ī'
70
+ 'উ': 'u'
71
+ 'ঊ': 'ū'
72
+ 'ঋ': 'ṛ'
73
+ 'ৠ': 'ṝ'
74
+ 'ঌ': 'ḹ'
75
+ 'এ': 'e'
76
+ 'ঐ': 'ai'
77
+ 'ও': 'o'
78
+ 'ঔ': 'au'
79
+
80
+ # Consonant characters
81
+
82
+ #Gutturals
83
+ 'ক': 'ka'
84
+ 'খ': 'kha'
85
+ 'গ': 'ga'
86
+ 'ঘ': 'gha'
87
+ 'ঙ': 'ṅa'
88
+
89
+ #Palatals
90
+ 'চ': 'ca'
91
+ 'ছ': 'cha'
92
+ 'জ': 'ja'
93
+ 'ঝ': 'jha'
94
+ 'ঞ': 'ña'
95
+
96
+ #Cerebrals
97
+ 'ট': 'ṭa'
98
+ 'ঠ': 'ṭha'
99
+ 'ড': 'ḍa'
100
+ 'ড়': 'ṛa'
101
+ 'ঢ': 'ḍha'
102
+ 'ঢ়': 'ṛha'
103
+ 'ণ': 'ṇa'
104
+
105
+ #Dentals
106
+ 'ত': 'ta'
107
+ 'ৎ': 'ṭ'
108
+ 'থ': 'tha'
109
+ 'দ': 'da'
110
+ 'ধ': 'dha'
111
+ 'ন': 'na'
112
+
113
+ #Labials
114
+ 'প': 'pa'
115
+ 'ফ': 'pha'
116
+ 'ব': 'ba'
117
+ 'ভ': 'bha'
118
+ 'ম': 'ma'
119
+
120
+ #Semivowels
121
+ 'য়': 'ya'
122
+ 'য়': 'ẏa'
123
+ 'ৰ': 'ra'
124
+ 'ল': 'la'
125
+ 'ৱ': 'wa'
126
+
127
+ #Sibilants
128
+ 'শ': 'śa'
129
+ 'ষ': 'sha'
130
+ 'স': 'sa'
131
+
132
+ #Aspirate
133
+ 'হ': 'ha'
134
+
135
+
136
+ # Anusvāra
137
+ 'ং': 'ṃ'
138
+
139
+ # Bisarga
140
+ 'ঃ': 'ḥ'
141
+
142
+ # Candrabindu (see Note 3)
143
+ 'ঁ': 'm̐'
144
+
145
+ # Abagraha (see Note 4)
146
+ 'ऽ': '’' # (apostrophe)
147
+
148
+ # Medials # Needed for connecting consonants
149
+ '\u09be': 'ā'
150
+ '\u09bf': 'i'
151
+ '\u09c0': 'ī'
152
+ '\u09c1': 'u'
153
+ '\u09c2': 'ū'
154
+ '\u09c3': 'ṛ'
155
+ '\u09c7': 'e'
156
+ '\u09c8': 'ai'
157
+ '\u09cb': 'o'
158
+ '\u09cc': 'au'
159
+ '\u09CD': '' # Used for joining
@@ -103,7 +103,7 @@ map:
103
103
  '\u0427': 'Ch' # Ч
104
104
  '\u04B8': 'J' # Ҹ
105
105
  '\u0428': 'Sh' # Ш
106
-
106
+
107
107
  '\u0430': 'a' # а
108
108
  '\u0431': 'b' # б
109
109
  '\u0432': 'v' # в
@@ -9,8 +9,8 @@ url: http://catdir.loc.gov/catdir/cpso/romanization/beloruss.pdf
9
9
  creation_date: 1997
10
10
 
11
11
  notes:
12
- - Ґ letter found in Old Belarusian and in modern publications in Tarashkevitsa orthography.
13
- - Do not confuse with the digraph кг (also romanized as “kh”). Manual review may be needed when transcribing data in vernacular characters in order to distinguish х from кг.
12
+ - Ґ letter found in Old Belarusian and in modern publications in Tarashkevitsa orthography.
13
+ - Do not confuse with the digraph кг (also romanized as “kh”). Manual review may be needed when transcribing data in vernacular characters in order to distinguish х from кг.
14
14
  - The apostrophe (´) is not transliterated.
15
15
  - Soft sign (prime) is USMARC hexadecimal code A7 ~ U+02B9 Unicode character
16
16
 
@@ -15,7 +15,7 @@ note:
15
15
  - This table presupposes monotonic accentuation; rough breathings are accordingly not addressed.
16
16
  - The diphthong definition for upsilon is taken from the 2010 version
17
17
  - Generalised gramma digraph rule to capitals
18
-
18
+
19
19
  tests:
20
20
 
21
21
  - source: |
@@ -25,7 +25,7 @@ tests:
25
25
 
26
26
  expected: |
27
27
  Ena prama monon me parakinēse ki emena na grapsō oti toutēn tēn patrida tēn echomen oloi mazi, kai sophoi ki amatheis kai plousioi kai phtōchoi kai politikoi kai stratiōtikoi kai oi pleon mikroteroi anthrōpoi; osoi agōnistēkamen, analogōs o katheis, echomen na zēsomen edō. To loipon doulepsamen oloi mazi, na tēn phylamen ki oloi mazi kai na mēn legei oute o dynatos «egō» oute o adynatos. Xerete pote na legei o katheis «egō»? Otan agōnistei monos tou kai phkiasei ē chalasei, na legei «egō»; otan omōs agōnizontai polloi kai phkianoun, tote na lene «emeis». Eimaste eis to «emeis» ki ochi eis to «egō». Kai eis to exēs na mathomen gnōsē, an thelomen na phkiasomen chōrion, na zēsomen oloi mazi.
28
-
28
+
29
29
  Giannēs Makrygiannēs.
30
30
 
31
31
 
@@ -622,4 +622,3 @@ map:
622
622
 
623
623
  "\u0387": ";" # ·
624
624
  "\u00B7": ";" # ·
625
-
@@ -14,7 +14,7 @@ note:
14
14
  - Applies to texts after 1453 (Modern Greek)
15
15
  - This table presupposes monotonic accentuation; rough breathings are accordingly not addressed.
16
16
  - Generalised gramma digraph rule to capitals
17
-
17
+
18
18
  tests:
19
19
 
20
20
  - source: |
@@ -24,7 +24,7 @@ tests:
24
24
 
25
25
  expected: |
26
26
  Ena prama monon me parakinēse ki emena na grapsō oti toutēn tēn patrida tēn echomen oloi mazi, kai sophoi ki amatheis kai plousioi kai phtōchoi kai politikoi kai stratiōtikoi kai oi pleon mikroteroi anthrōpoi; osoi agōnistēkamen, analogōs o katheis, echomen na zēsomen edō. To loipon doulepsamen oloi mazi, na tēn phylamen ki oloi mazi kai na mēn legei oute o dynatos «egō» oute o adynatos. Xerete pote na legei o katheis «egō»? Otan agōnistei monos tou kai phkiasei ē chalasei, na legei «egō»; otan omōs agōnizontai polloi kai phkianoun, tote na lene «emeis». Eimaste eis to «emeis» ki ochi eis to «egō». Kai eis to exēs na mathomen gnōsē, an thelomen na phkiasomen chōrion, na zēsomen oloi mazi.
27
-
27
+
28
28
  Giannēs Makrygiannēs.
29
29
 
30
30
 
@@ -625,4 +625,3 @@ map:
625
625
 
626
626
  "\u0387": ";" # ·
627
627
  "\u00B7": ";" # ·
628
-
@@ -0,0 +1,159 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2020
4
+ language: hin
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Hindi Romanization, 2020
8
+ url: https://www.loc.gov/catdir/cpso/romanization/hindi.pdf
9
+ creation_date: 2020
10
+ description: |
11
+ ALA-LC Romanization table for Hindi
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
25
+ virāma.
26
+
27
+ - |
28
+ Exception: Anusvāra is transliterated by:
29
+
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+
36
+ - Anunāsika before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
37
+ labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
38
+
39
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
40
+
41
+ tests:
42
+ - source: "हम"
43
+ expected: "hama"
44
+ - source: "मीन"
45
+ expected: "maīna"
46
+ - source: "औसत"
47
+ expected: "ăusata"
48
+ - source: "माँऽऽऽ!"
49
+ expected: "maān̐’’’!"
50
+ - source: "माँ"
51
+ expected: "maām̐"
52
+
53
+
54
+
55
+ map:
56
+
57
+ rules:
58
+ # note[4]
59
+ - pattern: (?<=)\u0901(?=\b)
60
+ result: "m̐"
61
+
62
+ characters:
63
+
64
+ # I. Vowels and Diphthongs (see Note 1)
65
+
66
+ 'अ': 'a'
67
+ 'आ': 'ā'
68
+ 'इ': 'i'
69
+ 'ई': 'ī'
70
+ 'उ': 'u'
71
+ 'ऊ': 'ū'
72
+ 'ऋ': 'ṛ'
73
+ 'ॠ': 'ṝ'
74
+ 'ऌ': 'ḹ'
75
+ 'ॳ': 'ĕ'
76
+ 'ए': 'e'
77
+ 'ॲ': 'ê'
78
+ 'अै': 'ai'
79
+ 'ऐ': 'ai'
80
+ 'ऒ': 'ŏ'
81
+ 'ओ': 'o'
82
+ 'ऑ': 'ô'
83
+ 'औ': 'ău'
84
+
85
+ # II. Consonants (see Note 2)
86
+ # Gutturals
87
+ 'क': 'ka'
88
+ 'ख': 'kha'
89
+ 'ग': 'ga'
90
+ 'घ': 'gha'
91
+ 'ङ': 'ṅa'
92
+
93
+ # Palatals
94
+ 'च': 'ca'
95
+ 'छ': 'cha'
96
+ 'ज': 'ja'
97
+ 'झ': 'jha'
98
+ 'ञ': 'ña'
99
+
100
+ # Cerebrals
101
+ 'ट': 'ṭa'
102
+ 'ठ': 'ṭha'
103
+ 'ड': 'ḍa'
104
+ 'ड़': 'ṛa'
105
+ 'ढ': 'ḍha'
106
+ 'ढ़': 'ṛha'
107
+ 'ण': 'ṇa'
108
+
109
+ # Dentals
110
+ 'त': 'ta'
111
+ 'थ': 'tha'
112
+ 'द': 'da'
113
+ 'ध': 'dha'
114
+ 'न': 'na'
115
+
116
+ # Labials
117
+ 'प': 'pa'
118
+ 'फ': 'pha'
119
+ 'ब': 'ba'
120
+ 'भ': 'bha'
121
+ 'म': 'ma'
122
+
123
+ # Semivowels
124
+ 'य': 'ya'
125
+ 'र': 'ra'
126
+ 'ल': 'la'
127
+ 'व': 'va'
128
+
129
+ # Sibilants
130
+ 'श': 'śa'
131
+ 'ष': 'sha'
132
+ 'स': 'sa'
133
+
134
+ # Aspirate
135
+ 'ह': 'ha'
136
+
137
+ # Anusvāra
138
+ 'ं': 'ṃ'
139
+
140
+ # Bisarga
141
+ 'ः': 'ḥ'
142
+
143
+ # Anunāsika
144
+ 'ँ': 'n̐' # ঁ : n̐, m̐
145
+
146
+ # Abagraha
147
+ 'ऽ': '’' # (apostrophe)
148
+
149
+ # Medials # Needed for connecting consonants
150
+ 'ा': "ā"
151
+ 'ि': "i"
152
+ 'ी': "ī"
153
+ 'ु': "u"
154
+ 'ू': "ū"
155
+ 'ृ': "ṛi"
156
+ 'ॄ': "rī"
157
+ 'े': "e"
158
+ 'ॊ': "o"
159
+ 'ौ': "au"
@@ -70,7 +70,7 @@ map:
70
70
  '\u10c3' : 'W' # Ⴣ
71
71
  '\u10c4' : 'X̣' # Ⴤ
72
72
  '\u10c5' : 'Ō' # Ⴥ
73
-
73
+
74
74
  '\u2d00' : 'a' # ⴀ
75
75
  '\u2d01' : 'b' # ⴁ
76
76
  '\u2d02' : 'g' # ⴂ
@@ -109,4 +109,3 @@ map:
109
109
  '\u2d23' : 'w' # ⴣ
110
110
  '\u2d24' : 'x̣' # ⴤ
111
111
  '\u2d25' : 'ō' # ⴥ
112
-
@@ -7,7 +7,7 @@ destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Korean (1997)
8
8
  url: http://catdir.loc.gov/catdir/cpso/romanization/korean.pdf
9
9
  creation_date: 1997
10
- adoption_date:
10
+ adoption_date:
11
11
  description:
12
12
  "1. General Practice
13
13
  The Library of Congress will continue to follow the McCune-Reischauer system
@@ -0,0 +1,170 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: mar
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Marathi Romanization, 1997
8
+ url: https://www.loc.gov/catdir/cpso/romanization/marathi.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Marathi
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ् ) called halanta or
25
+ virāma.
26
+
27
+ - |
28
+ Exception: Anusvāra is transliterated by:
29
+
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+ In other circumstances it is transliterated by a tilde (~) over the vowel.
36
+
37
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
38
+
39
+ tests:
40
+ - source: "ठाणे - जिल्ह्यात बुधवारी एक हजार रुग्णांची वाढ, तर जणांच्या मृत्यूची नोंद"
41
+ expected: "ṭhaāṇae - jailahayaāta baudhavaāraī eka hajaāra raugaṇaāñcaī vaāḍha, tara jaṇaāñcayaā maṛitayaūcaī naonda"
42
+ - source: "एकता कपूर पुन्हा अडकली वादात, वेबसीरिजमधल्या 'त्या' सीनमुळे जमावाची घरावर दगडफेक"
43
+ expected: "ekataā kapaūra paunahaā aḍakalaī vaādaāta, vaebasaīraijamadhalayaā 'tayaā' saīnamaulae jamaāvaācaī gharaāvara dagaḍaphaeka"
44
+ - source: "जाणून घ्या, बीएमसीच्या अधिकाऱ्यांनी कंगना राणौतच्या ऑफिसमधले नक्की काय- काय तोडलं"
45
+ expected: "jaāṇaūna ghayaā, baīemasaīcayaā adhaikaāऱyaānnaī kaṅganaā raāṇaautacayaā ôphaisamadhalae nakakaī kaāya- kaāya taoḍalam"
46
+ - source: "कंगना मुंबईत दाखल होण्यापूर्वी 'मातोश्री'वरून फर्मान सुटले; प्रवक्त्यांना सक्त आदेश"
47
+ expected: "kaṅganaā maumbaīta daākhala haoṇayaāpaūravaī 'maātaośaraī'varaūna pharamaāna sauṭalae; paravakatayaānnaā sakata ādaeśa"
48
+ - source: "मराठा आरक्षणास तात्पुरती स्थगिती; सर्वोच्च न्यायालयाचा निर्णय"
49
+ expected: "maraāṭhaā ārakashaṇaāsa taātapaurataī sathagaitaī; saravaocaca nayaāyaālayaācaā nairaṇaya"
50
+ - source: "भारताच्या तिन्ही लशींचा पहिला टप्पा यशस्वी, वाचा कधी येणार बाजारात"
51
+ expected: "bhaārataācayaā tainahaī laśaīñcaā pahailaā ṭapapaā yaśasavaī, vaācaā kadhaī yaeṇaāra baājaāraāta"
52
+ - source: "रुग्णवाढीमुळे खाटांची चणचण"
53
+ expected: "raugaṇavaāḍhaīmaulae khaāṭaāñcaī caṇacaṇa"
54
+ - source: "पीएम स्वनिधी कर्ज योजनेला मुंबईतून अल्प प्रतिसाद"
55
+ expected: "paīema savanaidhaī karaja yaojanaelaā maumbaītaūna alapa parataisaāda"
56
+ - source: "सांताक्रूझ-चेंबूर लिंक रोडवरील उन्नत मार्गाला स्थगिती"
57
+ expected: "saāntaākaraūjha-caembaūra laiṅka raoḍavaraīla unanata maāragaālaā sathagaitaī"
58
+ - source: "संपादक अर्णब गोस्वामी यांच्याविरूद्ध खडक पोलिस ठाण्यात तक्रार"
59
+ expected: "sampaādaka araṇaba gaosavaāmaī yaāñcayaāvairaūdadha khaḍaka paolaisa ṭhaāṇayaāta takaraāra"
60
+
61
+ map:
62
+
63
+ rules:
64
+ # note[3]
65
+ - pattern: \u0902(?=[कखगघङ])
66
+ result: ṅ
67
+ - pattern: \u0902(?=[चछजझञ])
68
+ result: ñ
69
+ - pattern: \u0902(?=[टठडढण])
70
+ result: ṇ
71
+ - pattern: \u0902(?=[तथदधन])
72
+ result: n
73
+
74
+ characters:
75
+
76
+ # I. Vowels and Diphthongs (see Note 1)
77
+
78
+ 'अ': 'a'
79
+ 'आ': 'ā'
80
+ 'इ': 'i'
81
+ 'ई': 'ī'
82
+ 'उ': 'u'
83
+ 'ऊ': 'ū'
84
+ 'ऋ': 'ṛ'
85
+ 'ॠ': 'ṝ'
86
+ 'ऌ': 'ḹ'
87
+ #'ॳ': 'ĕ'
88
+ 'ए': 'e'
89
+ 'ॲ': 'ê'
90
+ #'अै': 'ai'
91
+ 'ऐ': 'ai'
92
+ #'ऒ': 'ŏ'
93
+ 'ओ': 'o'
94
+ 'ऑ': 'ô'
95
+ 'औ': 'ău'
96
+
97
+ # II. Consonants
98
+ # Gutturals
99
+ 'क': 'ka'
100
+ 'ख': 'kha'
101
+ 'ग': 'ga'
102
+ 'घ': 'gha'
103
+ 'ङ': 'ṅa'
104
+
105
+ # Palatals
106
+ 'च': 'ca'
107
+ 'छ': 'cha'
108
+ 'ज': 'ja'
109
+ 'झ': 'jha'
110
+ 'ञ': 'ña'
111
+
112
+ # Cerebrals
113
+ 'ट': 'ṭa'
114
+ 'ठ': 'ṭha'
115
+ 'ड': 'ḍa'
116
+ #'ड़': 'ṛa'
117
+ 'ढ': 'ḍha'
118
+ #'ढ़': 'ṛha'
119
+ 'ण': 'ṇa'
120
+
121
+ # Dentals
122
+ 'त': 'ta'
123
+ 'थ': 'tha'
124
+ 'द': 'da'
125
+ 'ध': 'dha'
126
+ 'न': 'na'
127
+
128
+ # Labials
129
+ 'प': 'pa'
130
+ 'फ': 'pha'
131
+ 'ब': 'ba'
132
+ 'भ': 'bha'
133
+ 'म': 'ma'
134
+
135
+ # Semivowels
136
+ 'य': 'ya'
137
+ 'र': 'ra'
138
+ 'ल': 'la'
139
+ 'ळ': 'la'
140
+ 'व': 'va'
141
+
142
+ # Sibilants
143
+ 'श': 'śa'
144
+ 'ष': 'sha'
145
+ 'स': 'sa'
146
+
147
+ # Aspirate
148
+ 'ह': 'ha'
149
+
150
+ # Anusvāra
151
+ 'ं': 'm'
152
+
153
+ # Bisarga
154
+ 'ः': 'ḥ'
155
+
156
+ # Abagraha
157
+ 'ऽ': '’' # (apostrophe)
158
+
159
+ # Medials # Needed for connecting consonants
160
+ 'ा': "ā"
161
+ 'ि': "i"
162
+ 'ी': "ī"
163
+ 'ु': "u"
164
+ 'ू': "ū"
165
+ 'ृ': "ṛi"
166
+ 'ॄ': "rī"
167
+ 'े': "e"
168
+ 'ो': "o"
169
+ 'ौ': "au"
170
+ '्': ''