interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,284 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: Telugu Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: tel_Telu2Latn_ALA_1997
11
+ description: Telugu ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Telugu
16
+
17
+ notes:
18
+
19
+ - |
20
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
21
+ vowels following a consonant can be found in grammars; no distinction between the two is
22
+ made in transliteration.
23
+ - |
24
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
25
+ transliteration, with the following exceptions:
26
+ a) when another vowel is indicated by its appropriate sign; and
27
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
28
+ - |
29
+ Exception: Sunna is transliterated by:
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+ - |
36
+ Ardhasunna before gutturals and palatal, cerebral, and dental occlusives is transliterated n̐.
37
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
38
+ transliterated m̐.
39
+
40
+ tests:
41
+ - source: "తమిళనాడు"
42
+ expected: "tamiḷanāḍu"
43
+ - source: "తంటికొండ ఘటన: ఆగని మృత్యుఘోష"
44
+ expected: "taṃṭikoṇḍa ghaṭana: āgani mṛtayughŏṣa"
45
+ - source: "మళ్లీ వివాదం: అమితాబ్‌పై కేసు"
46
+ expected: "maḷalī vivādaṃ: amitābapai kēsu"
47
+ - source: "‘వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ’"
48
+ expected: "‘varada sāyaṃ pērutŏ vaiṭa kālara dŏpiḍī’"
49
+ - source: "రెండో విడత జీఎస్టీ పరిహారం"
50
+ expected: "reṃḍŏ viḍata jīesaṭī parihāraṃ"
51
+ - source: "నితీష్‌ కుమార్‌ అధ్యాయం ముగిసినట్లేనా?!"
52
+ expected: "nitīṣa kumāra adhayāyaṃ mugisinaṭalēnā?!"
53
+ - source: "వారిపై జీవితాంతం నిషేధం విధించండి!"
54
+ expected: "vāripai jīvitāntaṃ niṣēdhaṃ vidhiñcaṃḍi!"
55
+ - source: "మరో లాక్‌డౌన్‌ వల్ల అన్నీ అనర్థాలే!"
56
+ expected: "marŏ lākaḍauna valala ananī anarathālē!"
57
+ - source: "జెసిండా మరో సంచలనం"
58
+ expected: "jesiṃḍā marŏ sañcalanaṃ"
59
+ - source: "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్‌ఓ డైరెక్టర్‌"
60
+ expected: "savīya nirabandhaṃlŏki ḍabalayūhecaō ḍairekaṭara"
61
+ - source: "కరోనాపై యుద్ధంలో సమిధలు"
62
+ expected: "karŏnāpai yudadhaṃlŏ samidhalu"
63
+ - source: "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!"
64
+ expected: "amerikā enanikalu: ‘pedadanana’ evarŏ?!"
65
+ - source: "౪౬౨౬౯"
66
+ expected: "46269"
67
+
68
+ map:
69
+
70
+ rules:
71
+ # rule II
72
+ - pattern: ([క]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
73
+ result: 'k'
74
+ - pattern: ([ఖ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
75
+ result: 'kh'
76
+ - pattern: ([గ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
77
+ result: 'g'
78
+ - pattern: ([ఘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
79
+ result: 'gh'
80
+ - pattern: ([ఙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
81
+ result: 'ṅ'
82
+ - pattern: ([చ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
83
+ result: 'c'
84
+ - pattern: ([ౘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
85
+ result: 'ĉ'
86
+ - pattern: ([ఛ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
87
+ result: 'ch'
88
+ - pattern: ([జ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
89
+ result: 'j'
90
+ - pattern: ([ౙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
91
+ result: 'ĵ'
92
+ - pattern: ([ఝ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
93
+ result: 'jh'
94
+ - pattern: ([ఞ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
95
+ result: 'ñ'
96
+ - pattern: ([ట]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
97
+ result: 'ṭ'
98
+ - pattern: ([ఠ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
99
+ result: 'ṭh'
100
+ - pattern: ([డ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
101
+ result: 'ḍ'
102
+ - pattern: ([ఢ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
103
+ result: 'ḍh'
104
+ - pattern: ([ణ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
105
+ result: 'ṇ'
106
+ - pattern: ([త]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
107
+ result: 't'
108
+ - pattern: ([థ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
109
+ result: 'th'
110
+ - pattern: ([ద]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
111
+ result: 'd'
112
+ - pattern: ([ధ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
113
+ result: 'dh'
114
+ - pattern: ([న]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
115
+ result: 'n'
116
+ - pattern: ([ప]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
117
+ result: 'p'
118
+ - pattern: ([ఫ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
119
+ result: 'ph'
120
+ - pattern: ([బ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
121
+ result: 'b'
122
+ - pattern: ([భ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
123
+ result: 'bh'
124
+ - pattern: ([మ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
125
+ result: 'm'
126
+ - pattern: ([య]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
127
+ result: 'y'
128
+ - pattern: ([ర]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
129
+ result: 'r'
130
+ - pattern: ([ఱ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
131
+ result: 'ṛ'
132
+ - pattern: ([ల]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
133
+ result: 'l'
134
+ - pattern: ([ళ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
135
+ result: 'ḷ'
136
+ - pattern: ([వ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
137
+ result: 'v'
138
+ - pattern: ([శ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
139
+ result: 'ś'
140
+ - pattern: ([ష]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
141
+ result: 'ṣ'
142
+ - pattern: ([స]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
143
+ result: 's'
144
+ - pattern: ([హ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
145
+ result: 'h'
146
+
147
+ # rule III
148
+ - pattern: \u0C02(?=[కఖగఘఙ])
149
+ result: ṅ
150
+ - pattern: \u0C02(?=[చౘఛజౙఝఞ])
151
+ result: ñ
152
+ - pattern: \u0C02(?=[టఠడఢణ])
153
+ result: ṇ
154
+ - pattern: \u0C02(?=[తథదధన])
155
+ result: n
156
+ - pattern: \u0C02(?=[పఫబభమ])
157
+ result: m
158
+
159
+ # rule IV
160
+ - pattern: \u0C01(?=[కఖగఘఙచౘఛజౙఝఞటఠడఢణతథదధన]) # before guttural, palatal, cerebral, and dental
161
+ result: n̐
162
+
163
+ characters:
164
+
165
+ 'అ': 'a'
166
+ 'ఆ': 'ā'
167
+ 'ఇ': 'i'
168
+ 'ఈ': 'ī'
169
+ 'ఉ': 'u'
170
+ 'ఊ': 'ū'
171
+ 'ఋ': 'ṛ'
172
+ 'ౠ': 'ṝ'
173
+ 'ఌ': 'ḻ'
174
+ 'ఎ': 'e'
175
+ 'ఏ': 'ē'
176
+ 'ఐ': 'ai'
177
+ 'ఒ': 'o'
178
+ 'ఓ': 'ō'
179
+ 'ఔ': 'au'
180
+
181
+ # Gutturals
182
+ 'క': 'ka'
183
+ 'ఖ': 'kha'
184
+ 'గ': 'ga'
185
+ 'ఘ': 'gha'
186
+ 'ఙ': 'ṅa'
187
+
188
+ # Palatals
189
+ 'చ': 'ca'
190
+ 'ౘ': 'ĉa'
191
+ 'ఛ': 'cha'
192
+ 'జ': 'ja'
193
+ 'ౙ': 'ĵa'
194
+ 'ఝ': 'jha'
195
+ 'ఞ': 'ña'
196
+
197
+ # Cerebrals
198
+ 'ట': 'ṭa'
199
+ 'ఠ': 'ṭha'
200
+ 'డ': 'ḍa'
201
+ 'ఢ': 'ḍha'
202
+ 'ణ': 'ṇa'
203
+
204
+ # Dentals
205
+ 'త': 'ta'
206
+ 'థ': 'tha'
207
+ 'ద': 'da'
208
+ 'ధ': 'dha'
209
+ 'న': 'na'
210
+
211
+ # Labials
212
+ 'ప': 'pa'
213
+ 'ఫ': 'pha'
214
+ 'బ': 'ba'
215
+ 'భ': 'bha'
216
+ 'మ': 'ma'
217
+
218
+ # Semivowels
219
+ 'య': 'ya'
220
+ 'ర': 'ra'
221
+ 'ఱ': 'ṛa'
222
+ 'ల': 'la'
223
+ 'ళ': 'ḷa'
224
+ 'వ': 'va'
225
+
226
+ # Sibilants
227
+ 'శ': 'śa'
228
+ 'ష': 'ṣa'
229
+ 'స': 'sa'
230
+
231
+
232
+
233
+ # Aspirate
234
+ 'హ': 'ha'
235
+
236
+
237
+
238
+ # Chandrabindu
239
+ 'ঁ': 'm̐'
240
+ 'ఁ': 'm̐'
241
+
242
+ # Bisarga
243
+ 'ః': 'ḥ'
244
+
245
+ # Anusvāra
246
+ 'ం': 'ṃ'
247
+
248
+ # Medials # Needed for connecting consonants
249
+
250
+ 'ా': 'ā'
251
+ 'ి': 'i'
252
+ 'ీ': 'ī'
253
+ 'ు': 'u'
254
+ 'ూ': 'ū'
255
+ 'ృ': 'ṛ'
256
+ 'ె': 'e'
257
+ 'ే': 'ē'
258
+ 'ై': 'ai'
259
+ 'ొ': 'o'
260
+ 'ో': 'ŏ'
261
+ 'ౌ': 'au'
262
+ '\u09CD': '' # Used for joining
263
+
264
+
265
+ '\u0c4d': '' # virama sign (halanta)
266
+ 'ౕ ': ''
267
+ 'ౖ ': ''
268
+ '्': ''
269
+ '़': ''
270
+ "‍": ''# Used for joining
271
+ "‌": ''# Used for non joining
272
+
273
+ # numbers
274
+
275
+ '౦': '0'
276
+ '౧': '1'
277
+ '౨': '2'
278
+ '౩': '3'
279
+ '౪': '4'
280
+ '౫': '5'
281
+ '౬': '6'
282
+ '౭': '7'
283
+ '౮': '8'
284
+ '౯': '9'
@@ -0,0 +1,64 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2011
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: Telugu Romanization, 2011
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
9
+ creation_date: 2011
10
+ description: |
11
+ ALA-LC Romanization table for Telugu
12
+
13
+ notes:
14
+
15
+ - |
16
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
17
+ vowels following a consonant can be found in grammars; no distinction between the two is
18
+ made in transliteration.
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+ a) when another vowel is indicated by its appropriate sign; and
23
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
24
+ - |
25
+ Exception: Sunna is transliterated by:
26
+ a) ṅ before gutturals,
27
+ b) ñ before palatals,
28
+ c) ṇ before cerebrals,
29
+ d) n before dentals, and
30
+ e) m before labials.
31
+ - |
32
+ Ardhasunna before gutturals and palatal, cerebral, and dental occlusives is transliterated n̐.
33
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
34
+ transliterated m̐.
35
+
36
+ tests:
37
+ - source: "తమిళనాడు"
38
+ expected: "tamiḷanāḍu"
39
+ - source: "దేవాస్‌కు ౮౯౩౯ కోట్లివ్వండి"
40
+ expected: "dēvāsaku 8939 kŏṭalivavaṃḍi"
41
+ - source: "యూరప్, అమెరికాకు కోవిడ్‌ దడ"
42
+ expected: "yūrapa, amerikāku kŏviḍa daḍa"
43
+ - source: "జనవరి నాటికి అమెరికాలో టీకా"
44
+ expected: "janavari nāṭiki amerikālŏ ṭīkā"
45
+ - source: "ఫ్రాన్స్‌ను ముస్లింలు శిక్షించవచ్చు"
46
+ expected: "pharānasanu musaliṃlu śikaṣiñcavacacu"
47
+ - source: "క్లాస్‌ రూంలో ఉపాధ్యాయుడి వికృత చేష్టలు"
48
+ expected: "kalāsa rūṃlŏ upādhayāyuḍi vikṛta cēṣaṭalu"
49
+ - source: "భారీ భూకంపం; భయంకరమైన అనుభవాలు"
50
+ expected: "bhārī bhūkampaṃ; bhayaṅkaramaina anubhavālu"
51
+ - source: "నిట్ట నిలువునా కూలిన అపార్ట్‌మెంట్‌"
52
+ expected: "niṭaṭa niluvunā kūlina apāraṭameṇṭa"
53
+ - source: "పిచ్చి ప్రయోగాలకు పోతే జరిగేది ఇదే"
54
+ expected: "picaci parayŏgālaku pŏtē jarigēdi idē"
55
+ - source: "కరోనాపై సీడీసీ వైఫల్యం ఎందుకు?"
56
+ expected: "karŏnāpai sīḍīsī vaiphalayaṃ eṃduku?"
57
+ - source: "అత్యంత అరుదైన పులి పిల్లలు ఇవే!"
58
+ expected: "atayanta arudaina puli pilalalu ivē!"
59
+
60
+ map:
61
+
62
+ inherit: alalc-tel-Telu-Latn-1997
63
+
64
+
@@ -0,0 +1,105 @@
1
+ ---
2
+ authority_id: az
3
+ id: 1939
4
+ language: iso-639-2:aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ url: https://omniglot.com/writing/azeri.htm
8
+ creation_date: 1939
9
+ description: |
10
+ In 1939 Joseph Stalin ordered the Cyrillic alphabet to be used by Azeri speakers in the Soviet Union.
11
+
12
+ notes:
13
+ # from Wikipedia
14
+ - The letter Ц, intended for the sound [ts] in borrowed words, was used in the Azerbaijani Cyrillic alphabet until 1951. In the Azeri language, the sound [ts] usually becomes [s].
15
+
16
+ tests:
17
+ - source: Юя
18
+ expected: Yuya
19
+ # from internet
20
+ - source: Азәрбайҹан әлифбасы
21
+ expected: Azərbaycan əlifbası
22
+ - source: |
23
+ Бүтүн инсанлар ләйагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.
24
+ Онларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.
25
+ expected: |
26
+ Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.
27
+ Onların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar.
28
+
29
+ map:
30
+ characters:
31
+ "\u0410": "A" # А
32
+ "\u0411": "B" # Б
33
+ "\u0412": "V" # В
34
+ "\u0413": "Q" # Г
35
+ "\u0492": "\u011E" # Ғ
36
+ "\u0414": "D" # Д
37
+ "\u0415": "E" # Е
38
+ "\u04D8": "\u018F" # Ә
39
+ "\u0416": "J" # Ж
40
+ "\u0417": "Z" # З
41
+ "\u0418": "\u0130" # И
42
+ "\u0419": "Y" # Й
43
+ "\u041A": "K" # К
44
+ "\u049C": "G" # Ҝ
45
+ "\u041B": "L" # Л
46
+ "\u041C": "M" # М
47
+ "\u041D": "N" # Н
48
+ "\u041E": "O" # О
49
+ "\u04E8": "\u00D6" # Ө
50
+ "\u041F": "P" # П
51
+ "\u0420": "R" # Р
52
+ "\u0421": "S" # С
53
+ "\u0422": "T" # Т
54
+ "\u0423": "U" # У
55
+ "\u04AE": "\u00DC" # Ү
56
+ "\u0424": "F" # Ф
57
+ "\u0425": "X" # Х
58
+ "\u0426": "s" # Ц note[1]
59
+ "\u04BA": "H" # Һ
60
+ "\u0427": "\u00C7" # Ч
61
+ "\u04B8": "C" # Ҹ
62
+ "\u0428": "\u015E" # Ш
63
+ "\u042B": "I" # Ы
64
+ "\u042D": "E" # Э
65
+ "\u042E": "Yu" # Ю
66
+ "\u042F": "Ya" # Я
67
+
68
+ "\u0430": "a" # а
69
+ "\u0431": "b" # б
70
+ "\u0432": "v" # в
71
+ "\u0433": "q" # г
72
+ "\u0493": "\u011F" # ғ
73
+ "\u0434": "d" # д
74
+ "\u0435": "e" # е
75
+ "\u04D9": "\u0259" # ә
76
+ "\u0436": "j" # ж
77
+ "\u0437": "z" # з
78
+ "\u0438": "i" # и
79
+ "\u0439": "y" # й
80
+ "\u043A": "k" # к
81
+ "\u049D": "g" # ҝ
82
+ "\u043B": "l" # л
83
+ "\u043C": "m" # м
84
+ "\u043D": "n" # н
85
+ "\u043E": "o" # о
86
+ "\u04E9": "\u00F6" # ө
87
+ "\u043F": "p" # п
88
+ "\u0440": "r" # р
89
+ "\u0441": "s" # с
90
+ "\u0442": "t" # т
91
+ "\u0443": "u" # у
92
+ "\u04AF": "\u00FC" # ү
93
+ "\u0444": "f" # ф
94
+ "\u0445": "x" # х
95
+ "\u0446": "s" # ц note[1]
96
+ "\u04BB": "h" # һ
97
+ "\u0447": "\u00E7" # ч
98
+ "\u04B9": "c" # ҹ
99
+ "\u0448": "\u015F" # ш
100
+ "\u044B": "\u0131" # ы
101
+ "\u044D": "e" # э
102
+ "\u044E": "yu" # ю
103
+ "\u044F": "ya" # я
104
+
105
+ "\u0027": "" # '