interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,284 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: Telugu Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: tel_Telu2Latn_ALA_1997
11
+ description: Telugu ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Telugu
16
+
17
+ notes:
18
+
19
+ - |
20
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
21
+ vowels following a consonant can be found in grammars; no distinction between the two is
22
+ made in transliteration.
23
+ - |
24
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
25
+ transliteration, with the following exceptions:
26
+ a) when another vowel is indicated by its appropriate sign; and
27
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
28
+ - |
29
+ Exception: Sunna is transliterated by:
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+ - |
36
+ Ardhasunna before guttural, palatal, cerebral, and dental occlusives is transliterated n̐.
37
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
38
+ transliterated m̐.
39
+
40
+ tests:
41
+ - source: "తమిళనాడు"
42
+ expected: "tamiḷanāḍu"
43
+ - source: "తంటికొండ ఘటన: ఆగని మృత్యుఘోష"
44
+ expected: "taṃṭikoṇḍa ghaṭana: āgani mṛtayughŏṣa"
45
+ - source: "మళ్లీ వివాదం: అమితాబ్‌పై కేసు"
46
+ expected: "maḷalī vivādaṃ: amitābapai kēsu"
47
+ - source: "‘వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ’"
48
+ expected: "‘varada sāyaṃ pērutŏ vaiṭa kālara dŏpiḍī’"
49
+ - source: "రెండో విడత జీఎస్టీ పరిహారం"
50
+ expected: "reṃḍŏ viḍata jīesaṭī parihāraṃ"
51
+ - source: "నితీష్‌ కుమార్‌ అధ్యాయం ముగిసినట్లేనా?!"
52
+ expected: "nitīṣa kumāra adhayāyaṃ mugisinaṭalēnā?!"
53
+ - source: "వారిపై జీవితాంతం నిషేధం విధించండి!"
54
+ expected: "vāripai jīvitāntaṃ niṣēdhaṃ vidhiñcaṃḍi!"
55
+ - source: "మరో లాక్‌డౌన్‌ వల్ల అన్నీ అనర్థాలే!"
56
+ expected: "marŏ lākaḍauna valala ananī anarathālē!"
57
+ - source: "జెసిండా మరో సంచలనం"
58
+ expected: "jesiṃḍā marŏ sañcalanaṃ"
59
+ - source: "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్‌ఓ డైరెక్టర్‌"
60
+ expected: "savīya nirabandhaṃlŏki ḍabalayūhecaō ḍairekaṭara"
61
+ - source: "కరోనాపై యుద్ధంలో సమిధలు"
62
+ expected: "karŏnāpai yudadhaṃlŏ samidhalu"
63
+ - source: "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!"
64
+ expected: "amerikā enanikalu: ‘pedadanana’ evarŏ?!"
65
+ - source: "౪౬౨౬౯"
66
+ expected: "46269"
67
+
68
+ map:
69
+
70
+ rules:
71
+ # rule II
72
+ - pattern: ([క]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
73
+ result: 'k'
74
+ - pattern: ([ఖ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
75
+ result: 'kh'
76
+ - pattern: ([గ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
77
+ result: 'g'
78
+ - pattern: ([ఘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
79
+ result: 'gh'
80
+ - pattern: ([ఙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
81
+ result: 'ṅ'
82
+ - pattern: ([చ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
83
+ result: 'c'
84
+ - pattern: ([ౘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
85
+ result: 'ĉ'
86
+ - pattern: ([ఛ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
87
+ result: 'ch'
88
+ - pattern: ([జ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
89
+ result: 'j'
90
+ - pattern: ([ౙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
91
+ result: 'ĵ'
92
+ - pattern: ([ఝ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
93
+ result: 'jh'
94
+ - pattern: ([ఞ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
95
+ result: 'ñ'
96
+ - pattern: ([ట]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
97
+ result: 'ṭ'
98
+ - pattern: ([ఠ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
99
+ result: 'ṭh'
100
+ - pattern: ([డ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
101
+ result: 'ḍ'
102
+ - pattern: ([ఢ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
103
+ result: 'ḍh'
104
+ - pattern: ([ణ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
105
+ result: 'ṇ'
106
+ - pattern: ([త]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
107
+ result: 't'
108
+ - pattern: ([థ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
109
+ result: 'th'
110
+ - pattern: ([ద]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
111
+ result: 'd'
112
+ - pattern: ([ధ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
113
+ result: 'dh'
114
+ - pattern: ([న]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
115
+ result: 'n'
116
+ - pattern: ([ప]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
117
+ result: 'p'
118
+ - pattern: ([ఫ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
119
+ result: 'ph'
120
+ - pattern: ([బ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
121
+ result: 'b'
122
+ - pattern: ([భ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
123
+ result: 'bh'
124
+ - pattern: ([మ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
125
+ result: 'm'
126
+ - pattern: ([య]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
127
+ result: 'y'
128
+ - pattern: ([ర]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
129
+ result: 'r'
130
+ - pattern: ([ఱ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
131
+ result: 'ṛ'
132
+ - pattern: ([ల]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
133
+ result: 'l'
134
+ - pattern: ([ళ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
135
+ result: 'ḷ'
136
+ - pattern: ([వ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
137
+ result: 'v'
138
+ - pattern: ([శ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
139
+ result: 'ś'
140
+ - pattern: ([ష]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
141
+ result: 'ṣ'
142
+ - pattern: ([స]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
143
+ result: 's'
144
+ - pattern: ([హ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
145
+ result: 'h'
146
+
147
+ # rule III
148
+ - pattern: \u0C02(?=[కఖగఘఙ])
149
+ result: ṅ
150
+ - pattern: \u0C02(?=[చౘఛజౙఝఞ])
151
+ result: ñ
152
+ - pattern: \u0C02(?=[టఠడఢణ])
153
+ result: ṇ
154
+ - pattern: \u0C02(?=[తథదధన])
155
+ result: n
156
+ - pattern: \u0C02(?=[పఫబభమ])
157
+ result: m
158
+
159
+ # rule IV
160
+ - pattern: \u0C01(?=[కఖగఘఙచౘఛజౙఝఞటఠడఢణతథదధన]) # before guttural, palatal, cerebral, and dental
161
+ result: n̐
162
+
163
+ characters:
164
+
165
+ 'అ': 'a'
166
+ 'ఆ': 'ā'
167
+ 'ఇ': 'i'
168
+ 'ఈ': 'ī'
169
+ 'ఉ': 'u'
170
+ 'ఊ': 'ū'
171
+ 'ఋ': 'ṛ'
172
+ 'ౠ': 'ṝ'
173
+ 'ఌ': 'ḻ'
174
+ 'ఎ': 'e'
175
+ 'ఏ': 'ē'
176
+ 'ఐ': 'ai'
177
+ 'ఒ': 'o'
178
+ 'ఓ': 'ō'
179
+ 'ఔ': 'au'
180
+
181
+ # Gutturals
182
+ 'క': 'ka'
183
+ 'ఖ': 'kha'
184
+ 'గ': 'ga'
185
+ 'ఘ': 'gha'
186
+ 'ఙ': 'ṅa'
187
+
188
+ # Palatals
189
+ 'చ': 'ca'
190
+ 'ౘ': 'ĉa'
191
+ 'ఛ': 'cha'
192
+ 'జ': 'ja'
193
+ 'ౙ': 'ĵa'
194
+ 'ఝ': 'jha'
195
+ 'ఞ': 'ña'
196
+
197
+ # Cerebrals
198
+ 'ట': 'ṭa'
199
+ 'ఠ': 'ṭha'
200
+ 'డ': 'ḍa'
201
+ 'ఢ': 'ḍha'
202
+ 'ణ': 'ṇa'
203
+
204
+ # Dentals
205
+ 'త': 'ta'
206
+ 'థ': 'tha'
207
+ 'ద': 'da'
208
+ 'ధ': 'dha'
209
+ 'న': 'na'
210
+
211
+ # Labials
212
+ 'ప': 'pa'
213
+ 'ఫ': 'pha'
214
+ 'బ': 'ba'
215
+ 'భ': 'bha'
216
+ 'మ': 'ma'
217
+
218
+ # Semivowels
219
+ 'య': 'ya'
220
+ 'ర': 'ra'
221
+ 'ఱ': 'ṛa'
222
+ 'ల': 'la'
223
+ 'ళ': 'ḷa'
224
+ 'వ': 'va'
225
+
226
+ # Sibilants
227
+ 'శ': 'śa'
228
+ 'ష': 'ṣa'
229
+ 'స': 'sa'
230
+
231
+
232
+
233
+ # Aspirate
234
+ 'హ': 'ha'
235
+
236
+
237
+
238
+ # Chandrabindu
239
+ 'ঁ': 'm̐'
240
+ 'ఁ': 'm̐'
241
+
242
+ # Bisarga
243
+ 'ః': 'ḥ'
244
+
245
+ # Anusvāra
246
+ 'ం': 'ṃ'
247
+
248
+ # Medials # Needed for connecting consonants
249
+
250
+ 'ా': 'ā'
251
+ 'ి': 'i'
252
+ 'ీ': 'ī'
253
+ 'ు': 'u'
254
+ 'ూ': 'ū'
255
+ 'ృ': 'ṛ'
256
+ 'ె': 'e'
257
+ 'ే': 'ē'
258
+ 'ై': 'ai'
259
+ 'ొ': 'o'
260
+ 'ో': 'ŏ'
261
+ 'ౌ': 'au'
262
+ '\u09CD': '' # Bengali sign virama (U+09CD), not a joiner
263
+
264
+
265
+ '\u0c4d': '' # virama (halanta) sign
266
+ 'ౕ ': ''
267
+ 'ౖ ': ''
268
+ '्': ''
269
+ '़': ''
270
+ "‍": ''# Used for joining
271
+ "‌": ''# Used for non joining
272
+
273
+ # numbers
274
+
275
+ '౦': '0'
276
+ '౧': '1'
277
+ '౨': '2'
278
+ '౩': '3'
279
+ '౪': '4'
280
+ '౫': '5'
281
+ '౬': '6'
282
+ '౭': '7'
283
+ '౮': '8'
284
+ '౯': '9'
@@ -0,0 +1,64 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2011
4
+ language: iso-639-2:tel
5
+ source_script: Telu
6
+ destination_script: Latn
7
+ name: Telugu Romanization, 2011
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
9
+ creation_date: 2011
10
+ description: |
11
+ ALA-LC Romanization table for Telugu
12
+
13
+ notes:
14
+
15
+ - |
16
+ Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
17
+ vowels following a consonant can be found in grammars; no distinction between the two is
18
+ made in transliteration.
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+ a) when another vowel is indicated by its appropriate sign; and
23
+ b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
24
+ - |
25
+ Exception: Sunna is transliterated by:
26
+ a) ṅ before gutturals,
27
+ b) ñ before palatals,
28
+ c) ṇ before cerebrals,
29
+ d) n before dentals, and
30
+ e) m before labials.
31
+ - |
32
+ Ardhasunna before guttural, palatal, cerebral, and dental occlusives is transliterated n̐.
33
+ Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
34
+ transliterated m̐.
35
+
36
+ tests:
37
+ - source: "తమిళనాడు"
38
+ expected: "tamiḷanāḍu"
39
+ - source: "దేవాస్‌కు ౮౯౩౯ కోట్లివ్వండి"
40
+ expected: "dēvāsaku 8939 kŏṭalivavaṃḍi"
41
+ - source: "యూరప్, అమెరికాకు కోవిడ్‌ దడ"
42
+ expected: "yūrapa, amerikāku kŏviḍa daḍa"
43
+ - source: "జనవరి నాటికి అమెరికాలో టీకా"
44
+ expected: "janavari nāṭiki amerikālŏ ṭīkā"
45
+ - source: "ఫ్రాన్స్‌ను ముస్లింలు శిక్షించవచ్చు"
46
+ expected: "pharānasanu musaliṃlu śikaṣiñcavacacu"
47
+ - source: "క్లాస్‌ రూంలో ఉపాధ్యాయుడి వికృత చేష్టలు"
48
+ expected: "kalāsa rūṃlŏ upādhayāyuḍi vikṛta cēṣaṭalu"
49
+ - source: "భారీ భూకంపం; భయంకరమైన అనుభవాలు"
50
+ expected: "bhārī bhūkampaṃ; bhayaṅkaramaina anubhavālu"
51
+ - source: "నిట్ట నిలువునా కూలిన అపార్ట్‌మెంట్‌"
52
+ expected: "niṭaṭa niluvunā kūlina apāraṭameṇṭa"
53
+ - source: "పిచ్చి ప్రయోగాలకు పోతే జరిగేది ఇదే"
54
+ expected: "picaci parayŏgālaku pŏtē jarigēdi idē"
55
+ - source: "కరోనాపై సీడీసీ వైఫల్యం ఎందుకు?"
56
+ expected: "karŏnāpai sīḍīsī vaiphalayaṃ eṃduku?"
57
+ - source: "అత్యంత అరుదైన పులి పిల్లలు ఇవే!"
58
+ expected: "atayanta arudaina puli pilalalu ivē!"
59
+
60
+ map:
61
+
62
+ inherit: alalc-tel-Telu-Latn-1997
63
+
64
+
@@ -0,0 +1,105 @@
1
+ ---
2
+ authority_id: az
3
+ id: 1939
4
+ language: iso-639-2:aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ url: https://omniglot.com/writing/azeri.htm
8
+ creation_date: 1939
9
+ description: |
10
+ In 1939 Joseph Stalin ordered the Cyrillic alphabet to be used by Azeri speakers in the Soviet Union.
11
+
12
+ notes:
13
+ # from Wikipedia
14
+ - The letter Ц, intended for the sound [ц] in borrowed words, was used in the Azerbaijani Cyrillic alphabet until 1951. In the Azeri language, the sound [ц] usually becomes [s].
15
+
16
+ tests:
17
+ - source: Юя
18
+ expected: Yuya
19
+ # from internet
20
+ - source: Азәрбайҹан әлифбасы
21
+ expected: Azərbaycan əlifbası
22
+ - source: |
23
+ Бүтүн инсанлар ләйагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.
24
+ Онларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.
25
+ expected: |
26
+ Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.
27
+ Onların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar.
28
+
29
+ map:
30
+ characters:
31
+ "\u0410": "A" # А
32
+ "\u0411": "B" # Б
33
+ "\u0412": "V" # В
34
+ "\u0413": "Q" # Г
35
+ "\u0492": "\u011E" # Ғ
36
+ "\u0414": "D" # Д
37
+ "\u0415": "E" # Е
38
+ "\u04D8": "\u018F" # Ә
39
+ "\u0416": "J" # Ж
40
+ "\u0417": "Z" # З
41
+ "\u0418": "\u0130" # И
42
+ "\u0419": "Y" # Й
43
+ "\u041A": "K" # К
44
+ "\u049C": "G" # Ҝ
45
+ "\u041B": "L" # Л
46
+ "\u041C": "M" # М
47
+ "\u041D": "N" # Н
48
+ "\u041E": "O" # О
49
+ "\u04E8": "\u00D6" # Ө
50
+ "\u041F": "P" # П
51
+ "\u0420": "R" # Р
52
+ "\u0421": "S" # С
53
+ "\u0422": "T" # Т
54
+ "\u0423": "U" # У
55
+ "\u04AE": "\u00DC" # Ү
56
+ "\u0424": "F" # Ф
57
+ "\u0425": "X" # Х
58
+ "\u0426": "S" # Ц note[1]
59
+ "\u04BA": "H" # Һ
60
+ "\u0427": "\u00C7" # Ч
61
+ "\u04B8": "C" # Ҹ
62
+ "\u0428": "\u015E" # Ш
63
+ "\u042B": "I" # Ы
64
+ "\u042D": "E" # Э
65
+ "\u042E": "Yu" # Ю
66
+ "\u042F": "Ya" # Я
67
+
68
+ "\u0430": "a" # а
69
+ "\u0431": "b" # б
70
+ "\u0432": "v" # в
71
+ "\u0433": "q" # г
72
+ "\u0493": "\u011F" # ғ
73
+ "\u0434": "d" # д
74
+ "\u0435": "e" # е
75
+ "\u04D9": "\u0259" # ә
76
+ "\u0436": "j" # ж
77
+ "\u0437": "z" # з
78
+ "\u0438": "i" # и
79
+ "\u0439": "y" # й
80
+ "\u043A": "k" # к
81
+ "\u049D": "g" # ҝ
82
+ "\u043B": "l" # л
83
+ "\u043C": "m" # м
84
+ "\u043D": "n" # н
85
+ "\u043E": "o" # о
86
+ "\u04E9": "\u00F6" # ө
87
+ "\u043F": "p" # п
88
+ "\u0440": "r" # р
89
+ "\u0441": "s" # с
90
+ "\u0442": "t" # т
91
+ "\u0443": "u" # у
92
+ "\u04AF": "\u00FC" # ү
93
+ "\u0444": "f" # ф
94
+ "\u0445": "x" # х
95
+ "\u0446": "s" # ц note[1]
96
+ "\u04BB": "h" # һ
97
+ "\u0447": "\u00E7" # ч
98
+ "\u04B9": "c" # ҹ
99
+ "\u0448": "\u015F" # ш
100
+ "\u044B": "\u0131" # ы
101
+ "\u044D": "e" # э
102
+ "\u044E": "yu" # ю
103
+ "\u044F": "ya" # я
104
+
105
+ "\u0027": "" # '