interscript 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +1 -3
- data/aliases.json +1 -0
- data/lib/interscript.rb +8 -3
- data/lib/interscript/fs.rb +27 -0
- data/lib/interscript/mapping.rb +3 -1
- data/lib/interscript/opal.rb +142 -3
- data/lib/interscript/opal/entrypoint.rb +8 -0
- data/lib/interscript/opal/exports.rb +11 -0
- data/lib/interscript/opal/maps.js.erb +2 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
- data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
- data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
- data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
- data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
- data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
- data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
- data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
- data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
- data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
- data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
- data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
- data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
- data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
- data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
- data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
- data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
- data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
- data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
- data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
- data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
- data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
- data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
- data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
- data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
- data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
- data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
- data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
- data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
- data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
- data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
- data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
- data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
- data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
- data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
- data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
- data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
- data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
- data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
- data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
- data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
- data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
- data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
- data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
- data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
- data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
- data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
- data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
- data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
- data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
- data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
- data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
- data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
- data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
- data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
- data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
- data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
- data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
- data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
- data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
- data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
- data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
- data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
- data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
- data/spec/interscript/filenames_spec.rb +6 -369
- data/spec/interscript_spec.rb +10 -2
- metadata +50 -7
- data/lib/interscript/opal/map_translate.rb +0 -7
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 1997
|
|
4
|
+
language: iso-639-2:tel
|
|
5
|
+
source_script: Telu
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Telugu Romanization, 1997
|
|
8
|
+
alias:
|
|
9
|
+
ogc11122:
|
|
10
|
+
code: tel_Telu2Latn_ALA_1997
|
|
11
|
+
description: Telugu ALA-Library of Congress 1997 System
|
|
12
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
|
|
13
|
+
creation_date: 1997
|
|
14
|
+
description: |
|
|
15
|
+
ALA-LC Romanization table for Telugu
|
|
16
|
+
|
|
17
|
+
notes:
|
|
18
|
+
|
|
19
|
+
- |
|
|
20
|
+
Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
|
21
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
|
22
|
+
made in transliteration.
|
|
23
|
+
- |
|
|
24
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
|
25
|
+
transliteration, with the following exceptions:
|
|
26
|
+
a) when another vowel is indicated by its appropriate sign; and
|
|
27
|
+
b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
|
|
28
|
+
- |
|
|
29
|
+
Exception: Sunna is transliterated by:
|
|
30
|
+
a) ṅ before gutturals,
|
|
31
|
+
b) ñ before palatals,
|
|
32
|
+
c) ṇ before cerebrals,
|
|
33
|
+
d) n before dentals, and
|
|
34
|
+
e) m before labials.
|
|
35
|
+
- |
|
|
36
|
+
Ardhasunna before guttural, palatal, cerebral, and dental occlusives is transliterated n̐.
|
|
37
|
+
Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
|
|
38
|
+
transliterated m̐.
|
|
39
|
+
|
|
40
|
+
tests:
|
|
41
|
+
- source: "తమిళనాడు"
|
|
42
|
+
expected: "tamiḷanāḍu"
|
|
43
|
+
- source: "తంటికొండ ఘటన: ఆగని మృత్యుఘోష"
|
|
44
|
+
expected: "taṃṭikoṇḍa ghaṭana: āgani mṛtayughŏṣa"
|
|
45
|
+
- source: "మళ్లీ వివాదం: అమితాబ్పై కేసు"
|
|
46
|
+
expected: "maḷalī vivādaṃ: amitābapai kēsu"
|
|
47
|
+
- source: "‘వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ’"
|
|
48
|
+
expected: "‘varada sāyaṃ pērutŏ vaiṭa kālara dŏpiḍī’"
|
|
49
|
+
- source: "రెండో విడత జీఎస్టీ పరిహారం"
|
|
50
|
+
expected: "reṃḍŏ viḍata jīesaṭī parihāraṃ"
|
|
51
|
+
- source: "నితీష్ కుమార్ అధ్యాయం ముగిసినట్లేనా?!"
|
|
52
|
+
expected: "nitīṣa kumāra adhayāyaṃ mugisinaṭalēnā?!"
|
|
53
|
+
- source: "వారిపై జీవితాంతం నిషేధం విధించండి!"
|
|
54
|
+
expected: "vāripai jīvitāntaṃ niṣēdhaṃ vidhiñcaṃḍi!"
|
|
55
|
+
- source: "మరో లాక్డౌన్ వల్ల అన్నీ అనర్థాలే!"
|
|
56
|
+
expected: "marŏ lākaḍauna valala ananī anarathālē!"
|
|
57
|
+
- source: "జెసిండా మరో సంచలనం"
|
|
58
|
+
expected: "jesiṃḍā marŏ sañcalanaṃ"
|
|
59
|
+
- source: "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్ఓ డైరెక్టర్"
|
|
60
|
+
expected: "savīya nirabandhaṃlŏki ḍabalayūhecaō ḍairekaṭara"
|
|
61
|
+
- source: "కరోనాపై యుద్ధంలో సమిధలు"
|
|
62
|
+
expected: "karŏnāpai yudadhaṃlŏ samidhalu"
|
|
63
|
+
- source: "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!"
|
|
64
|
+
expected: "amerikā enanikalu: ‘pedadanana’ evarŏ?!"
|
|
65
|
+
- source: "౪౬౨౬౯"
|
|
66
|
+
expected: "46269"
|
|
67
|
+
|
|
68
|
+
map:
|
|
69
|
+
|
|
70
|
+
rules:
|
|
71
|
+
# rule II
|
|
72
|
+
- pattern: ([క]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
73
|
+
result: 'k'
|
|
74
|
+
- pattern: ([ఖ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
75
|
+
result: 'kh'
|
|
76
|
+
- pattern: ([గ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
77
|
+
result: 'g'
|
|
78
|
+
- pattern: ([ఘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
79
|
+
result: 'gh'
|
|
80
|
+
- pattern: ([ఙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
81
|
+
result: 'ṅ'
|
|
82
|
+
- pattern: ([చ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
83
|
+
result: 'c'
|
|
84
|
+
- pattern: ([ౘ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
85
|
+
result: 'ĉ'
|
|
86
|
+
- pattern: ([ఛ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
87
|
+
result: 'ch'
|
|
88
|
+
- pattern: ([జ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
89
|
+
result: 'j'
|
|
90
|
+
- pattern: ([ౙ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
91
|
+
result: 'ĵ'
|
|
92
|
+
- pattern: ([ఝ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
93
|
+
result: 'jh'
|
|
94
|
+
- pattern: ([ఞ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
95
|
+
result: 'ñ'
|
|
96
|
+
- pattern: ([ట]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
97
|
+
result: 'ṭ'
|
|
98
|
+
- pattern: ([ఠ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
99
|
+
result: 'ṭh'
|
|
100
|
+
- pattern: ([డ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
101
|
+
result: 'ḍ'
|
|
102
|
+
- pattern: ([ఢ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
103
|
+
result: 'ḍh'
|
|
104
|
+
- pattern: ([ణ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
105
|
+
result: 'ṇ'
|
|
106
|
+
- pattern: ([త]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
107
|
+
result: 't'
|
|
108
|
+
- pattern: ([థ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
109
|
+
result: 'th'
|
|
110
|
+
- pattern: ([ద]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
111
|
+
result: 'd'
|
|
112
|
+
- pattern: ([ధ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
113
|
+
result: 'dh'
|
|
114
|
+
- pattern: ([న]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
115
|
+
result: 'n'
|
|
116
|
+
- pattern: ([ప]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
117
|
+
result: 'p'
|
|
118
|
+
- pattern: ([ఫ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
119
|
+
result: 'ph'
|
|
120
|
+
- pattern: ([బ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
121
|
+
result: 'b'
|
|
122
|
+
- pattern: ([భ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
123
|
+
result: 'bh'
|
|
124
|
+
- pattern: ([మ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
125
|
+
result: 'm'
|
|
126
|
+
- pattern: ([య]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
127
|
+
result: 'y'
|
|
128
|
+
- pattern: ([ర]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
129
|
+
result: 'r'
|
|
130
|
+
- pattern: ([ఱ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
131
|
+
result: 'ṛ'
|
|
132
|
+
- pattern: ([ల]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
133
|
+
result: 'l'
|
|
134
|
+
- pattern: ([ళ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
135
|
+
result: 'ḷ'
|
|
136
|
+
- pattern: ([వ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
137
|
+
result: 'v'
|
|
138
|
+
- pattern: ([శ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
139
|
+
result: 'ś'
|
|
140
|
+
- pattern: ([ష]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
141
|
+
result: 'ṣ'
|
|
142
|
+
- pattern: ([స]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
143
|
+
result: 's'
|
|
144
|
+
- pattern: ([హ]=?)(?=[\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c6c])
|
|
145
|
+
result: 'h'
|
|
146
|
+
|
|
147
|
+
# rule III
|
|
148
|
+
- pattern: \u0C02(?=[కఖగఘఙ])
|
|
149
|
+
result: ṅ
|
|
150
|
+
- pattern: \u0C02(?=[చౘఛజౙఝఞ])
|
|
151
|
+
result: ñ
|
|
152
|
+
- pattern: \u0C02(?=[టఠడఢణ])
|
|
153
|
+
result: ṇ
|
|
154
|
+
- pattern: \u0C02(?=[తథదధన])
|
|
155
|
+
result: n
|
|
156
|
+
- pattern: \u0C02(?=[పఫబభమ])
|
|
157
|
+
result: m
|
|
158
|
+
|
|
159
|
+
# rule IV
|
|
160
|
+
- pattern: \u0C01(?=[కఖగఘఙచౘఛజౙఝఞటఠడఢణతథదధన]) # before guttural, palatal, cerebral, and dental
|
|
161
|
+
result: n̐
|
|
162
|
+
|
|
163
|
+
characters:
|
|
164
|
+
|
|
165
|
+
'అ': 'a'
|
|
166
|
+
'ఆ': 'ā'
|
|
167
|
+
'ఇ': 'i'
|
|
168
|
+
'ఈ': 'ī'
|
|
169
|
+
'ఉ': 'u'
|
|
170
|
+
'ఊ': 'ū'
|
|
171
|
+
'ఋ': 'ṛ'
|
|
172
|
+
'ౠ': 'ṝ'
|
|
173
|
+
'ఌ': 'ḻ'
|
|
174
|
+
'ఎ': 'e'
|
|
175
|
+
'ఏ': 'ē'
|
|
176
|
+
'ఐ': 'ai'
|
|
177
|
+
'ఒ': 'o'
|
|
178
|
+
'ఓ': 'ō'
|
|
179
|
+
'ఔ': 'au'
|
|
180
|
+
|
|
181
|
+
# Gutturals
|
|
182
|
+
'క': 'ka'
|
|
183
|
+
'ఖ': 'kha'
|
|
184
|
+
'గ': 'ga'
|
|
185
|
+
'ఘ': 'gha'
|
|
186
|
+
'ఙ': 'ṅa'
|
|
187
|
+
|
|
188
|
+
# Palatals
|
|
189
|
+
'చ': 'ca'
|
|
190
|
+
'ౘ': 'ĉa'
|
|
191
|
+
'ఛ': 'cha'
|
|
192
|
+
'జ': 'ja'
|
|
193
|
+
'ౙ': 'ĵa'
|
|
194
|
+
'ఝ': 'jha'
|
|
195
|
+
'ఞ': 'ña'
|
|
196
|
+
|
|
197
|
+
# Cerebrals
|
|
198
|
+
'ట': 'ṭa'
|
|
199
|
+
'ఠ': 'ṭha'
|
|
200
|
+
'డ': 'ḍa'
|
|
201
|
+
'ఢ': 'ḍha'
|
|
202
|
+
'ణ': 'ṇa'
|
|
203
|
+
|
|
204
|
+
# Dentals
|
|
205
|
+
'త': 'ta'
|
|
206
|
+
'థ': 'tha'
|
|
207
|
+
'ద': 'da'
|
|
208
|
+
'ధ': 'dha'
|
|
209
|
+
'న': 'na'
|
|
210
|
+
|
|
211
|
+
# Labials
|
|
212
|
+
'ప': 'pa'
|
|
213
|
+
'ఫ': 'pha'
|
|
214
|
+
'బ': 'ba'
|
|
215
|
+
'భ': 'bha'
|
|
216
|
+
'మ': 'ma'
|
|
217
|
+
|
|
218
|
+
# Semivowels
|
|
219
|
+
'య': 'ya'
|
|
220
|
+
'ర': 'ra'
|
|
221
|
+
'ఱ': 'ṛa'
|
|
222
|
+
'ల': 'la'
|
|
223
|
+
'ళ': 'ḷa'
|
|
224
|
+
'వ': 'va'
|
|
225
|
+
|
|
226
|
+
# Sibilants
|
|
227
|
+
'శ': 'śa'
|
|
228
|
+
'ష': 'ṣa'
|
|
229
|
+
'స': 'sa'
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Aspirate
|
|
234
|
+
'హ': 'ha'
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# Chandrabindu
|
|
239
|
+
'ঁ': 'm̐'
|
|
240
|
+
'ఁ': 'm̐'
|
|
241
|
+
|
|
242
|
+
# Bisarga
|
|
243
|
+
'ః': 'ḥ'
|
|
244
|
+
|
|
245
|
+
# Anusvāra
|
|
246
|
+
'ం': 'ṃ'
|
|
247
|
+
|
|
248
|
+
# Medials # Needed for connecting consonants
|
|
249
|
+
|
|
250
|
+
'ా': 'ā'
|
|
251
|
+
'ి': 'i'
|
|
252
|
+
'ీ': 'ī'
|
|
253
|
+
'ు': 'u'
|
|
254
|
+
'ూ': 'ū'
|
|
255
|
+
'ృ': 'ṛ'
|
|
256
|
+
'ె': 'e'
|
|
257
|
+
'ే': 'ē'
|
|
258
|
+
'ై': 'ai'
|
|
259
|
+
'ొ': 'o'
|
|
260
|
+
'ో': 'ŏ'
|
|
261
|
+
'ౌ': 'au'
|
|
262
|
+
'\u09CD': '' # Used for joining
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
'\u0c4d': '' # virama sign (halanta)
|
|
266
|
+
'ౕ ': ''
|
|
267
|
+
'ౖ ': ''
|
|
268
|
+
'्': ''
|
|
269
|
+
'़': ''
|
|
270
|
+
"": ''# Used for joining
|
|
271
|
+
"": ''# Used for non joining
|
|
272
|
+
|
|
273
|
+
# numbers
|
|
274
|
+
|
|
275
|
+
'౦': '0'
|
|
276
|
+
'౧': '1'
|
|
277
|
+
'౨': '2'
|
|
278
|
+
'౩': '3'
|
|
279
|
+
'౪': '4'
|
|
280
|
+
'౫': '5'
|
|
281
|
+
'౬': '6'
|
|
282
|
+
'౭': '7'
|
|
283
|
+
'౮': '8'
|
|
284
|
+
'౯': '9'
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: alalc
|
|
3
|
+
id: 2011
|
|
4
|
+
language: iso-639-2:tel
|
|
5
|
+
source_script: Telu
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
name: Telugu Romanization, 2011
|
|
8
|
+
url: http://catdir.loc.gov/catdir/cpso/romanization/telugu.pdf
|
|
9
|
+
creation_date: 2011
|
|
10
|
+
description: |
|
|
11
|
+
ALA-LC Romanization table for Telugu
|
|
12
|
+
|
|
13
|
+
notes:
|
|
14
|
+
|
|
15
|
+
- |
|
|
16
|
+
Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
|
|
17
|
+
vowels following a consonant can be found in grammars; no distinction between the two is
|
|
18
|
+
made in transliteration.
|
|
19
|
+
- |
|
|
20
|
+
The vowel a is implicit after all consonants and consonant clusters and is supplied in
|
|
21
|
+
transliteration, with the following exceptions:
|
|
22
|
+
a) when another vowel is indicated by its appropriate sign; and
|
|
23
|
+
b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
|
|
24
|
+
- |
|
|
25
|
+
Exception: Sunna is transliterated by:
|
|
26
|
+
a) ṅ before gutturals,
|
|
27
|
+
b) ñ before palatals,
|
|
28
|
+
c) ṇ before cerebrals,
|
|
29
|
+
d) n before dentals, and
|
|
30
|
+
e) m before labials.
|
|
31
|
+
- |
|
|
32
|
+
Ardhasunna before guttural, palatal, cerebral, and dental occlusives is transliterated n̐.
|
|
33
|
+
Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
|
|
34
|
+
transliterated m̐.
|
|
35
|
+
|
|
36
|
+
tests:
|
|
37
|
+
- source: "తమిళనాడు"
|
|
38
|
+
expected: "tamiḷanāḍu"
|
|
39
|
+
- source: "దేవాస్కు ౮౯౩౯ కోట్లివ్వండి"
|
|
40
|
+
expected: "dēvāsaku 8939 kŏṭalivavaṃḍi"
|
|
41
|
+
- source: "యూరప్, అమెరికాకు కోవిడ్ దడ"
|
|
42
|
+
expected: "yūrapa, amerikāku kŏviḍa daḍa"
|
|
43
|
+
- source: "జనవరి నాటికి అమెరికాలో టీకా"
|
|
44
|
+
expected: "janavari nāṭiki amerikālŏ ṭīkā"
|
|
45
|
+
- source: "ఫ్రాన్స్ను ముస్లింలు శిక్షించవచ్చు"
|
|
46
|
+
expected: "pharānasanu musaliṃlu śikaṣiñcavacacu"
|
|
47
|
+
- source: "క్లాస్ రూంలో ఉపాధ్యాయుడి వికృత చేష్టలు"
|
|
48
|
+
expected: "kalāsa rūṃlŏ upādhayāyuḍi vikṛta cēṣaṭalu"
|
|
49
|
+
- source: "భారీ భూకంపం; భయంకరమైన అనుభవాలు"
|
|
50
|
+
expected: "bhārī bhūkampaṃ; bhayaṅkaramaina anubhavālu"
|
|
51
|
+
- source: "నిట్ట నిలువునా కూలిన అపార్ట్మెంట్"
|
|
52
|
+
expected: "niṭaṭa niluvunā kūlina apāraṭameṇṭa"
|
|
53
|
+
- source: "పిచ్చి ప్రయోగాలకు పోతే జరిగేది ఇదే"
|
|
54
|
+
expected: "picaci parayŏgālaku pŏtē jarigēdi idē"
|
|
55
|
+
- source: "కరోనాపై సీడీసీ వైఫల్యం ఎందుకు?"
|
|
56
|
+
expected: "karŏnāpai sīḍīsī vaiphalayaṃ eṃduku?"
|
|
57
|
+
- source: "అత్యంత అరుదైన పులి పిల్లలు ఇవే!"
|
|
58
|
+
expected: "atayanta arudaina puli pilalalu ivē!"
|
|
59
|
+
|
|
60
|
+
map:
|
|
61
|
+
|
|
62
|
+
inherit: alalc-tel-Telu-Latn-1997
|
|
63
|
+
|
|
64
|
+
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
---
|
|
2
|
+
authority_id: az
|
|
3
|
+
id: 1939
|
|
4
|
+
language: iso-639-2:aze
|
|
5
|
+
source_script: Cyrl
|
|
6
|
+
destination_script: Latn
|
|
7
|
+
url: https://omniglot.com/writing/azeri.htm
|
|
8
|
+
creation_date: 1939
|
|
9
|
+
description: |
|
|
10
|
+
In 1939 Joseph Stalin ordered the Cyrillic alphabet to be used by Azeri speakers in the Soviet Union.
|
|
11
|
+
|
|
12
|
+
notes:
|
|
13
|
+
# from Wikipedia
|
|
14
|
+
- The letter Ц, intended for the sound [ts] in borrowed words, was used in the Azerbaijani Cyrillic alphabet until 1951. In the Azeri language, the sound [ts] usually becomes [s].
|
|
15
|
+
|
|
16
|
+
tests:
|
|
17
|
+
- source: Юя
|
|
18
|
+
expected: Yuya
|
|
19
|
+
# from internet
|
|
20
|
+
- source: Азәрбайҹан әлифбасы
|
|
21
|
+
expected: Azərbaycan əlifbası
|
|
22
|
+
- source: |
|
|
23
|
+
Бүтүн инсанлар ләйагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.
|
|
24
|
+
Онларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.
|
|
25
|
+
expected: |
|
|
26
|
+
Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.
|
|
27
|
+
Onların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar.
|
|
28
|
+
|
|
29
|
+
map:
|
|
30
|
+
characters:
|
|
31
|
+
"\u0410": "A" # А
|
|
32
|
+
"\u0411": "B" # Б
|
|
33
|
+
"\u0412": "V" # В
|
|
34
|
+
"\u0413": "Q" # Г
|
|
35
|
+
"\u0492": "\u011E" # Ғ
|
|
36
|
+
"\u0414": "D" # Д
|
|
37
|
+
"\u0415": "E" # Е
|
|
38
|
+
"\u04D8": "\u018F" # Ә
|
|
39
|
+
"\u0416": "J" # Ж
|
|
40
|
+
"\u0417": "Z" # З
|
|
41
|
+
"\u0418": "\u0130" # И
|
|
42
|
+
"\u0419": "Y" # Й
|
|
43
|
+
"\u041A": "K" # К
|
|
44
|
+
"\u049C": "G" # Ҝ
|
|
45
|
+
"\u041B": "L" # Л
|
|
46
|
+
"\u041C": "M" # М
|
|
47
|
+
"\u041D": "N" # Н
|
|
48
|
+
"\u041E": "O" # О
|
|
49
|
+
"\u04E8": "\u00D6" # Ө
|
|
50
|
+
"\u041F": "P" # П
|
|
51
|
+
"\u0420": "R" # Р
|
|
52
|
+
"\u0421": "S" # С
|
|
53
|
+
"\u0422": "T" # Т
|
|
54
|
+
"\u0423": "U" # У
|
|
55
|
+
"\u04AE": "\u00DC" # Ү
|
|
56
|
+
"\u0424": "F" # Ф
|
|
57
|
+
"\u0425": "X" # Х
|
|
58
|
+
"\u0426": "s" # Ц note[1]
|
|
59
|
+
"\u04BA": "H" # Һ
|
|
60
|
+
"\u0427": "\u00C7" # Ч
|
|
61
|
+
"\u04B8": "C" # Ҹ
|
|
62
|
+
"\u0428": "\u015E" # Ш
|
|
63
|
+
"\u042B": "I" # Ы
|
|
64
|
+
"\u042D": "E" # Э
|
|
65
|
+
"\u042E": "Yu" # Ю
|
|
66
|
+
"\u042F": "Ya" # Я
|
|
67
|
+
|
|
68
|
+
"\u0430": "a" # а
|
|
69
|
+
"\u0431": "b" # б
|
|
70
|
+
"\u0432": "v" # в
|
|
71
|
+
"\u0433": "q" # г
|
|
72
|
+
"\u0493": "\u011F" # ғ
|
|
73
|
+
"\u0434": "d" # д
|
|
74
|
+
"\u0435": "e" # е
|
|
75
|
+
"\u04D9": "\u0259" # ә
|
|
76
|
+
"\u0436": "j" # ж
|
|
77
|
+
"\u0437": "z" # з
|
|
78
|
+
"\u0438": "i" # и
|
|
79
|
+
"\u0439": "y" # й
|
|
80
|
+
"\u043A": "k" # к
|
|
81
|
+
"\u049D": "g" # ҝ
|
|
82
|
+
"\u043B": "l" # л
|
|
83
|
+
"\u043C": "m" # м
|
|
84
|
+
"\u043D": "n" # н
|
|
85
|
+
"\u043E": "o" # о
|
|
86
|
+
"\u04E9": "\u00F6" # ө
|
|
87
|
+
"\u043F": "p" # п
|
|
88
|
+
"\u0440": "r" # р
|
|
89
|
+
"\u0441": "s" # с
|
|
90
|
+
"\u0442": "t" # т
|
|
91
|
+
"\u0443": "u" # у
|
|
92
|
+
"\u04AF": "\u00FC" # ү
|
|
93
|
+
"\u0444": "f" # ф
|
|
94
|
+
"\u0445": "x" # х
|
|
95
|
+
"\u0446": "s" # ц note[1]
|
|
96
|
+
"\u04BB": "h" # һ
|
|
97
|
+
"\u0447": "\u00E7" # ч
|
|
98
|
+
"\u04B9": "c" # ҹ
|
|
99
|
+
"\u0448": "\u015F" # ш
|
|
100
|
+
"\u044B": "\u0131" # ы
|
|
101
|
+
"\u044D": "e" # э
|
|
102
|
+
"\u044E": "yu" # ю
|
|
103
|
+
"\u044F": "ya" # я
|
|
104
|
+
|
|
105
|
+
"\u0027": "" # '
|