interscript 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -0,0 +1,173 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: knd
|
5
|
+
source_script: Knda
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Kannada Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "ಈಗ ವೈರಲ್ ಆಗುತ್ತಿದೆ ಕಂಗನಾ ರಣಾವುತ್ ಹಳೇಯ ವಿಡಿಯೋ"
|
27
|
+
expected: "īg vairl āguttide kṅgnā rṇāvut hḷēy viḍiyō"
|
28
|
+
- source: "ಸಂಕಷ್ಟ ಎದುರಾದರೆ ಬಿಎಸ್ವೈ ಬೆನ್ನಿಗೆ ಎಚ್ಡಿಕೆ ?"
|
29
|
+
expected: "sṅkṣṭ edurādre biesvai bennige ecḍike ?"
|
30
|
+
- source: "ಶಾಸಕರಿಂದಲೂ ಒತ್ತಡ?"
|
31
|
+
expected: "śāskrindlū ottḍ?"
|
32
|
+
- source: "ಏಕೆಂದರೆ, ಇವರ ಹೆಸರೇ ಕೊರೊನಾ!"
|
33
|
+
expected: "ēkendre, ivr hesrē keūronā!"
|
34
|
+
- source: "ಕೊರೊನಾಕ್ಕಿಂತಲೂ ಮುಂಚೆಯೇ ಅವರು ಕೊರೊನಾ ಆಗಿದ್ದವರು!"
|
35
|
+
expected: "keūronākkintlū muñceyē avru keūronā āgiddvru!"
|
36
|
+
- source: "ಕೇರಳದ ಕೊಟ್ಟಾಯಂನ ಮಹಿಳೆಯೊಬ್ಬರು ಈಗ ತಮ್ಮ ಹೆಸರು ಹೇಳಲು ಮುಜುಗರ ಪಡುವಂತಾಗಿದೆ"
|
37
|
+
expected: "kērḷd keūṭṭāynn mhiḷeyobbru īg tmm hesru hēḷlu mujugr pḍuvntāgide"
|
38
|
+
- source: "ಬೇರೆ ಬೆಳವಣಿಗೆಗೆ ಸಾಕ್ಷಿ ಸಾಧ್ಯತೆ"
|
39
|
+
expected: "bēre beḷvṇigege sākṣi sādhyte"
|
40
|
+
- source: "ಗುರು ಶನಿ ಗ್ರಹಗಳ ನಡುವೆ 3 ಜನರ ಪ್ರಯಾಣ"
|
41
|
+
expected: "guru śni grhgḷ nḍuve 3 jnr pryāṇ"
|
42
|
+
- source: "ಕೊರೊನಾ ಬಿಕ್ಕಟ್ಟಿನ ಕಾಲದಲ್ಲಿ “ಮಿಸೆಸ್ ಕೊರೊನಾ’ಗೆ ಸಮಸ್ಯೆ!"
|
43
|
+
expected: "keūronā bikkṭṭin kāldlli “mises keūronā’ge smsye!"
|
44
|
+
- source: "ಕೆಲವು ತಿಂಗಳಿಂದ ರಷ್ಯಾ ದೇಶದ ಏನಾಟೊಲಿ ಇವ್ಯಾನಿಶಿನ್ ಮತ್ತು ಇವಾನ್ ವ್ಯಾಗನರ್ ಹಾಗೂ ಅಮೆರಿಕಾದ ಕ್ರಿಸ್ ಕ್ಯಾಸಿಡಿ ಈ ಉಪಗ್ರಹದಲ್ಲಿ ವಾಸಿಸುತ್ತಿದ್ದಾರೆ"
|
45
|
+
expected: "kelvu tiṅgḷind rṣyā dēśd ēnāṭeūli ivyāniśin mttu ivān vyāgnr hāgū amerikād kris kyāsiḍi ī upgrhdlli vāsisuttiddāre"
|
46
|
+
|
47
|
+
|
48
|
+
map:
|
49
|
+
|
50
|
+
rules:
|
51
|
+
# Anusvāra exception (see notes): it becomes the nasal of the following consonant's class
|
52
|
+
- pattern: \u0C82(?=[ಕಖಗಘಙ])
|
53
|
+
result: ṅ
|
54
|
+
- pattern: \u0C82(?=[ಚಛಜಝಞ])
|
55
|
+
result: ñ
|
56
|
+
- pattern: \u0C82(?=[ಟಠಡಢಣ])
|
57
|
+
result: ṇ
|
58
|
+
- pattern: \u0C82(?=[ತಥದಧನ])
|
59
|
+
result: n
|
60
|
+
- pattern: \u0C82(?=[ಪಫಬಭಮ])
|
61
|
+
result: m
|
62
|
+
|
63
|
+
characters:
|
64
|
+
'ಅ': 'a'
|
65
|
+
'ಆ': 'ā'
|
66
|
+
'ಇ': 'i'
|
67
|
+
'ಈ': 'ī'
|
68
|
+
'ಉ': 'u'
|
69
|
+
'ಊ': 'ū'
|
70
|
+
'ಋ': 'ṛ'
|
71
|
+
'ಌ': 'ḻ'
|
72
|
+
|
73
|
+
'ಎ': 'e'
|
74
|
+
'ಏ': 'ē'
|
75
|
+
'ಐ': 'ai'
|
76
|
+
|
77
|
+
'ಒ': 'o'
|
78
|
+
'ಓ': 'ŏ'
|
79
|
+
'ಔ': 'au'
|
80
|
+
|
81
|
+
# II. Consonants (see Note 2)
|
82
|
+
# Gutturals
|
83
|
+
'ಕ': 'k'
|
84
|
+
'ಖ': 'kh'
|
85
|
+
'ಗ': 'g'
|
86
|
+
'ಘ': 'gh'
|
87
|
+
'ಙ': 'ṅ'
|
88
|
+
|
89
|
+
# Palatals
|
90
|
+
'ಚ': 'c'
|
91
|
+
'ಛ': 'ch'
|
92
|
+
'ಜ': 'j'
|
93
|
+
'ಝ': 'jh'
|
94
|
+
'ಞ': 'ñ'
|
95
|
+
|
96
|
+
# Cerebrals
|
97
|
+
'ಟ': 'ṭ'
|
98
|
+
'ಠ': 'ṭh'
|
99
|
+
'ಡ': 'ḍ'
|
100
|
+
'ಢ': 'ḍh'
|
101
|
+
'ಣ': 'ṇ'
|
102
|
+
|
103
|
+
# Dentals
|
104
|
+
'ತ': 't'
|
105
|
+
'ಥ': 'th'
|
106
|
+
'ದ': 'd'
|
107
|
+
'ಧ': 'dh'
|
108
|
+
'ನ': 'n'
|
109
|
+
|
110
|
+
# Labials
|
111
|
+
'ಪ': 'p'
|
112
|
+
'ಫ': 'ph'
|
113
|
+
'ಬ': 'b'
|
114
|
+
'ಭ': 'bh'
|
115
|
+
'ಮ': 'm'
|
116
|
+
|
117
|
+
# Semivowels
|
118
|
+
'ಯ': 'y'
|
119
|
+
'ರ': 'r'
|
120
|
+
'ಱ': 'ṟ'
|
121
|
+
'ಲ': 'l'
|
122
|
+
'ಳ': 'ḷ'
|
123
|
+
# 'ऴ': 'ẕ'
|
124
|
+
|
125
|
+
|
126
|
+
'ವ': 'v'
|
127
|
+
|
128
|
+
# Sibilants
|
129
|
+
'ಶ': 'ś'
|
130
|
+
'ಷ': 'ṣ'
|
131
|
+
'ಸ': 's'
|
132
|
+
|
133
|
+
|
134
|
+
# Aspirate
|
135
|
+
'ಹ': 'h'
|
136
|
+
|
137
|
+
|
138
|
+
# Chandrabindu
|
139
|
+
'\u0C80': 'm'
|
140
|
+
'\u0C81': 'm'
|
141
|
+
|
142
|
+
# Bisarga
|
143
|
+
'ಃ': 'ḥ'
|
144
|
+
|
145
|
+
# Anusvāra
|
146
|
+
'ಂ': 'ṃ'
|
147
|
+
|
148
|
+
'$಼': '' #nukta
|
149
|
+
|
150
|
+
# Medials # Needed for connecting consonants
|
151
|
+
'ಾ': "ā"
|
152
|
+
'ಿ': "i"
|
153
|
+
'ೀ': "ī"
|
154
|
+
'ು': "u"
|
155
|
+
'ೂ': "ū"
|
156
|
+
'ೃ': "ṛ"
|
157
|
+
'ೄ': "ṛr"
|
158
|
+
|
159
|
+
|
160
|
+
'ೆ': "e"
|
161
|
+
'ೇ': "ē"
|
162
|
+
'ೈ': "ai"
|
163
|
+
|
164
|
+
|
165
|
+
'ೊ': 'o'
|
166
|
+
'ೋ': 'ō'
|
167
|
+
'ೌ': 'au'
|
168
|
+
|
169
|
+
'्': ''
|
170
|
+
'़': ''
|
171
|
+
'್': ''
|
172
|
+
"": '' # no need for zero with joiner
|
173
|
+
"": '' # no need for zero with non joiner
|
@@ -0,0 +1,176 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: mlm
|
5
|
+
source_script: Mlym
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Malayalam Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "സ്വപ്നയ്ക്കൊപ്പം ഹോട്ടലിൽ മന്ത്രിപുത്രൻ, ചിത്രങ്ങൾ; 4 കോടി കമ്മിഷനിലും പങ്കുപറ്റി"
|
27
|
+
expected: "svpnykkeāppṃ hōṭṭlil mntriputrṇ, citrṅṅḷ; 4 kōṭi kmmiṣniluṃ pṅkupṟṟi"
|
28
|
+
- source: "വിവാദങ്ങളിൽ മാപ്പില്ല, ആദ്യമായി ഐപിഎൽ കമന്ററിക്കില്ലാതെ മഞ്ജരേക്കര്; പുറത്ത് തന്നെ"
|
29
|
+
expected: "vivādṅṅḷil māppill, ādymāyi aipiel kmnṟṟikkillāte mñjrēkkr; puṟtt tnne"
|
30
|
+
- source: "പരമാവധി ഊറ്റിയെടുത്തു; എല്ലാം കഴിഞ്ഞ് ഉപേക്ഷിച്ചു: വിങ്ങലോടെ റംസിയുടെ സഹോദരി"
|
31
|
+
expected: "prmāvdhi ūṟṟiyeṭuttu; ellāṃ kẕiññ upēkṣiccu: viṅṅlōṭe ṟṃsiyuṭe shōdri"
|
32
|
+
- source: "വഴിനീളെ രോഷം; യൂത്ത് കോണ്ഗ്രസുകാരന്റെ കയ്യൊടിഞ്ഞു, കൈവീശികാട്ടി ജലീൽ"
|
33
|
+
expected: "vẕinīḷe rōṣṃ; yūtt kōṇgrsukārnṟe kyyoṭiññu, kaivīśikāṭṭi jlīl"
|
34
|
+
- source: "‘വികൃതിപ്പയ്യനാ’യിരുന്ന കോലി മിന്നും താരമായത് ഇന്ത്യൻ ക്രിക്കറ്റിന്റെ ഗുണം: അക്തർ"
|
35
|
+
expected: "‘vikṛtippyynā’yirunn kōli minnuṃ tārmāyt intyṇ krikkṟṟinṟe guṇṃ: aktr"
|
36
|
+
- source: "ലോകത്തിനു വാക്സീൻ വേണമെങ്കിൽ ഈ നഗരം കനിയണം; തലയുയർത്തി ഇന്ത്യ"
|
37
|
+
expected: "lōkttinu vāksīṇ vēṇmeṅkil ī ngrṃ kniyṇṃ; tlyuyrtti inty"
|
38
|
+
- source: "‘അദ്ദേഹം ഒരു മകളെപ്പോലെ എന്നെ കേട്ടു’: ഗവർണറെ കണ്ട് കങ്കണ റനൗട്ട്"
|
39
|
+
expected: "‘addēhṃ oru mkḷeppōle enne kēṭṭu’: gvrṇṟe kṇṭ kṅkṇ ṟnṭṭ"
|
40
|
+
- source: "‘എല്ലാം ഫെയ്സ്ബുക്കില് പറയുമെന്നു ജലീല്; കനത്ത സുരക്ഷയില് യാത്ര, കരിങ്കൊടി"
|
41
|
+
expected: "‘ellāṃ pheysbukkil pṟyumennu jlīl; kntt surkṣyil yātr, kriṅkoṭi"
|
42
|
+
- source: "ഏറ്റവും ചെറുപ്പം ജോയി; ജയലക്ഷ്മി, ദീപ്തി, ജ്യോതി; പട്ടികയിലെ നിര ഇങ്ങനെ"
|
43
|
+
expected: "ēṟṟvuṃ ceṟuppṃ jōyi; jylkṣmi, dīpti, jyōti; pṭṭikyile nir iṅṅne"
|
44
|
+
- source: "പരിശോധന കുറച്ച് കേരളം; കോവിഡ് ടെസ്റ്റ് പോസിറ്റിവിറ്റി നിരക്ക് എറ്റവും ഉയർന്ന്; ആശങ്ക"
|
45
|
+
expected: "priśōdhn kuṟcc kērḷṃ; kōvid̂ ṭesṟṟ pōsiṟṟiviṟṟi nirkk eṟṟvuṃ uyrnn; āśṅk"
|
46
|
+
|
47
|
+
map:
|
48
|
+
|
49
|
+
rules:
|
50
|
+
# Anusvāra exception (see notes): it becomes the nasal of the following consonant's class
|
51
|
+
- pattern: \u0D02(?=[കൿഖഗഘങ])
|
52
|
+
result: ṅ
|
53
|
+
- pattern: \u0D02(?=[ചഛജഝഞ])
|
54
|
+
result: ñ
|
55
|
+
- pattern: \u0D02(?=[ടഠഡഢണ])
|
56
|
+
result: ṇ
|
57
|
+
- pattern: \u0D02(?=[തഥദധന])
|
58
|
+
result: n
|
59
|
+
- pattern: \u0D02(?=[പഫബഭമ])
|
60
|
+
result: m
|
61
|
+
|
62
|
+
|
63
|
+
characters:
|
64
|
+
'അ': 'a'
|
65
|
+
'ആ': 'ā'
|
66
|
+
'ഇ': 'i'
|
67
|
+
'ഈ': 'ī'
|
68
|
+
'ഉ': 'u'
|
69
|
+
'ഊ': 'ū'
|
70
|
+
'ഋ': 'ṛ'
|
71
|
+
'ഌ': 'ḻ'
|
72
|
+
|
73
|
+
|
74
|
+
'എ': 'e'
|
75
|
+
'ഏ': 'ē'
|
76
|
+
'ഐ': 'ai'
|
77
|
+
|
78
|
+
'ഒ': 'o'
|
79
|
+
'ഓ': 'ŏ'
|
80
|
+
'ഔ': 'au'
|
81
|
+
|
82
|
+
# II. Consonants (see Note 2)
|
83
|
+
# Gutturals
|
84
|
+
'ക': 'k'
|
85
|
+
'ൿ': 'k'
|
86
|
+
'ഖ': 'kh'
|
87
|
+
'ഗ': 'g'
|
88
|
+
'ഘ': 'gh'
|
89
|
+
'ങ': 'ṅ'
|
90
|
+
|
91
|
+
# Palatals
|
92
|
+
'ച': 'c'
|
93
|
+
'ഛ': 'ch'
|
94
|
+
'ജ': 'j'
|
95
|
+
'ഝ': 'jh'
|
96
|
+
'ഞ': 'ñ'
|
97
|
+
|
98
|
+
# Cerebrals
|
99
|
+
'ട': 'ṭ'
|
100
|
+
'ഠ': 'ṭh'
|
101
|
+
'ഡ': 'd̂'
|
102
|
+
'ഢ': 'ḍh'
|
103
|
+
'ണ': 'ṇ'
|
104
|
+
|
105
|
+
# Dentals
|
106
|
+
'ത': 't'
|
107
|
+
'ഥ': 'th'
|
108
|
+
'ദ': 'd'
|
109
|
+
'ധ': 'dh'
|
110
|
+
'ന': 'n'
|
111
|
+
|
112
|
+
# Labials
|
113
|
+
'പ': 'p'
|
114
|
+
'ഫ': 'ph'
|
115
|
+
'ബ': 'b'
|
116
|
+
'ഭ': 'bh'
|
117
|
+
'മ': 'm'
|
118
|
+
|
119
|
+
# Semivowels
|
120
|
+
'യ': 'y'
|
121
|
+
'ര': 'r'
|
122
|
+
'ർ': 'r'
|
123
|
+
'റ': 'ṟ'
|
124
|
+
'ൽ': 'l'
|
125
|
+
'ല': 'l'
|
126
|
+
'ള': 'ḷ'
|
127
|
+
'ൾ': 'ḷ'
|
128
|
+
'ഴ': 'ẕ'
|
129
|
+
# Sibilants
|
130
|
+
'വ': 'v'
|
131
|
+
'ശ': 'ś'
|
132
|
+
'ഷ': 'ṣ'
|
133
|
+
'സ': 's'
|
134
|
+
|
135
|
+
|
136
|
+
# Aspirate
|
137
|
+
'ഹ': 'h'
|
138
|
+
|
139
|
+
# Chandrabindu
|
140
|
+
'\u0D01': 'm'
|
141
|
+
|
142
|
+
# Bisarga
|
143
|
+
'ഃ': 'ḥ'
|
144
|
+
|
145
|
+
# Anusvāra
|
146
|
+
'ം': 'ṃ'
|
147
|
+
|
148
|
+
# Medials # Needed for connecting consonants
|
149
|
+
|
150
|
+
'ാ': 'ā'
|
151
|
+
'ി': 'i'
|
152
|
+
'ീ': 'ī'
|
153
|
+
'ു': 'u'
|
154
|
+
'ൂ': 'ū'
|
155
|
+
|
156
|
+
'ൃ': "ṛ"
|
157
|
+
'ൄ': "ṝ"
|
158
|
+
|
159
|
+
|
160
|
+
'െ': "e"
|
161
|
+
'േ': "ē"
|
162
|
+
'ൈ': "ai"
|
163
|
+
'ൊ': 'o'
|
164
|
+
'ോ': 'ō'
|
165
|
+
'ൌ': 'au'
|
166
|
+
|
167
|
+
|
168
|
+
'ൺ': 'n'
|
169
|
+
'ൻ': 'ṇ'
|
170
|
+
|
171
|
+
'्': ''
|
172
|
+
'്': ''
|
173
|
+
'़': ''
|
174
|
+
'ൗ': ''
|
175
|
+
"": '' # no need for zero with joiner
|
176
|
+
"": '' # no need for zero with non joiner
|
@@ -0,0 +1,160 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: ori
|
5
|
+
source_script: Orya
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Oriya Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "ସାମ୍ପ୍ରତିକ ବିଶ୍ବ ସ୍ଥିତାବସ୍ଥାକୁ ଚାଲେଞ୍ଜ୍ କରୁଥିବା ଦୁଇ ମୁଖ୍ୟ ପ୍ରତିଦ୍ବନ୍ଦ୍ବୀ ହେଉଛନ୍ତି ଚୀନ୍ ଓ ରୁଷ୍: ଇଂଲଣ୍ଡ୍ ଗୁଇନ୍ଦା ଅଧିକାରୀ"
|
27
|
+
expected: "sāmprtik biśb sthitābsthāku cālēñj kruthibā dui mukhẏ prtidbndbī hēuchnti cīn ŏ ruṣ: iṃlṇḍ guindā adhikārī"
|
28
|
+
- source: "ଏଣିକି ଏହି ଗାଡ଼ି ଚଳାଇଲେ ପୁଲିସ କାଟି ପାରିବ ନାହିଁ ଫାଇନ୍"
|
29
|
+
expected: "ēṇiki ēhi gād̂i cḷāilē pulis kāṭi pārib nāhim phāin"
|
30
|
+
- source: "ପିସି କାରବାର ଘଟଣା, ନିଲମ୍ବନ ହେଲେ ପଞ୍ଚାୟତ ଅଧିକାରୀ"
|
31
|
+
expected: "pisi kārbār ghṭṇā, nilmbn hēlē pñcāẏt adhikārī"
|
32
|
+
- source: "ବରିଷ୍ଠ ଓଡ଼ିଆ ଚଳଚ୍ଚିତ୍ର ଅଭିନେତା ଅଜିତ ଦାସଙ୍କ"
|
33
|
+
expected: "briṣṭh ŏḍiā cḷccitr abhinētā ajit dāsṅk"
|
34
|
+
- source: "ସଞ୍ଚୟ କରିବାରେ କେଉଁ ରାଶି ଅଧିକ ସତର୍କ ?"
|
35
|
+
expected: "sñcẏ kribārē kēum rāśi adhik strk ?"
|
36
|
+
- source: "କର୍କଟ ରାଶିର ଅଧିକାରୀ ନିଜ ଜ୍ଞାତିପରିଜନଙ୍କ ପାଇଁ ଟଙ୍କା ଖର୍ଚ୍ଚ କରିବାକୁ ପସନ୍ଦ କରିଥାନ୍ତି।"
|
37
|
+
expected: "krkṭ rāśir adhikārī nij jñātiprijnṅk pāim ṭṅkā khrcc kribāku psnd krithānti."
|
38
|
+
- source: "ବୃଷ ରାଶିର ବ୍ୟକ୍ତିମାନେ ସ୍ବଭାବରେ କଞ୍ଜୁସ୍ କିମ୍ବା କୃପଣ ନୁହନ୍ତି"
|
39
|
+
expected: "bṛṣ rāśir bẏktimānē sbbhābrē kñjus kimbā kṛpṇ nuhnti"
|
40
|
+
- source: "ନବନିଯୁକ୍ତ ଓଡିଶା କଂଗ୍ରେସ ପ୍ରଭାରୀ ଏ.ଚେଲ୍ଲା କୁମାରଙ୍କୁ କରୋନା"
|
41
|
+
expected: "nbniyukt ŏḍiśā kṅgrēs prbhārī ē.cēllā kumārṅku krŏnā"
|
42
|
+
- source: "ଦିଲ୍ଲୀ: ଦିନ ଦ୍ବିପହରରେ ଗାଡ଼ି ଉପରକୁ ଦୁର୍ବୃତ୍ତ ଚଳାଇଲେ ୮ ରାଉଣ୍ଡ ଗୁଳି: ଚାଳକଙ୍କ ମୃତ୍ୟୁ"
|
43
|
+
expected: "dillī: din dbiphrrē gād̂i uprku durbṛtt cḷāilē ୮ rāuṇḍ guḷi: cāḷkṅk mṛtẏu"
|
44
|
+
- source: "ବୟସରେ ଆର ପାରିକୁ ଚାଲିଗଲେ କଣ୍ଠଶିଳ୍ପୀ ଅନୁରାଧା ପୋଡୱାଲଙ୍କ ପୁଅ ଆଦିତ୍ୟ"
|
45
|
+
expected: "bẏsrē ār pāriku cāliglē kṇṭhśiḷpī anurādhā pēāḍୱālṅk pua āditẏ"
|
46
|
+
|
47
|
+
map:
|
48
|
+
|
49
|
+
rules:
|
50
|
+
# Anusvāra exception (see notes): it becomes the nasal of the following consonant's class
|
51
|
+
- pattern: \u0B02(?=[କଖଗଘଙ])
|
52
|
+
result: ṅ
|
53
|
+
- pattern: \u0B02(?=[ଚଛଜଝଞ])
|
54
|
+
result: ñ
|
55
|
+
- pattern: \u0B02(?=[ଟଠଡଡ଼ଢଣଢ଼])
|
56
|
+
result: ṇ
|
57
|
+
- pattern: \u0B02(?=[ତଥଦଧନ])
|
58
|
+
result: n
|
59
|
+
- pattern: \u0B02(?=[ପଫବଭମ])
|
60
|
+
result: m
|
61
|
+
|
62
|
+
|
63
|
+
characters:
|
64
|
+
'ଅ': 'a'
|
65
|
+
'ଆ': 'ā'
|
66
|
+
'ଇ': 'i'
|
67
|
+
'ଈ': 'ī'
|
68
|
+
'ଉ': 'u'
|
69
|
+
'ଊ': 'ū'
|
70
|
+
'ଋ': 'ṛ'
|
71
|
+
'ଌ': 'ḻ'
|
72
|
+
'ଏ': 'ē'
|
73
|
+
'ଐ': 'ai'
|
74
|
+
'ଓ': 'ŏ'
|
75
|
+
'ଔ': 'au'
|
76
|
+
|
77
|
+
# II. Consonants (see Note 2)
|
78
|
+
# Gutturals
|
79
|
+
'କ': 'k'
|
80
|
+
'ଖ': 'kh'
|
81
|
+
'ଗ': 'g'
|
82
|
+
'ଘ': 'gh'
|
83
|
+
'ଙ': 'ṅ'
|
84
|
+
|
85
|
+
# Palatals
|
86
|
+
'ଚ': 'c'
|
87
|
+
'ଛ': 'ch'
|
88
|
+
'ଜ': 'j'
|
89
|
+
'ଝ': 'jh'
|
90
|
+
'ଞ': 'ñ'
|
91
|
+
|
92
|
+
# Cerebrals
|
93
|
+
'ଟ': 'ṭ'
|
94
|
+
'ଠ': 'ṭh'
|
95
|
+
'ଡ': 'ḍ'
|
96
|
+
'ଡ଼': 'd̂'
|
97
|
+
'ଢ': 'ḍh'
|
98
|
+
'ଢ଼': 'd̂h'
|
99
|
+
'ଣ': 'ṇ'
|
100
|
+
|
101
|
+
# Dentals
|
102
|
+
'ତ': 't'
|
103
|
+
'ଥ': 'th'
|
104
|
+
'ଦ': 'd'
|
105
|
+
'ଧ': 'dh'
|
106
|
+
'ନ': 'n'
|
107
|
+
|
108
|
+
# Labials
|
109
|
+
'ପ': 'p'
|
110
|
+
'ଫ': 'ph'
|
111
|
+
'ବ': 'b'
|
112
|
+
'ଭ': 'bh'
|
113
|
+
'ମ': 'm'
|
114
|
+
|
115
|
+
# Semivowels
|
116
|
+
'ଯ': 'y'
|
117
|
+
'ୟ': 'ẏ'
|
118
|
+
'ର': 'r'
|
119
|
+
'ଲ': 'l'
|
120
|
+
'ଳ': 'ḷ'
|
121
|
+
'ଵ': 'v'
|
122
|
+
|
123
|
+
# Sibilants
|
124
|
+
'ଶ': 'ś'
|
125
|
+
'ଷ': 'ṣ'
|
126
|
+
'ସ': 's'
|
127
|
+
|
128
|
+
|
129
|
+
# Aspirate
|
130
|
+
'ହ': 'h'
|
131
|
+
|
132
|
+
# Chandrabindu
|
133
|
+
'ଁ': 'm'
|
134
|
+
|
135
|
+
# Bisarga
|
136
|
+
'ଃ': 'ḥ'
|
137
|
+
|
138
|
+
# Anusvāra
|
139
|
+
'ଂ': 'ṃ'
|
140
|
+
|
141
|
+
# Medials # Needed for connecting consonants
|
142
|
+
|
143
|
+
'ା': 'ā'
|
144
|
+
'ି': 'i'
|
145
|
+
'ୀ': 'ī'
|
146
|
+
'ୁ': 'u'
|
147
|
+
'ୂ': 'ū'
|
148
|
+
'ୃ': 'ṛ'
|
149
|
+
'େ': 'ē'
|
150
|
+
'ୈ': 'ai'
|
151
|
+
'ୋ': 'ŏ'
|
152
|
+
'ୌ': 'au'
|
153
|
+
|
154
|
+
'्': ''
|
155
|
+
'୍': ''
|
156
|
+
'़': ''
|
157
|
+
'଼': ''
|
158
|
+
'।': '.'
|
159
|
+
"": ''# Used for joining
|
160
|
+
"": ''# Used for non joining
|