interscript 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
- data/lib/interscript.rb +5 -1
- data/lib/interscript/fs.rb +3 -1
- data/lib/interscript/mapping.rb +2 -2
- data/lib/interscript/opal.rb +5 -1
- data/lib/interscript/opal/maps.js.erb +7 -4
- data/lib/interscript/version.rb +1 -1
- data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
- data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
- data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
- data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
- data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
- data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
- data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
- data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
- data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
- data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
- data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
- data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
- data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
- data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
- data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
- data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
- data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
- data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
- data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
- data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
- data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
- data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
- data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
- data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
- data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
- data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
- data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
- data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
- data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
- data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
- data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
- data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
- data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
- data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
- data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
- data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
- data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
- data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
- data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
- data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
- data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
- data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
- data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
- data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
- data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
- data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
- data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
- data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
- data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
- data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
- data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
- data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
- data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
- data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
- data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
- data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
- data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
- data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
- data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
- data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
- data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
- data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
- data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
- data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
- data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
- data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
- data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
- data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
- data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
- data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
- data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
- data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
- data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
- data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
- data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
- data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
- data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
- data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
- data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
- data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
- data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
- data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
- data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
- data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
- data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
- data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
- data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
- data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
- data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
- data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
- data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
- data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
- data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
- data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
- data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
- data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
- data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
- data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
- metadata +41 -15
@@ -15,8 +15,8 @@ description: |
|
|
15
15
|
correspondences given below
|
16
16
|
|
17
17
|
notes:
|
18
|
-
- The letter w is used word initially and before a vowel. # 'and' or 'or' ?
|
19
|
-
- The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
|
18
|
+
- The letter w is used word initially and before a vowel. # 'and' or 'or' ?
|
19
|
+
- The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
|
20
20
|
- The letter w is used between or after vowels.
|
21
21
|
- The letter w is used after e, u, ö and ə.
|
22
22
|
- |
|
@@ -55,7 +55,7 @@ tests:
|
|
55
55
|
- source: Ҡыҙылъяр
|
56
56
|
expected: Qıźılyar
|
57
57
|
# adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
|
58
|
-
- source: кемдең
|
58
|
+
- source: кемдең
|
59
59
|
expected: kemdeñ
|
60
60
|
- source: кем
|
61
61
|
expected: kem
|
@@ -65,7 +65,7 @@ tests:
|
|
65
65
|
expected: oşo
|
66
66
|
- source: быларҙың
|
67
67
|
expected: bılarźıñ
|
68
|
-
- source: һеҙҙән
|
68
|
+
- source: һеҙҙән
|
69
69
|
expected: heźźən
|
70
70
|
- source: һин
|
71
71
|
expected: hin
|
@@ -136,7 +136,7 @@ map:
|
|
136
136
|
'\u042B': 'I' # Ы
|
137
137
|
'\u042C': '' # Ь
|
138
138
|
'\u042D': 'E' # Э
|
139
|
-
'\u04D8': "\u018F" # Ә
|
139
|
+
'\u04D8': "\u018F" # Ә
|
140
140
|
'\u042E': 'Yu' # Ю
|
141
141
|
'\u042F': 'Ya' # Я
|
142
142
|
|
File without changes
|
@@ -26,7 +26,7 @@ tests:
|
|
26
26
|
|
27
27
|
expected: |
|
28
28
|
Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrídha tin ékhomen óloi mazí, kai sofoí ki amathís kai ploúsioi kai ftokhoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o kathís, ékhomen na zísomen edhó. To loipón dhoulépsamen óloi mazí, na tin filámen ki óloi mazí kai na min léyi oúte o dhinatós «egó» oúte o adhínatos. Xérete póte na léyi o kathís «egó»? Ótan agonistí mónos tou kai fkiási í khalási, na léyi «egó»; ótan ómos agonízondai polloí kai fkiánoun, tóte na léne «emís». Ímaste is to «emís» ki ókhi is to «egó». Kai is to exís na máthomen gnósi, an thélomen na fkiásomen khorión, na zísomen óloi mazí.
|
29
|
-
|
29
|
+
|
30
30
|
Yiánnis Makriyiánnis.
|
31
31
|
|
32
32
|
|
@@ -74,7 +74,7 @@ tests:
|
|
74
74
|
expected: Taḯyetos
|
75
75
|
- source: σπρέυ
|
76
76
|
expected: spréi
|
77
|
-
|
77
|
+
|
78
78
|
- source: Αθήνα
|
79
79
|
expected: Athína
|
80
80
|
- source: Άγιον Όρος
|
@@ -526,7 +526,7 @@ map:
|
|
526
526
|
- pattern: (?<=[Οο])\u03C5 # υ (after Ο)
|
527
527
|
result: u
|
528
528
|
- pattern: (?<=[Οο])\u03CD # ύ (after Ο)
|
529
|
-
result: ú
|
529
|
+
result: ú
|
530
530
|
- pattern: \u03A5[Ιιί] # ΥΙ
|
531
531
|
result: I
|
532
532
|
- pattern: \u03C5[Ιιί] # υι
|
@@ -699,4 +699,3 @@ map:
|
|
699
699
|
|
700
700
|
"\u0387": ";" # ·
|
701
701
|
"\u00B7": ";" # ·
|
702
|
-
|
@@ -12,42 +12,42 @@ description:
|
|
12
12
|
|
13
13
|
notes: "
|
14
14
|
|
15
|
-
1. At the end of a syllable, the character ᄋ should be romanized ng,
|
16
|
-
as in the following example:
|
15
|
+
1. At the end of a syllable, the character ᄋ should be romanized ng,
|
16
|
+
as in the following example:
|
17
17
|
|
18
18
|
평양 → P’yŏngyang
|
19
19
|
|
20
|
-
At the beginning of a syllable, the character ᄋ is silent and
|
21
|
-
should not be romanized. An example follows:
|
20
|
+
At the beginning of a syllable, the character ᄋ is silent and
|
21
|
+
should not be romanized. An example follows:
|
22
22
|
|
23
|
-
용화 → Yonghwa
|
23
|
+
용화 → Yonghwa
|
24
24
|
|
25
25
|
2. Syllable boundaries within words are not reflected in romanization.
|
26
|
-
In the different types of syllables shown in the table below, C
|
26
|
+
In the different types of syllables shown in the table below, C
|
27
27
|
represents any consonant character, V represents any vowel character
|
28
28
|
and / represents a syllable boundary.
|
29
29
|
|
30
|
-
Han’gŭl 개성 남포 안양
|
30
|
+
Han’gŭl 개성 남포 안양
|
31
31
|
Syllable boundaries CV/CVC CVC/CV VC/VC
|
32
32
|
Romanization Kaesŏng Namp’o Anyang
|
33
33
|
|
34
|
-
3. Euphonic changes occurring within a word, including between the
|
35
|
-
specific and generic of a geographical name, should be reflected in
|
36
|
-
romanization. Generic terms are usually seen separated from the name
|
37
|
-
by a hyphen and with a lower case initial letter rather than as a
|
38
|
-
separate word:
|
34
|
+
3. Euphonic changes occurring within a word, including between the
|
35
|
+
specific and generic of a geographical name, should be reflected in
|
36
|
+
romanization. Generic terms are usually seen separated from the name
|
37
|
+
by a hyphen and with a lower case initial letter rather than as a
|
38
|
+
separate word:
|
39
39
|
|
40
40
|
영진리 → Yŏngjil-li
|
41
41
|
덕흥리 → Tŏkhŭng-ni
|
42
42
|
압록강 → Amnok-kang
|
43
43
|
대동강 → Taedong-gang
|
44
44
|
|
45
|
-
4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
|
46
|
-
published in North Korea in 1966), unlike the Korean spoken in the
|
47
|
-
Republic of Korea, the language spoken in the Democratic People’s
|
45
|
+
4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
|
46
|
+
published in North Korea in 1966), unlike the Korean spoken in the
|
47
|
+
Republic of Korea, the language spoken in the Democratic People’s
|
48
48
|
Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
|
49
|
-
The use of the word-initial ᄅ ('r') can be seen in official news
|
50
|
-
reports as well as native mapping. Since such examples exist, the
|
49
|
+
The use of the word-initial ᄅ ('r') can be seen in official news
|
50
|
+
reports as well as native mapping. Since such examples exist, the
|
51
51
|
word initial ᄅ ('r') is reflected as an option in the tables given above.
|
52
52
|
|
53
53
|
5. The Romanization column shows only lowercase forms but, when romanizing,
|
@@ -25,7 +25,7 @@ notes:
|
|
25
25
|
has been used here for illustrative purposes.
|
26
26
|
- The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
|
27
27
|
There is no specific Unicode encoding for this variant form so a comparable character
|
28
|
-
has been used here for illustrative purposes.
|
28
|
+
has been used here for illustrative purposes.
|
29
29
|
- |
|
30
30
|
An inventory of letter-diacritic combinations, with their Unicode encoding,
|
31
31
|
in addition to the unmodified letters of the basic Roman script is:
|
@@ -0,0 +1,200 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bgnpcgn
|
3
|
+
id: 2020
|
4
|
+
language: nep
|
5
|
+
source_script: Deva
|
6
|
+
destination_script: Latn
|
7
|
+
name: Nepali Romanization, 2020
|
8
|
+
url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20NEPALI.pdf
|
9
|
+
creation_date: 1964
|
10
|
+
description: |
|
11
|
+
BGN/PCGN 2011 Agreement Romanization of Nepali
|
12
|
+
The BGN and the PCGN have adopted the Nepal Survey Department (NSD) system for the
|
13
|
+
romanization of Nepali names. This system, below, should be applied to Nepali names for which Roman‐
|
14
|
+
script spellings in materials produced by the government of Nepal are not available.
|
15
|
+
|
16
|
+
notes:
|
17
|
+
|
18
|
+
- Only the isolated forms of the characters are given in the consonant table. See any grammar of Nepali
|
19
|
+
(or other language using the Devanagari alphabet) for variant forms used in conjunct characters.
|
20
|
+
- These two consonant characters appear sometimes to represent ṛ (cerebral r), e.g., पहाड → pahāṛ
|
21
|
+
instead of pahāḍ. At one time they were written with dots below, i.e., as ड़ and ढ़, though this is no
|
22
|
+
longer normal practice in Nepali. The romanizations ṛ and ṛh, respectively, are optional for
|
23
|
+
documentary purposes if such dots appear in Nepali writing.
|
24
|
+
- व can be romanized as either v or w. This character is primarily
|
25
|
+
romanized as v in consonant initial, medial, and final position; however, initial, medial, and final w
|
26
|
+
romanizations can occur. The w romanization is a special case which is believed to be dependent on
|
27
|
+
dialect, pronunciation, or stress.
|
28
|
+
- |
|
29
|
+
An inventory of letter‐diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
|
30
|
+
Ṅ (U+1E44) ṅ (U+1E45)
|
31
|
+
Ñ (U+00D1) ñ (U+00F1)
|
32
|
+
Ṭ (U+1E6C) ṭ (U+1E6D)
|
33
|
+
Ḍ (U+1E0C) ḍ (U+1E0D)
|
34
|
+
Ṇ (U+1E46) ṇ (U+1E47)
|
35
|
+
Ṣ (U+1E62) ṣ (U+1E63)
|
36
|
+
Ā (U+0100) ā (U+0101)
|
37
|
+
Ī (U+012A) ī (U+012B)
|
38
|
+
Ū (U+016A) ū (U+016B)
|
39
|
+
Ṛ (U+1E5A) ṛ (U+1E5B)
|
40
|
+
|
41
|
+
- The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase
|
42
|
+
Roman letters as appropriate should be used.
|
43
|
+
|
44
|
+
- |
|
45
|
+
ं (anusvara) is rendered by
|
46
|
+
ṅ before क, ख, ग, and घ
|
47
|
+
ñ before च, छ, ज, and झ
|
48
|
+
ṇ before ट, ठ, ड, and ढ
|
49
|
+
n before त, थ, द, and ध
|
50
|
+
ṁ before य, र, ल, व, श, ष, स and ह
|
51
|
+
|
52
|
+
tests:
|
53
|
+
- source: "लेखन"
|
54
|
+
expected: "lekhn"
|
55
|
+
- source: "मुद्रा"
|
56
|
+
expected: "mudarā"
|
57
|
+
- source: "प्रशंसा"
|
58
|
+
expected: "parshṃsā" # note 5 rule checking
|
59
|
+
- source: "अंक"
|
60
|
+
expected: "aṅk" # note 5 rule checking
|
61
|
+
- source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
|
62
|
+
expected: "nekpāle sathgit sathāyī kmiṭīko baiṭhk bhdau gte bolāune bheko"
|
63
|
+
- source: "न घर रह्यो, न परिवार"
|
64
|
+
expected: "n ghr rhayo, n privār"
|
65
|
+
- source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
|
66
|
+
expected: "ḍhorpāṭnmā bhujīkholā bāḍhīphirole abhibhāvk gumāekā bālbālikāko bichlalī"
|
67
|
+
- source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
|
68
|
+
expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
|
69
|
+
- source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
|
70
|
+
expected: "sṃvidhān jārī bhes~gai sāravjnik parshāsnmā nyā~ utasāh āune apekṣā thiyo"
|
71
|
+
- source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
|
72
|
+
expected: "deshmā koronā sṅkarmit r mṛitkko sṅkhayā hrek din bḍhado chh"
|
73
|
+
- source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
|
74
|
+
expected: "gāu~pālikākā adhaykṣ ṭikā guruṅkā anusār viṣaṇudāslāī rājule sutankā lāgi belukā sāthī lgekā thie"
|
75
|
+
- source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
|
76
|
+
expected: "yo āyojnā gāu~pālikāko kenadar telalokmā prachh"
|
77
|
+
- source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
|
78
|
+
expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
|
79
|
+
- source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
|
80
|
+
expected: "chait philo sātā ghr āekā unī lkḍāun bhepchhi ytai rokie"
|
81
|
+
- source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
|
82
|
+
expected: "kām gran jāneko hkmā rojgārdātā kmapnīko ptrs~gai vḍā r jilalā parshāsnko siphāris anivāray grieko chh"
|
83
|
+
- source: "दुःख"
|
84
|
+
expected: "duḥkh"
|
85
|
+
|
86
|
+
map:
|
87
|
+
|
88
|
+
rules:
|
89
|
+
# note[5]
|
90
|
+
- pattern: \u0902(?=[कखगघ]) # ं before क, ख, ग, and घ
|
91
|
+
result: ṅ
|
92
|
+
- pattern: \u0902(?=[चछजझ]) # ं before च, छ, ज, and झ
|
93
|
+
result: ñ
|
94
|
+
- pattern: \u0902(?=[टठडढ]) # ं before ट, ठ, ड, and ढ
|
95
|
+
result: ṇ
|
96
|
+
- pattern: \u0902(?=[तथदध]) # ं before त, थ, द, and ध
|
97
|
+
result: n
|
98
|
+
|
99
|
+
characters:
|
100
|
+
|
101
|
+
# Vowels and Diphthongs
|
102
|
+
|
103
|
+
'अ': 'a'
|
104
|
+
'आ': 'ā'
|
105
|
+
'इ': 'i'
|
106
|
+
'ई': 'ī'
|
107
|
+
'उ': 'u'
|
108
|
+
'ऊ': 'ū'
|
109
|
+
'ऋ': 'ṛi'
|
110
|
+
'ॠ': 'rī'
|
111
|
+
'ए': 'e'
|
112
|
+
'ऐ': 'ai'
|
113
|
+
'ओ': 'o'
|
114
|
+
'औ': 'au'
|
115
|
+
|
116
|
+
# Medials # Needed for connecting consonants
|
117
|
+
|
118
|
+
'ा': "ā"
|
119
|
+
'ि': "i"
|
120
|
+
'ी': "ī"
|
121
|
+
'ु': "u"
|
122
|
+
'ू': "ū"
|
123
|
+
'ृ': "ṛi"
|
124
|
+
'ॄ': "rī"
|
125
|
+
'े': "e"
|
126
|
+
'ै': "ai"
|
127
|
+
'ो': "o"
|
128
|
+
'ौ': "au"
|
129
|
+
|
130
|
+
|
131
|
+
# Consonants (see Note 1)
|
132
|
+
|
133
|
+
# Gutturals
|
134
|
+
'क': 'k'
|
135
|
+
'ख': 'kh'
|
136
|
+
'ग': 'g'
|
137
|
+
'घ': 'gh'
|
138
|
+
'ङ': 'ṅ'
|
139
|
+
|
140
|
+
# Palatals
|
141
|
+
'च': 'ch'
|
142
|
+
'छ': 'chh'
|
143
|
+
'ज': 'j'
|
144
|
+
'झ': 'jh'
|
145
|
+
'ञ': 'ñ'
|
146
|
+
|
147
|
+
# Cerebrals
|
148
|
+
'ट': 'ṭ'
|
149
|
+
'ठ': 'ṭh'
|
150
|
+
'ड': 'ḍ'
|
151
|
+
'ढ': 'ḍh'
|
152
|
+
'ण': 'ṇ'
|
153
|
+
|
154
|
+
# Dentals
|
155
|
+
'त': 't'
|
156
|
+
'थ': 'th'
|
157
|
+
'द': 'd'
|
158
|
+
'ध': 'dh'
|
159
|
+
'न': 'n'
|
160
|
+
|
161
|
+
# Labials
|
162
|
+
'प': 'p'
|
163
|
+
'फ': 'ph'
|
164
|
+
'ब': 'b'
|
165
|
+
'भ': 'bh'
|
166
|
+
'म': 'm'
|
167
|
+
|
168
|
+
# Semivowels
|
169
|
+
'य': 'y'
|
170
|
+
'र': 'r'
|
171
|
+
'ल': 'l'
|
172
|
+
'व': 'v' # or w [Note#3]
|
173
|
+
|
174
|
+
# Sibilants
|
175
|
+
'श': 'sh'
|
176
|
+
'ष': 'ṣ'
|
177
|
+
'स': 's'
|
178
|
+
'क्ष': 'kṣ'
|
179
|
+
'त्र': 'tr'
|
180
|
+
'ज्ञ' : 'jñ'
|
181
|
+
|
182
|
+
# Aspirate
|
183
|
+
'ह': 'h'
|
184
|
+
|
185
|
+
# Anusvāra
|
186
|
+
'ं': 'ṃ'
|
187
|
+
|
188
|
+
# Bisarga
|
189
|
+
'ः': 'ḥ'
|
190
|
+
|
191
|
+
# Anunāsika
|
192
|
+
'ँ': '~'
|
193
|
+
|
194
|
+
'ॅ': 'r'
|
195
|
+
|
196
|
+
# halanta
|
197
|
+
'्': 'a'
|
198
|
+
|
199
|
+
# Abagraha
|
200
|
+
'ऽ': '’' # (apostrophe)
|
@@ -0,0 +1,159 @@
|
|
1
|
+
---
|
2
|
+
authority_id: bis
|
3
|
+
id: 1991
|
4
|
+
language: asm
|
5
|
+
source_script: Beng
|
6
|
+
destination_script: Latn
|
7
|
+
name: Indian script code for information interchange - ISCII - Assamese Romanization
|
8
|
+
#url:
|
9
|
+
creation_date: 1991
|
10
|
+
description: |
|
11
|
+
IS 13194 (1991): Indian script code for information
|
12
|
+
interchange - ISCII [LITD 20: Indian Language Technologies
|
13
|
+
and Products]
|
14
|
+
|
15
|
+
notes:
|
16
|
+
- |
|
17
|
+
Exception: Anusvāra is transliterated by:
|
18
|
+
|
19
|
+
a) ṅ before gutturals,
|
20
|
+
b) ñ before palatals,
|
21
|
+
c) ṇ before cerebrals,
|
22
|
+
d) n before dentals, and
|
23
|
+
e) m before labials.
|
24
|
+
|
25
|
+
tests:
|
26
|
+
- source: "অসমীয়া কবিতা"
|
27
|
+
expected: "asmīẏā kbitā"
|
28
|
+
- source: "কবিৰ আজি জন্মদিন"
|
29
|
+
expected: "kbir āji jnmdin"
|
30
|
+
- source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
|
31
|
+
expected: "bēruṭt ēmāhr pāchtē punr bhẏṅkr agnikāṇḍ"
|
32
|
+
- source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
|
33
|
+
expected: "bhṅār biruddhē āvēdn dākhil kṅgnār"
|
34
|
+
- source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
|
35
|
+
expected: "āpuni pd̂hi bhāl pāb prā bātri"
|
36
|
+
- source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
|
37
|
+
expected: "śrīrāmpurt grubhrti ṭrāk jbd, dujnk āṭk"
|
38
|
+
- source: "কেনে আছে প্ৰাক্তন"
|
39
|
+
expected: "kēnē āchē prāktn"
|
40
|
+
- source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
|
41
|
+
expected: "kmumbāir mēẏrr dēht kŏbhiḍ pjiṭibh"
|
42
|
+
- source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
|
43
|
+
expected: "ṭuiṭāryŏgē khŏd sdrī krē ēi kthā"
|
44
|
+
- source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
|
45
|
+
expected: "lkhimpur jilār nārāẏṇpurr brpthārt āji prśānti dhām nāmērē ēkhn bṛddhāśrmr śubhārmbh krā hẏ"
|
46
|
+
|
47
|
+
map:
|
48
|
+
|
49
|
+
rules:
|
50
|
+
# note
|
51
|
+
- pattern: \u0982(?=[কখগঘঙ])
|
52
|
+
result: ṅ
|
53
|
+
- pattern: \u0982(?=[চছজঝঞ])
|
54
|
+
result: ñ
|
55
|
+
- pattern: \u0982(?=[টঠডড়ঢঢ়ণ])
|
56
|
+
result: ṇ
|
57
|
+
- pattern: \u0982(?=[তৎথদধন])
|
58
|
+
result: n
|
59
|
+
- pattern: \u0982(?=[পফবভম])
|
60
|
+
result: m
|
61
|
+
|
62
|
+
|
63
|
+
characters:
|
64
|
+
'অ': 'a'
|
65
|
+
'আ': 'ā'
|
66
|
+
'ই': 'i'
|
67
|
+
'ঈ': 'ī'
|
68
|
+
'উ': 'u'
|
69
|
+
'ঊ': 'ū'
|
70
|
+
'ৠ': 'ṛ'
|
71
|
+
'ঌ': 'ḻ'
|
72
|
+
'এ': 'ē'
|
73
|
+
'ঐ': 'ai'
|
74
|
+
'ও': 'ŏ'
|
75
|
+
'ঔ': 'au'
|
76
|
+
|
77
|
+
# Consonants
|
78
|
+
# Gutturals
|
79
|
+
'ক': 'k'
|
80
|
+
'খ': 'kh'
|
81
|
+
'গ': 'g'
|
82
|
+
'ঘ': 'gh'
|
83
|
+
'ঙ': 'ṅ'
|
84
|
+
|
85
|
+
# Palatals
|
86
|
+
'চ': 'c'
|
87
|
+
'ছ': 'ch'
|
88
|
+
'জ': 'j'
|
89
|
+
'ঝ': 'jh'
|
90
|
+
'ঞ': 'ñ'
|
91
|
+
|
92
|
+
# Cerebrals
|
93
|
+
'ট': 'ṭ'
|
94
|
+
'ঠ': 'ṭh'
|
95
|
+
'ড': 'ḍ'
|
96
|
+
'ড়': 'd̂'
|
97
|
+
'ঢ': 'ḍh'
|
98
|
+
'ঢ়': 'd̂h'
|
99
|
+
'ণ': 'ṇ'
|
100
|
+
|
101
|
+
# Dentals
|
102
|
+
'ত': 't'
|
103
|
+
'ৎ': 't'
|
104
|
+
'থ': 'th'
|
105
|
+
'দ': 'd'
|
106
|
+
'ধ': 'dh'
|
107
|
+
'ন': 'n'
|
108
|
+
|
109
|
+
# Labials
|
110
|
+
'প': 'p'
|
111
|
+
'ফ': 'ph'
|
112
|
+
'ব': 'b'
|
113
|
+
'ভ': 'bh'
|
114
|
+
'ম': 'm'
|
115
|
+
|
116
|
+
# Semivowels
|
117
|
+
'য': 'y'
|
118
|
+
'য়': 'ẏ'
|
119
|
+
'য়': 'ẏ'
|
120
|
+
'ৰ': 'r'
|
121
|
+
'ল': 'l'
|
122
|
+
'ৱ': 'v'
|
123
|
+
|
124
|
+
|
125
|
+
# Sibilants
|
126
|
+
'শ': 'ś'
|
127
|
+
'ষ': 'ṣ'
|
128
|
+
'স': 's'
|
129
|
+
|
130
|
+
|
131
|
+
# Aspirate
|
132
|
+
'হ': 'h'
|
133
|
+
|
134
|
+
# Chandrabindu
|
135
|
+
'ঁ': 'm'
|
136
|
+
|
137
|
+
# Bisarga
|
138
|
+
'ঃ': 'ḥ'
|
139
|
+
|
140
|
+
# Anusvāra
|
141
|
+
'ং': 'ṃ'
|
142
|
+
|
143
|
+
# Medials # Needed for connecting consonants
|
144
|
+
|
145
|
+
'\u09be': 'ā'
|
146
|
+
'\u09bf': 'i'
|
147
|
+
'\u09c0': 'ī'
|
148
|
+
'\u09c1': 'u'
|
149
|
+
'\u09c2': 'ū'
|
150
|
+
'\u09c3': 'ṛ'
|
151
|
+
'\u09c7': 'ē'
|
152
|
+
'\u09c8': 'ai'
|
153
|
+
'\u09cb': 'ŏ'
|
154
|
+
'\u09cc': 'au'
|
155
|
+
'\u09CD': '' # Used for joining
|
156
|
+
'्': ''
|
157
|
+
'़': ''
|
158
|
+
'।': '.'
|
159
|
+
"": ''# Used for joining
|