interscript 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +4 -4
  2. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  3. data/lib/interscript.rb +5 -1
  4. data/lib/interscript/fs.rb +3 -1
  5. data/lib/interscript/mapping.rb +2 -2
  6. data/lib/interscript/opal.rb +5 -1
  7. data/lib/interscript/opal/maps.js.erb +7 -4
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
  14. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
  15. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
  16. data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
  17. data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
  18. data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
  19. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
  20. data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
  21. data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
  22. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
  23. data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
  24. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
  25. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
  26. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
  27. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
  28. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
  29. data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
  30. data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
  31. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
  32. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
  33. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
  34. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
  35. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
  36. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
  37. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
  38. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
  39. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
  40. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
  41. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  42. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
  43. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
  44. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
  45. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  46. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  47. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  48. data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
  49. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  50. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  51. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  52. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  53. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  54. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  55. data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
  56. data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
  57. data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
  58. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
  59. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
  60. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
  61. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
  62. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
  63. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
  64. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
  65. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
  66. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
  67. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
  68. data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
  69. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
  70. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
  71. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  72. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
  73. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
  74. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
  75. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  76. data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
  77. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
  78. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
  79. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
  80. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
  81. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
  82. data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
  83. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  84. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  85. data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
  86. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  87. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
  88. data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
  89. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  90. data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
  91. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  92. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
  93. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
  94. data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
  95. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
  96. data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
  97. data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
  98. data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
  99. data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
  100. data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
  101. data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
  102. data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
  103. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
  104. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
  105. data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
  106. data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
  107. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  108. data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
  109. data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
  110. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  111. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  112. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
  113. data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
  114. data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
  115. data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
  116. metadata +41 -15
@@ -15,8 +15,8 @@ description: |
15
15
  correspondences given below
16
16
 
17
17
  notes:
18
- - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
- - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
18
+ - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
+ - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
20
20
  - The letter w is used between or after vowels.
21
21
  - The letter w is used after e, u, ö and ə.
22
22
  - |
@@ -55,7 +55,7 @@ tests:
55
55
  - source: Ҡыҙылъяр
56
56
  expected: Qıźılyar
57
57
  # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
58
- - source: кемдең
58
+ - source: кемдең
59
59
  expected: kemdeñ
60
60
  - source: кем
61
61
  expected: kem
@@ -65,7 +65,7 @@ tests:
65
65
  expected: oşo
66
66
  - source: быларҙың
67
67
  expected: bılarźıñ
68
- - source: һеҙҙән
68
+ - source: һеҙҙән
69
69
  expected: heźźən
70
70
  - source: һин
71
71
  expected: hin
@@ -136,7 +136,7 @@ map:
136
136
  '\u042B': 'I' # Ы
137
137
  '\u042C': '' # Ь
138
138
  '\u042D': 'E' # Э
139
- '\u04D8': "\u018F" # Ә
139
+ '\u04D8': "\u018F" # Ә
140
140
  '\u042E': 'Yu' # Ю
141
141
  '\u042F': 'Ya' # Я
142
142
 
@@ -26,7 +26,7 @@ tests:
26
26
 
27
27
  expected: |
28
28
  Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrídha tin ékhomen óloi mazí, kai sofoí ki amathís kai ploúsioi kai ftokhoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o kathís, ékhomen na zísomen edhó. To loipón dhoulépsamen óloi mazí, na tin filámen ki óloi mazí kai na min léyi oúte o dhinatós «egó» oúte o adhínatos. Xérete póte na léyi o kathís «egó»? Ótan agonistí mónos tou kai fkiási í khalási, na léyi «egó»; ótan ómos agonízondai polloí kai fkiánoun, tóte na léne «emís». Ímaste is to «emís» ki ókhi is to «egó». Kai is to exís na máthomen gnósi, an thélomen na fkiásomen khorión, na zísomen óloi mazí.
29
-
29
+
30
30
  Yiánnis Makriyiánnis.
31
31
 
32
32
 
@@ -74,7 +74,7 @@ tests:
74
74
  expected: Taḯyetos
75
75
  - source: σπρέυ
76
76
  expected: spréi
77
-
77
+
78
78
  - source: Αθήνα
79
79
  expected: Athína
80
80
  - source: Άγιον Όρος
@@ -526,7 +526,7 @@ map:
526
526
  - pattern: (?<=[Οο])\u03C5 # υ (after Ο)
527
527
  result: u
528
528
  - pattern: (?<=[Οο])\u03CD # ύ (after Ο)
529
- result: ú
529
+ result: ú
530
530
  - pattern: \u03A5[Ιιί] # ΥΙ
531
531
  result: I
532
532
  - pattern: \u03C5[Ιιί] # υι
@@ -699,4 +699,3 @@ map:
699
699
 
700
700
  "\u0387": ";" # ·
701
701
  "\u00B7": ";" # ·
702
-
@@ -17,4 +17,3 @@ map:
17
17
  character_separator: ""
18
18
  word_separator: " "
19
19
  inherit: "elot-ell-Grek-Latn-743-1982-ts"
20
-
@@ -40,4 +40,3 @@ tests:
40
40
 
41
41
  map:
42
42
  inherit: "ggg-kat-Geor-Latn-2002"
43
-
@@ -12,42 +12,42 @@ description:
12
12
 
13
13
  notes: "
14
14
 
15
- 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
- as in the following example:
15
+ 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
+ as in the following example:
17
17
 
18
18
  평양 → P’yŏngyang
19
19
 
20
- At the beginning of a syllable, the character ᄋ is silent and
21
- should not be romanized. An example follows:
20
+ At the beginning of a syllable, the character ᄋ is silent and
21
+ should not be romanized. An example follows:
22
22
 
23
- 용화 → Yonghwa
23
+ 용화 → Yonghwa
24
24
 
25
25
  2. Syllable boundaries within words are not reflected in romanization.
26
- In the different types of syllables shown in the table below, C
26
+ In the different types of syllables shown in the table below, C
27
27
  represents any consonant character, V represents any vowel character
28
28
  and / represents a syllable boundary.
29
29
 
30
- Han’gŭl 개성 남포 안양
30
+ Han’gŭl 개성 남포 안양
31
31
  Syllable boundaries CV/CVC CVC/CV VC/VC
32
32
  Romanization Kaesŏng Namp’o Anyang
33
33
 
34
- 3. Euphonic changes occurring within a word, including between the
35
- specific and generic of a geographical name, should be reflected in
36
- romanization. Generic terms are usually seen separated from the name
37
- by a hyphen and with a lower case initial letter rather than as a
38
- separate word:
34
+ 3. Euphonic changes occurring within a word, including between the
35
+ specific and generic of a geographical name, should be reflected in
36
+ romanization. Generic terms are usually seen separated from the name
37
+ by a hyphen and with a lower case initial letter rather than as a
38
+ separate word:
39
39
 
40
40
  영진리 → Yŏngjil-li
41
41
  덕흥리 → Tŏkhŭng-ni
42
42
  압록강 → Amnok-kang
43
43
  대동강 → Taedong-gang
44
44
 
45
- 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
- published in North Korea in 1966), unlike the Korean spoken in the
47
- Republic of Korea, the language spoken in the Democratic People’s
45
+ 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
+ published in North Korea in 1966), unlike the Korean spoken in the
47
+ Republic of Korea, the language spoken in the Democratic People’s
48
48
  Republic of Korea maintains and pronounces the word-initial ᆯ (‘r’).
49
- The use of the word-initial ᄅ ('r') can be seen in official news
50
- reports as well as native mapping. Since such examples exist, the
49
+ The use of the word-initial ᄅ ('r') can be seen in official news
50
+ reports as well as native mapping. Since such examples exist, the
51
51
  word initial ᄅ ('r') is reflected as an option in the tables given above.
52
52
 
53
53
  5. The Romanization column shows only lowercase forms but, when romanizing,
@@ -5,8 +5,8 @@ language: kor
5
5
  source_script: Hang
6
6
  destination_script: Latn
7
7
  name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
8
+ url:
9
+ creation_date:
10
10
  adoption_date:
11
11
  description:
12
12
 
@@ -5,8 +5,8 @@ language: kor
5
5
  source_script: Kore
6
6
  destination_script: Latn
7
7
  name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
- url:
9
- creation_date:
8
+ url:
9
+ creation_date:
10
10
  adoption_date:
11
11
  description:
12
12
 
@@ -25,7 +25,7 @@ notes:
25
25
  has been used here for illustrative purposes.
26
26
  - The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
27
27
  There is no specific Unicode encoding for this variant form so a comparable character
28
- has been used here for illustrative purposes.
28
+ has been used here for illustrative purposes.
29
29
  - |
30
30
  An inventory of letter-diacritic combinations, with their Unicode encoding,
31
31
  in addition to the unmodified letters of the basic Roman script is:
@@ -0,0 +1,200 @@
1
+ ---
2
+ authority_id: bgnpcgn
3
+ id: 2020
4
+ language: nep
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Nepali Romanization, 2020
8
+ url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20NEPALI.pdf
9
+ creation_date: 1964
10
+ description: |
11
+ BGN/PCGN 2011 Agreement Romanization of Nepali
12
+ The BGN and the PCGN have adopted the Nepal Survey Department (NSD) system for the
13
+ romanization of Nepali names. This system, below, should be applied to Nepali names for which Roman‐
14
+ script spellings in materials produced by the government of Nepal are not available.
15
+
16
+ notes:
17
+
18
+ - Only the isolated forms of the characters are given in the consonant table. See any grammar of Nepali
19
+ (or other language using the Devanagari alphabet) for variant forms used in conjunct characters.
20
+ - The consonant characters ड and ढ sometimes appear to represent ṛ (cerebral r), e.g., पहाड → pahāṛ
21
+ instead of pahāḍ. At one time they were written with dots below, i.e., as ड़ and ढ़, though this is no
22
+ longer normal practice in Nepali. The romanizations ṛ and ṛh, respectively, are optional for
23
+ documentary purposes if such dots appear in Nepali writing.
24
+ - व can be romanized as either v or w. This character is primarily
25
+ romanized as v in consonant initial, medial, and final position; however, initial, medial, and final w
26
+ romanizations can occur. The w romanization is a special case which is believed to be dependent on
27
+ dialect, pronunciation, or stress.
28
+ - |
29
+ An inventory of letter‐diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
30
+ Ṅ (U+1E44) ṅ (U+1E45)
31
+ Ñ (U+00D1) ñ (U+00F1)
32
+ Ṭ (U+1E6C) ṭ (U+1E6D)
33
+ Ḍ (U+1E0C) ḍ (U+1E0D)
34
+ Ṇ (U+1E46) ṇ (U+1E47)
35
+ Ṣ (U+1E62) ṣ (U+1E63)
36
+ Ā (U+0100) ā (U+0101)
37
+ Ī (U+012A) ī (U+012B)
38
+ Ū (U+016A) ū (U+016B)
39
+ Ṛ (U+1E5A) ṛ (U+1E5B)
40
+
41
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase
42
+ Roman letters as appropriate should be used.
43
+
44
+ - |
45
+ ं (anusvara) is rendered by
46
+ ṅ before क, ख, ग, and घ
47
+ ñ before च, छ, ज, and झ
48
+ ṇ before ट, ठ, ड, and ढ
49
+ n before त, थ, द, and ध
50
+ ṁ before य, र, ल, व, श, ष, स and ह
51
+
52
+ tests:
53
+ - source: "लेखन"
54
+ expected: "lekhn"
55
+ - source: "मुद्रा"
56
+ expected: "mudarā"
57
+ - source: "प्रशंसा"
58
+ expected: "parshṃsā" # note 5 rule checking
59
+ - source: "अंक"
60
+ expected: "aṅk" # note 5 rule checking
61
+ - source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
62
+ expected: "nekpāle sathgit sathāyī kmiṭīko baiṭhk bhdau gte bolāune bheko"
63
+ - source: "न घर रह्यो, न परिवार"
64
+ expected: "n ghr rhayo, n privār"
65
+ - source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
66
+ expected: "ḍhorpāṭnmā bhujīkholā bāḍhīphirole abhibhāvk gumāekā bālbālikāko bichlalī"
67
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
68
+ expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
69
+ - source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
70
+ expected: "sṃvidhān jārī bhes~gai sāravjnik parshāsnmā nyā~ utasāh āune apekṣā thiyo"
71
+ - source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
72
+ expected: "deshmā koronā sṅkarmit r mṛitkko sṅkhayā hrek din bḍhado chh"
73
+ - source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
74
+ expected: "gāu~pālikākā adhaykṣ ṭikā guruṅkā anusār viṣaṇudāslāī rājule sutankā lāgi belukā sāthī lgekā thie"
75
+ - source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
76
+ expected: "yo āyojnā gāu~pālikāko kenadar telalokmā prachh"
77
+ - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
78
+ expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
79
+ - source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
80
+ expected: "chait philo sātā ghr āekā unī lkḍāun bhepchhi ytai rokie"
81
+ - source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
82
+ expected: "kām gran jāneko hkmā rojgārdātā kmapnīko ptrs~gai vḍā r jilalā parshāsnko siphāris anivāray grieko chh"
83
+ - source: "दुःख"
84
+ expected: "duḥkh"
85
+
86
+ map:
87
+
88
+ rules:
89
+ # note[5]
90
+ - pattern: \u0902(?=[कखगघ]) # ं before क, ख, ग, and घ
91
+ result: ṅ
92
+ - pattern: \u0902(?=[चछजझ]) # ं before च, छ, ज, and झ
93
+ result: ñ
94
+ - pattern: \u0902(?=[टठडढ]) # ं before ट, ठ, ड, and ढ
95
+ result: ṇ
96
+ - pattern: \u0902(?=[तथदध]) # ं before त, थ, द, and ध
97
+ result: n
98
+
99
+ characters:
100
+
101
+ # Vowels and Diphthongs
102
+
103
+ 'अ': 'a'
104
+ 'आ': 'ā'
105
+ 'इ': 'i'
106
+ 'ई': 'ī'
107
+ 'उ': 'u'
108
+ 'ऊ': 'ū'
109
+ 'ऋ': 'ṛi'
110
+ 'ॠ': 'rī'
111
+ 'ए': 'e'
112
+ 'ऐ': 'ai'
113
+ 'ओ': 'o'
114
+ 'औ': 'au'
115
+
116
+ # Medials # Needed for connecting consonants
117
+
118
+ 'ा': "ā"
119
+ 'ि': "i"
120
+ 'ी': "ī"
121
+ 'ु': "u"
122
+ 'ू': "ū"
123
+ 'ृ': "ṛi"
124
+ 'ॄ': "rī"
125
+ 'े': "e"
126
+ 'ै': "ai"
127
+ 'ो': "o"
128
+ 'ौ': "au"
129
+
130
+
131
+ # Consonants (see Note 1)
132
+
133
+ # Gutturals
134
+ 'क': 'k'
135
+ 'ख': 'kh'
136
+ 'ग': 'g'
137
+ 'घ': 'gh'
138
+ 'ङ': 'ṅ'
139
+
140
+ # Palatals
141
+ 'च': 'ch'
142
+ 'छ': 'chh'
143
+ 'ज': 'j'
144
+ 'झ': 'jh'
145
+ 'ञ': 'ñ'
146
+
147
+ # Cerebrals
148
+ 'ट': 'ṭ'
149
+ 'ठ': 'ṭh'
150
+ 'ड': 'ḍ'
151
+ 'ढ': 'ḍh'
152
+ 'ण': 'ṇ'
153
+
154
+ # Dentals
155
+ 'त': 't'
156
+ 'थ': 'th'
157
+ 'द': 'd'
158
+ 'ध': 'dh'
159
+ 'न': 'n'
160
+
161
+ # Labials
162
+ 'प': 'p'
163
+ 'फ': 'ph'
164
+ 'ब': 'b'
165
+ 'भ': 'bh'
166
+ 'म': 'm'
167
+
168
+ # Semivowels
169
+ 'य': 'y'
170
+ 'र': 'r'
171
+ 'ल': 'l'
172
+ 'व': 'v' # or w [Note#3]
173
+
174
+ # Sibilants
175
+ 'श': 'sh'
176
+ 'ष': 'ṣ'
177
+ 'स': 's'
178
+ 'क्ष': 'kṣ'
179
+ 'त्र': 'tr'
180
+ 'ज्ञ' : 'jñ'
181
+
182
+ # Aspirate
183
+ 'ह': 'h'
184
+
185
+ # Anusvāra
186
+ 'ं': 'ṃ'
187
+
188
+ # Bisarga
189
+ 'ः': 'ḥ'
190
+
191
+ # Anunāsika
192
+ 'ँ': '~'
193
+
194
+ 'ॅ': 'r'
195
+
196
+ # halanta
197
+ '्': 'a'
198
+
199
+ # Abagraha
200
+ 'ऽ': '’' # (apostrophe)
@@ -90,4 +90,3 @@ map:
90
90
  '\u0647' : 'h'
91
91
  '\u0648' : 'v'
92
92
  '\u0649' : 'y'
93
-
@@ -160,4 +160,3 @@ map:
160
160
  "\u042e": 'Yu'
161
161
  "\u042f": 'Ya'
162
162
  "\u0490": 'G'
163
-
@@ -0,0 +1,159 @@
1
+ ---
2
+ authority_id: bis
3
+ id: 1991
4
+ language: asm
5
+ source_script: Beng
6
+ destination_script: Latn
7
+ name: Indian script code for information interchange - ISCII - Assamese Romanization
8
+ #url:
9
+ creation_date: 1991
10
+ description: |
11
+ IS 13194 (1991): Indian script code for information
12
+ interchange - ISCII [LITD 20: Indian Language Technologies
13
+ and Products]
14
+
15
+ notes:
16
+ - |
17
+ Exception: Anusvāra is transliterated by:
18
+
19
+ a) ṅ before gutturals,
20
+ b) ñ before palatals,
21
+ c) ṇ before cerebrals,
22
+ d) n before dentals, and
23
+ e) m before labials.
24
+
25
+ tests:
26
+ - source: "অসমীয়া কবিতা"
27
+ expected: "asmīẏā kbitā"
28
+ - source: "কবিৰ আজি জন্মদিন"
29
+ expected: "kbir āji jnmdin"
30
+ - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
31
+ expected: "bēruṭt ēmāhr pāchtē punr bhẏṅkr agnikāṇḍ"
32
+ - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
33
+ expected: "bhṅār biruddhē āvēdn dākhil kṅgnār"
34
+ - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
35
+ expected: "āpuni pd̂hi bhāl pāb prā bātri"
36
+ - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
37
+ expected: "śrīrāmpurt grubhrti ṭrāk jbd, dujnk āṭk"
38
+ - source: "কেনে আছে প্ৰাক্তন"
39
+ expected: "kēnē āchē prāktn"
40
+ - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
41
+ expected: "kmumbāir mēẏrr dēht kŏbhiḍ pjiṭibh"
42
+ - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
43
+ expected: "ṭuiṭāryŏgē khŏd sdrī krē ēi kthā"
44
+ - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
45
+ expected: "lkhimpur jilār nārāẏṇpurr brpthārt āji prśānti dhām nāmērē ēkhn bṛddhāśrmr śubhārmbh krā hẏ"
46
+
47
+ map:
48
+
49
+ rules:
50
+ # note
51
+ - pattern: \u0982(?=[কখগঘঙ])
52
+ result: ṅ
53
+ - pattern: \u0982(?=[চছজঝঞ])
54
+ result: ñ
55
+ - pattern: \u0982(?=[টঠডড়ঢঢ়ণ])
56
+ result: ṇ
57
+ - pattern: \u0982(?=[তৎথদধন])
58
+ result: n
59
+ - pattern: \u0982(?=[পফবভম])
60
+ result: m
61
+
62
+
63
+ characters:
64
+ 'অ': 'a'
65
+ 'আ': 'ā'
66
+ 'ই': 'i'
67
+ 'ঈ': 'ī'
68
+ 'উ': 'u'
69
+ 'ঊ': 'ū'
70
+ 'ৠ': 'ṛ'
71
+ 'ঌ': 'ḻ'
72
+ 'এ': 'ē'
73
+ 'ঐ': 'ai'
74
+ 'ও': 'ŏ'
75
+ 'ঔ': 'au'
76
+
77
+ # Consonants
78
+ # Gutturals
79
+ 'ক': 'k'
80
+ 'খ': 'kh'
81
+ 'গ': 'g'
82
+ 'ঘ': 'gh'
83
+ 'ঙ': 'ṅ'
84
+
85
+ # Palatals
86
+ 'চ': 'c'
87
+ 'ছ': 'ch'
88
+ 'জ': 'j'
89
+ 'ঝ': 'jh'
90
+ 'ঞ': 'ñ'
91
+
92
+ # Cerebrals
93
+ 'ট': 'ṭ'
94
+ 'ঠ': 'ṭh'
95
+ 'ড': 'ḍ'
96
+ 'ড়': 'd̂'
97
+ 'ঢ': 'ḍh'
98
+ 'ঢ়': 'd̂h'
99
+ 'ণ': 'ṇ'
100
+
101
+ # Dentals
102
+ 'ত': 't'
103
+ 'ৎ': 't'
104
+ 'থ': 'th'
105
+ 'দ': 'd'
106
+ 'ধ': 'dh'
107
+ 'ন': 'n'
108
+
109
+ # Labials
110
+ 'প': 'p'
111
+ 'ফ': 'ph'
112
+ 'ব': 'b'
113
+ 'ভ': 'bh'
114
+ 'ম': 'm'
115
+
116
+ # Semivowels
117
+ 'য': 'y'
118
+ 'য়': 'ẏ'
119
+ 'য়': 'ẏ'
120
+ 'ৰ': 'r'
121
+ 'ল': 'l'
122
+ 'ৱ': 'v'
123
+
124
+
125
+ # Sibilants
126
+ 'শ': 'ś'
127
+ 'ষ': 'ṣ'
128
+ 'স': 's'
129
+
130
+
131
+ # Aspirate
132
+ 'হ': 'h'
133
+
134
+ # Chandrabindu
135
+ 'ঁ': 'm'
136
+
137
+ # Bisarga
138
+ 'ঃ ': 'ḥ'
139
+
140
+ # Anusvāra
141
+ 'ং': 'ṃ'
142
+
143
+ # Medials # Needed for connecting consonants
144
+
145
+ '\u09be': 'ā'
146
+ '\u09bf': 'i'
147
+ '\u09c0': 'ī'
148
+ '\u09c1': 'u'
149
+ '\u09c2': 'ū'
150
+ '\u09c3': 'ṛ'
151
+ '\u09c7': 'ē'
152
+ '\u09c8': 'ai'
153
+ '\u09cb': 'ŏ'
154
+ '\u09cc': 'au'
155
+ '\u09CD': '' # Used for joining
156
+ '्': ''
157
+ '़': ''
158
+ '।': '.'
159
+ "‍": ''# Used for joining