RubyGems - interscript - Versions diffs - 0.1.5 → 0.1.6 - Mend

interscript 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

checksums.yaml +4 -4
data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
data/lib/interscript.rb +5 -1
data/lib/interscript/fs.rb +3 -1
data/lib/interscript/mapping.rb +2 -2
data/lib/interscript/opal.rb +5 -1
data/lib/interscript/opal/maps.js.erb +7 -4
data/lib/interscript/version.rb +1 -1
data/maps/acadsin-zho-Hani-Latn-2002.yaml +1 -1
data/maps/alalc-amh-Ethi-Latn-1997.yaml +509 -0
data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
data/maps/alalc-ara-Arab-Latn-1997.yaml +1283 -0
data/maps/alalc-asm-Deva-Latn-1997.yaml +159 -0
data/maps/alalc-aze-Cyrl-Latn-1997.yaml +1 -1
data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +2 -2
data/maps/alalc-ell-Grek-Latn-1997.yaml +2 -3
data/maps/alalc-ell-Grek-Latn-2010.yaml +2 -3
data/maps/alalc-hin-Deva-Latn-2020.yaml +159 -0
data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -2
data/maps/alalc-kor-Hang-Latn-1997.yaml +1 -1
data/maps/alalc-mar-Deva-Latn-1997.yaml +170 -0
data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +0 -0
data/maps/alalc-pan-Deva-Latn-1997.yaml +237 -0
data/maps/alalc-rus-Cyrl-Latn-1997.yaml +1 -2
data/maps/alalc-rus-Cyrl-Latn-2012.yaml +2 -2
data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +0 -0
data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -1
data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -2
data/maps/bgn-kor-Hang-Latn-1943.yaml +1 -1
data/maps/bgn-kor-Kore-Latn-1943.yaml +1 -1
data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +528 -0
data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +592 -0
data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +5 -5
data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +0 -0
data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +3 -4
data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -1
data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -1
data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +17 -17
data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +2 -2
data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +2 -2
data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +200 -0
data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -1
data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -1
data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
data/maps/bis-gjr-Gujr-Latn-13194-1991.yaml +166 -0
data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
data/maps/by-bel-Cyrl-Latn-1998.yaml +4 -4
data/maps/by-bel-Cyrl-Latn-2007.yaml +3 -3
data/maps/dos-nep-Deva-Latn-1997.yaml +33 -0
data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +4 -5
data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +4 -5
data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -1
data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -1
data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -1
data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +1 -1
data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +1 -1
data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +1 -1
data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -5
data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -4
data/maps/icao-per-Arab-Latn-9303.yaml +0 -1
data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -1
data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -1
data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +4 -5
data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +1 -2
data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -1
data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
data/maps/kp-kor-Hang-Latn-2002.yaml +4 -4
data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +2 -2
data/maps/mvd-bel-Cyrl-Latn-2008.yaml +4 -4
data/maps/mvd-bel-Cyrl-Latn-2010.yaml +4 -4
data/maps/mvd-rus-Cyrl-Latn-2008.yaml +1 -2
data/maps/mvd-rus-Cyrl-Latn-2010.yaml +4 -4
data/maps/nil-kor-Hang-Hang-jamo.yaml +3 -3
data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
data/maps/odni-bul-Cyrl-Latn-2015.yaml +3 -3
data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
data/maps/odni-kat-Geor-Latn-2015.yaml +0 -1
data/maps/{odni-mkd-cyrl-latn-2015.yaml → odni-mkd-Cyrl-Latn-2015.yaml} +0 -0
data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
data/maps/odni-srp-Cyrl-Latn-2015.yaml +2 -2
data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -2
data/maps/royin-tha-Thai-Latn-1939-generic.yaml +4 -4
data/maps/royin-tha-Thai-Latn-1968.yaml +4 -4
data/maps/royin-tha-Thai-Latn-1999-chained.yaml +4 -4
data/maps/royin-tha-Thai-Latn-1999.yaml +3 -3
data/maps/{ses-ara-arab-latn-1930.yaml → ses-ara-Arab-Latn-1930.yaml} +7 -3
data/maps/un-ara-Arab-Latn-1971.yaml +16 -4
data/maps/un-ara-Arab-Latn-1972.yaml +14 -7
data/maps/un-ara-Arab-Latn-2017.yaml +56 -19
data/maps/un-bel-Cyrl-Latn-2007.yaml +3 -3
data/maps/un-ell-Grek-Latn-1987-tl.yaml +1 -2
data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -1
data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +42 -42
data/maps/un-mon-Mong-Latn-2013.yaml +9 -3
data/maps/un-nep-Deva-Latn-1972.yaml +163 -0
data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
data/maps/{un-ukr-cyrl-latn-1998.yaml → un-ukr-Cyrl-Latn-1998.yaml} +1 -1
data/maps/ungegn-amh-Ethi-Latn-2016.yaml +575 -0
data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -2
data/maps/var-tha-Thai-Thai-phonemic.yaml +5 -5
data/maps/var-tha-Thai-Zsym-ipa.yaml +12 -12
data/maps/var-zho-Hani-Latn-1979.yaml +7 -7
metadata +41 -15

data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml CHANGED

@@ -15,8 +15,8 @@ description: |
   correspondences given below
 notes:
-  - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
-  - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
+  - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
+  - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
   - The letter w is used between or after vowels.
   - The letter w is used after e, u, ö and ə.
   - |
@@ -55,7 +55,7 @@ tests:
   - source: Ҡыҙылъяр
     expected: Qıźılyar
   # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
-  - source: кемдең
+  - source: кемдең
     expected: kemdeñ
   - source: кем
     expected: kem
@@ -65,7 +65,7 @@ tests:
     expected: oşo
   - source: быларҙың
     expected: bılarźıñ
-  - source: һеҙҙән
+  - source: һеҙҙән
     expected: heźźən
   - source: һин
     expected: hin
@@ -136,7 +136,7 @@ map:
     '\u042B': 'I'      # Ы
     '\u042C': ''       # Ь
     '\u042D': 'E'      # Э
-    '\u04D8': "\u018F" # Ә
+    '\u04D8': "\u018F" # Ә
     '\u042E': 'Yu'     # Ю
     '\u042F': 'Ya'     # Я

data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} RENAMED

File without changes

data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml CHANGED

@@ -26,7 +26,7 @@ tests:
     expected: |
         Éna práma mónon me parakínise ki eména na grápso óti toútin tin patrídha tin ékhomen óloi mazí, kai sofoí ki amathís kai ploúsioi kai ftokhoí kai politikoí kai stratiotikoí kai oi pléon mikróteroi ánthropoi; ósoi agonistíkamen, analógos o kathís, ékhomen na zísomen edhó. To loipón dhoulépsamen óloi mazí, na tin filámen ki óloi mazí kai na min léyi oúte o dhinatós «egó» oúte o adhínatos. Xérete póte na léyi o kathís «egó»? Ótan agonistí mónos tou kai fkiási í khalási, na léyi «egó»; ótan ómos agonízondai polloí kai fkiánoun, tóte na léne «emís». Ímaste is to «emís» ki ókhi is to «egó». Kai is to exís na máthomen gnósi, an thélomen na fkiásomen khorión, na zísomen óloi mazí.
         Yiánnis Makriyiánnis.
@@ -74,7 +74,7 @@ tests:
     expected: Taḯyetos
   - source: σπρέυ
     expected: spréi
   - source: Αθήνα
     expected: Athína
   - source: Άγιον Όρος
@@ -526,7 +526,7 @@ map:
     - pattern: (?<=[Οο])\u03C5    # υ (after Ο)
       result: u
     - pattern: (?<=[Οο])\u03CD    # ύ (after Ο)
-      result: ú
+      result: ú
     - pattern: \u03A5[Ιιί]        # ΥΙ
       result: I
     - pattern: \u03C5[Ιιί]        # υι
@@ -699,4 +699,3 @@ map:
     "\u0387": ";"   # ·
     "\u00B7": ";"   # ·

data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml CHANGED

@@ -17,4 +17,3 @@ map:
   character_separator: ""
   word_separator: " "
   inherit: "elot-ell-Grek-Latn-743-1982-ts"

data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml CHANGED

@@ -40,4 +40,3 @@ tests:
 map:
   inherit: "ggg-kat-Geor-Latn-2002"

data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml CHANGED

@@ -12,42 +12,42 @@ description:
 notes: "
-  1. At the end of a syllable, the character ᄋ should be romanized ng,
-     as in the following example:
+  1. At the end of a syllable, the character ᄋ should be romanized ng,
+     as in the following example:
      평양 → P’yŏngyang
-     At the beginning of a syllable, the character ᄋ is silent and
-     should not be romanized. An example follows:
+     At the beginning of a syllable, the character ᄋ is silent and
+     should not be romanized. An example follows:
-     용화 → Yonghwa
+     용화 → Yonghwa
   2. Syllable boundaries within words are not reflected in romanization.
-     In the different types of syllables shown in the table below, C
+     In the different types of syllables shown in the table below, C
      represents any consonant character, V represents any vowel character
      and / represents a syllable boundary.
-       Han’gŭl              개성      남포      안양
+       Han’gŭl              개성      남포      안양
        Syllable boundaries  CV/CVC   CVC/CV   VC/VC
        Romanization         Kaesŏng  Namp’o   Anyang
-  3. Euphonic changes occurring within a word, including between the
-     specific and generic of a geographical name, should be reflected in
-     romanization. Generic terms are usually seen separated from the name
-     by a hyphen and with a lower case initial letter rather than as a
-     separate word:
+  3. Euphonic changes occurring within a word, including between the
+     specific and generic of a geographical name, should be reflected in
+     romanization. Generic terms are usually seen separated from the name
+     by a hyphen and with a lower case initial letter rather than as a
+     separate word:
        영진리 → Yŏngjil-li
        덕흥리 → Tŏkhŭng-ni
        압록강 → Amnok-kang
        대동강 → Taedong-gang
-  4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
-     published in North Korea in 1966), unlike the Korean spoken in the
-     Republic of Korea, the language spoken in the Democratic People’s
+  4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
+     published in North Korea in 1966), unlike the Korean spoken in the
+     Republic of Korea, the language spoken in the Democratic People’s
      Republic of Korea maintains and pronounces the word-initial ᆯ (‘r’).
-     The use of the word-initial ᄅ ('r') can be seen in official news
-     reports as well as native mapping. Since such examples exist, the
+     The use of the word-initial ᄅ ('r') can be seen in official news
+     reports as well as native mapping. Since such examples exist, the
      word initial ᄅ ('r') is reflected as an option in the tables given above.
   5. The Romanization column shows only lowercase forms but, when romanizing,

data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml CHANGED

@@ -5,8 +5,8 @@ language: kor
 source_script: Hang
 destination_script: Latn
 name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
-url:
-creation_date:
+url:
+creation_date:
 adoption_date:
 description:

data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml CHANGED

@@ -5,8 +5,8 @@ language: kor
 source_script: Kore
 destination_script: Latn
 name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
-url:
-creation_date:
+url:
+creation_date:
 adoption_date:
 description:

data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml CHANGED

@@ -25,7 +25,7 @@ notes:
     has been used here for illustrative purposes.
   - The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
     There is no specific Unicode encoding for this variant form so a comparable character
-    has been used here for illustrative purposes.
+    has been used here for illustrative purposes.
   - |
     An inventory of letter-diacritic combinations, with their Unicode encoding,
     in addition to the unmodified letters of the basic Roman script is:

data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml ADDED

@@ -0,0 +1,200 @@
+---
+authority_id: bgnpcgn
+id: 2020
+language: nep
+source_script: Deva
+destination_script: Latn
+name: Nepali Romanization, 2020
+url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20NEPALI.pdf
+creation_date: 1964
+description: |
+  BGN/PCGN 2011 Agreement Romanization of Nepali
+  The BGN and the PCGN have adopted the Nepal Survey Department (NSD) system for the
+  romanization of Nepali names.  This system, below, should be applied to Nepali names for which Roman‐
+  script spellings in materials produced by the government of Nepal are not available.
+notes:
+  - Only the isolated forms of the characters are given in the consonant table.  See any grammar of Nepali
+    (or other language using the Devanagari alphabet) for variant forms used in conjunct characters.
+  - These two consonant characters appear sometimes to represent ṛ (cerebral r), e.g., पहाड  →  pahāṛ
+    instead of pahāḍ.  At one time they were written with dots below, i.e., as ड़ and ढ़, though this is no
+    longer normal practice in Nepali.    The romanizations ṛ and ṛh, respectively, are optional for
+    documentary purposes if such dots appear in Nepali writing.
+  -  व , can be romanized as either v or w.    This character is primarily
+    romanized as v in consonant initial, medial, and final position; however, initial, medial, and final w
+    romanizations can occur. The w romanization is a special case which is believed to be dependent on
+    dialect, pronunciation, or stress.
+  - |
+    An inventory of letter‐diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
+      Ṅ(U+1E44)  ṅ (U+1E45)
+      Ñ (U+00D1) ñ (U+00F1)
+      Ṭ (1E6C)   ṭ (1E6D)
+      Ḍ (1E0C)   ḍ (1E0D)
+      Ṇ (1E46)   ṇ (1E47)
+      Ṣ (1E62)   ṣ (1E63)
+      Ā (U+0100) ā (U+0101)
+      Ī (U+012A) ī (U+012B)
+      Ū (U+016A) ū (U+016B)
+      Ṛ (1E5A)   ṛ (1E5B)
+  - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase
+    Roman letters as appropriate should be used.
+  - |
+    ं (anusvara) is rendered by
+    ṅ before क, ख, ग, and घ
+    ñ before च, छ, ज, and झ
+    ṇ before ट, ठ, ड, and ढ
+    n before त, थ, द, and ध
+    ṁ before य, र, ल, व, श, ष, स and ह
+tests:
+  - source: "लेखन"
+    expected: "lekhn"
+  - source: "मुद्रा"
+    expected: "mudarā"
+  - source: "प्रशंसा"
+    expected: "parshṃsā" # note 5 rule checking
+  - source: "अंक"
+    expected: "aṅk" # note 5 rule checking
+  - source: "नेकपाले स्थगित स्थायी कमिटीको बैठक भदौ गते बोलाउने भएको"
+    expected: "nekpāle sathgit sathāyī kmiṭīko baiṭhk bhdau gte bolāune bheko"
+  - source: "न घर रह्यो, न परिवार"
+    expected: "n ghr rhayo, n privār"
+  - source: "ढोरपाटनमा भुजीखोला बाढीपहिरोले अभिभावक गुमाएका बालबालिकाको बिचल्ली"
+    expected: "ḍhorpāṭnmā bhujīkholā bāḍhīphirole abhibhāvk gumāekā bālbālikāko bichlalī"
+  - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
+    expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
+  - source: "संविधान जारी भएसँगै सार्वजनिक प्रशासनमा नयाँ उत्साह आउने अपेक्षा थियो"
+    expected: "sṃvidhān jārī bhes~gai sāravjnik parshāsnmā nyā~ utasāh āune apekṣā thiyo"
+  - source: "देशमा कोरोना संक्रमित र मृतकको संख्या हरेक दिन बढ्दो छ"
+    expected: "deshmā koronā sṅkarmit r mṛitkko sṅkhayā hrek din bḍhado chh"
+  - source: "गाउँपालिकाका अध्यक्ष टिका गुरुङका अनुसार विष्णुदासलाई राजुले सुत्नका लागि बेलुका साथी लगेका थिए"
+    expected: "gāu~pālikākā adhaykṣ ṭikā guruṅkā anusār viṣaṇudāslāī rājule sutankā lāgi belukā sāthī lgekā thie"
+  - source: "यो आयोजना गाउँपालिकाको केन्द्र तेल्लोकमा पर्छ"
+    expected: "yo āyojnā gāu~pālikāko kenadar telalokmā prachh"
+  - source: "सुस्मिताका काका हेमबहादुर र काकीलाई पनि पहिरोले बगायो"
+    expected: "susamitākā kākā hembhādur r kākīlāī pni phirole bgāyo"
+  - source: "चैत पहिलो साता घर आएका उनी लकडाउन भएपछि यतै रोकिए"
+    expected: "chait philo sātā ghr āekā unī lkḍāun bhepchhi ytai rokie"
+  - source: "काम गर्न जानेको हकमा रोजगारदाता कम्पनीको पत्रसँगै वडा र जिल्ला प्रशासनको सिफारिस अनिवार्य गरिएको छ"
+    expected: "kām gran jāneko hkmā rojgārdātā kmapnīko ptrs~gai vḍā r jilalā parshāsnko siphāris anivāray grieko chh"
+  - source: "दुःख"
+    expected: "duḥkh"
+map:
+  rules:
+    # note[5]
+    - pattern: \u0902(?=[कखगघ])    # ं  before क, ख, ग, and घ
+      result: ṅ
+    - pattern: \u0902(?=[चछजझ])    # ं  before च, छ, ज, and झ
+      result: ñ
+    - pattern: \u0902(?=[टठडढ])    # ं  before ट, ठ, ड, and ढ
+      result: ṇ
+    - pattern: \u0902(?=[तथदध])    # ं  before त, थ, द, and ध
+      result: n
+  characters:
+    # Vowels and Diphthongs
+    'अ': 'a'
+    'आ': 'ā'
+    'इ': 'i'
+    'ई': 'ī'
+    'उ': 'u'
+    'ऊ': 'ū'
+    'ऋ': 'ṛi'
+    'ॠ': 'rī'
+    'ए': 'e'
+    'ऐ': 'ai'
+    'ओ': 'o'
+    'औ': 'au'
+    # Medials # Needed for connecting consonants
+    'ा': "ā"
+    'ि': "i"
+    'ी': "ī"
+    'ु': "u"
+    'ू': "ū"
+    'ृ': "ṛi"
+    'ॄ': "rī"
+    'े': "e"
+    'ै': "ai"
+    'ो': "o"
+    'ौ': "au"
+    # Consonants (see Note 1)
+    # Gutturals
+    'क': 'k'
+    'ख': 'kh'
+    'ग': 'g'
+    'घ': 'gh'
+    'ङ': 'ṅ'
+    # Palatals
+    'च': 'ch'
+    'छ': 'chh'
+    'ज': 'j'
+    'झ': 'jh'
+    'ञ': 'ñ'
+    # Cerebrals
+    'ट': 'ṭ'
+    'ठ': 'ṭh'
+    'ड': 'ḍ'
+    'ढ': 'ḍh'
+    'ण': 'ṇ'
+    # Dentals
+    'त': 't'
+    'थ': 'th'
+    'द': 'd'
+    'ध': 'dh'
+    'न': 'n'
+    # Labials
+    'प': 'p'
+    'फ': 'ph'
+    'ब': 'b'
+    'भ': 'bh'
+    'म': 'm'
+    # Semivowels
+    'य': 'y'
+    'र': 'r'
+    'ल': 'l'
+    'व': 'v' # or w [Note#3]
+    # Sibilants
+    'श': 'sh'
+    'ष': 'ṣ'
+    'स': 's'
+    'क्ष': 'kṣ'
+    'त्र': 'tr'
+    'ज्ञ' : 'jñ'
+    # Aspirate
+    'ह': 'h'
+    # Anusvāra
+    'ं': 'ṃ'
+    # Bisarga
+    'ः': 'ḥ'
+    # Anunāsika
+    'ँ': '~'
+    'ॅ': 'r'
+    # halanta
+    '्': 'a'
+    # Abagraha
+    'ऽ': '’' # (apostrophe)

data/maps/bgnpcgn-per-Arab-Latn-1956.yaml CHANGED

@@ -90,4 +90,3 @@ map:
     '\u0647' : 'h'
     '\u0648' : 'v'
     '\u0649' : 'y'

data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml CHANGED

@@ -160,4 +160,3 @@ map:
     "\u042e": 'Yu'
     "\u042f": 'Ya'
     "\u0490": 'G'

data/maps/bis-asm-Beng-Latn-13194-1991.yaml ADDED

@@ -0,0 +1,159 @@
+---
+authority_id: bis
+id: 1991
+language: asm
+source_script: Beng
+destination_script: Latn
+name: Indian script code for information interchange - ISCII - Assamese Romanization
+#url:
+creation_date: 1991
+description: |
+  IS 13194 (1991): Indian script code for information
+  interchange - ISCII [LITD 20: Indian Language Technologies
+  and Products]
+notes:
+  - |
+    Exception: Anusvāra is transliterated by:
+    a) ṅ before gutturals,
+    b) ñ before palatals,
+    c) ṇ before cerebrals,
+    d) n before dentals, and
+    e) m before labials.
+tests:
+    - source: "অসমীয়া কবিতা"
+      expected: "asmīẏā kbitā"
+    - source: "কবিৰ আজি জন্মদিন"
+      expected: "kbir āji jnmdin"
+    - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
+      expected: "bēruṭt ēmāhr pāchtē punr bhẏṅkr agnikāṇḍ"
+    - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
+      expected: "bhṅār biruddhē āvēdn dākhil kṅgnār"
+    - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
+      expected: "āpuni pd̂hi bhāl pāb prā bātri"
+    - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
+      expected: "śrīrāmpurt grubhrti ṭrāk jbd, dujnk āṭk"
+    - source: "কেনে আছে প্ৰাক্তন"
+      expected: "kēnē āchē prāktn"
+    - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
+      expected: "kmumbāir mēẏrr dēht kŏbhiḍ pjiṭibh"
+    - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
+      expected: "ṭuiṭāryŏgē khŏd sdrī krē ēi kthā"
+    - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
+      expected: "lkhimpur jilār nārāẏṇpurr brpthārt āji prśānti dhām nāmērē ēkhn bṛddhāśrmr śubhārmbh krā hẏ"
+map:
+  rules:
+    # note
+    - pattern: \u0982(?=[কখগঘঙ])
+      result: ṅ
+    - pattern: \u0982(?=[চছজঝঞ])
+      result: ñ
+    - pattern: \u0982(?=[টঠডড়ঢঢ়ণ])
+      result: ṇ
+    - pattern: \u0982(?=[তৎথদধন])
+      result: n
+    - pattern: \u0982(?=[পফবভম])
+      result: m
+  characters:
+    'অ': 'a'
+    'আ': 'ā'
+    'ই': 'i'
+    'ঈ': 'ī'
+    'উ': 'u'
+    'ঊ': 'ū'
+    'ৠ': 'ṛ'
+    'ঌ': 'ḻ'
+    'এ': 'ē'
+    'ঐ': 'ai'
+    'ও': 'ŏ'
+    'ঔ': 'au'
+    # Consonants
+    # Gutturals
+    'ক': 'k'
+    'খ': 'kh'
+    'গ': 'g'
+    'ঘ': 'gh'
+    'ঙ': 'ṅ'
+    # Palatals
+    'চ': 'c'
+    'ছ': 'ch'
+    'জ': 'j'
+    'ঝ': 'jh'
+    'ঞ': 'ñ'
+    # Cerebrals
+    'ট':  'ṭ'
+    'ঠ':  'ṭh'
+    'ড':  'ḍ'
+    'ড়': 'd̂'
+    'ঢ':  'ḍh'
+    'ঢ়': 'd̂h'
+    'ণ':  'ṇ'
+    # Dentals
+    'ত': 't'
+    'ৎ': 't'
+    'থ': 'th'
+    'দ': 'd'
+    'ধ': 'dh'
+    'ন': 'n'
+    # Labials
+    'প': 'p'
+    'ফ': 'ph'
+    'ব': 'b'
+    'ভ': 'bh'
+    'ম': 'm'
+    # Semivowels
+    'য': 'y'
+    'য়': 'ẏ'
+    'য়': 'ẏ'
+    'ৰ': 'r'
+    'ল': 'l'
+    'ৱ': 'v'
+    # Sibilants
+    'শ': 'ś'
+    'ষ': 'ṣ'
+    'স': 's'
+    # Aspirate
+    'হ': 'h'
+    # Chandrabindu
+    'ঁ': 'm'
+    # Bisarga
+    'ঃ ': 'ḥ'
+    # Anusvāra
+    'ং': 'ṃ'
+    # Medials # Needed for connecting consonants
+    '\u09be': 'ā'
+    '\u09bf': 'i'
+    '\u09c0': 'ī'
+    '\u09c1': 'u'
+    '\u09c2': 'ū'
+    '\u09c3': 'ṛ'
+    '\u09c7': 'ē'
+    '\u09c8': 'ai'
+    '\u09cb': 'ŏ'
+    '\u09cc': 'au'
+    '\u09CD': '' # Used for joining
+    '्': ''
+    '़': ''
+    '।': '.'
+    "‍": ''# Used for joining