interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,21 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Standards for the transliteration of Macedonian personal names in written reports and products
8
+ creation_date: 2005
9
+ confirmation_date: 2005
10
+ description: |
11
+ Office of the Director of National Intelligence Macedonian Personal Names 2004 System
12
+
13
+ tests:
14
+ - source: Билјана
15
+ expected: Biljana
16
+ - source: Душко
17
+ expected: Dushko
18
+
19
+ map:
20
+ inherit: odni-mkd-Cyrl-Latn-2015
21
+ rules:
@@ -0,0 +1,123 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-3:prs
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Dari Personal Names (2004)
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
9
+ creation_date: 2004
10
+ confirmation_date: 2004-11
11
+ description: |
12
+
13
+ notes:
14
+ - This standard is intended only for those Afghan names
15
+ that have a common bond or similarity with Iranian or
16
+ Arabic names. They should not, for example, be used for
17
+ Pashto names, for which a separate standard should be used.
18
+ - Long/short vowels:- There is no distinction made in Roman
19
+ between long and short a:- E.g., Farhad (first a is short,
20
+ second is long).
21
+ - Double consonants:- Double consonants represented by the
22
+ tashdid are shown by doubling the Roman letter:- Mohammad.
23
+ Exceptions:- Consonants represented by Roman digraphs (
24
+ e.g., sh, ch) are not doubled:- Mobasher [not:- Mobashsher]
25
+ . Double letters are only used for tashdid (thus, Hosein [
26
+ not Hossein]) or to reflect the ‘sun letter’ assimilation (
27
+ see below).
28
+ - Hamzeh:- The hamzeh is represented name-internally by an
29
+ apostrophe, as is the ain. Name-initially, however,
30
+ neither hamzeh nor ain are indicated in transliteration (
31
+ e.g., Abdorrahman, not 'Abdorrahman).
32
+ - Digraphs:- No distinction is drawn in Roman between
33
+ digraphs such as sh and single contiguous letters (e.g., s
34
+ followed by h).
35
+ - Arabic definite article "al" ('the'):- Common in many
36
+ names borrowed from Arabic, the transliteration should show
37
+ the 'sun letter' assimilation rather than the “l” for the
38
+ lam. That is:- Abdorrahman. Note also that the "Abdol +
39
+ attribute of Allah" names are written as one unanalyzed
40
+ word, as are other names that contain the definite
41
+ article:- Shamsoddin (not Shams al-Din), Nezamoddin, etc.
42
+ - Diphthongs:- Diphthongs are written ei and ow
43
+ respectively:- Hosein; Khosrow.
44
+ - Yeh maqsura (final yeh pronounced as “a”):- should be
45
+ written as “a” as in “Musa”.
46
+
47
+ - Special Rules
48
+
49
+ - Hyphens:- A hyphen is used to indicate the ezafeh
50
+ construction:- Arshad-e Ameri
51
+ - Borrowed names that incorporate the name of God (Allah)
52
+ are transliterated as one word, with the letter "o":- E.g.,
53
+ Abdollah, Ayatollah, Azizollah.
54
+ - Foreign names borrowed or appearing in Dari are spelled
55
+ according to the standard Western tradition (even if there
56
+ is an Arabic or Dari version of the same name):- Joseph,
57
+ Michael.
58
+ - Common suffixes, such as gol, pur, mand, yar, zadeh,
59
+ etc., as well as nesbeh (‘relationship’ (to place of birth,
60
+ etc.)) names derived with these suffixes (e.g., abadi) are
61
+ written as part of the name:-
62
+
63
+ gol Parigol, Ziagol
64
+ pur Shahpur, Mehrpur
65
+ mand Gulahmand
66
+ yar Aminyar
67
+ zadeh Ismailzadeh, Karimzadeh
68
+
69
+ abadi Kamalabadi
70
+
71
+
72
+ tests:
73
+ - source: مُوسَى
74
+ expected: musa
75
+
76
+ - source: مُؤمِن
77
+ expected: momen
78
+
79
+ - source: رِضايي
80
+ expected: rezai
81
+
82
+ - source: مُبَشِّر
83
+ expected: mobasher
84
+
85
+ - source: حَسَّان
86
+ expected: hassan
87
+
88
+ - source: حَسَن
89
+ expected: hasan
90
+
91
+ - source: صَفَّار
92
+ expected: saffar
93
+
94
+ - source: صَفَر
95
+ expected: safar
96
+
97
+ map:
98
+ inherit: odni-fas-Arab-Latn-2004
99
+ characters:
100
+
101
+ '\u0626' : '' # ئ
102
+ '\u0624' : '' # ؤ
103
+
104
+ # shadda
105
+
106
+ '\u0642\u0651' : 'qq' # ق
107
+ '\u0648\u0651' : 'ww' # و
108
+
109
+ '\u0621': '' # ء
110
+
111
+ # FROM NOTES
112
+
113
+ '\u064a\u064a' : 'i' # NOTE 4 (2)
114
+ '\u06cc\u06cc' : 'i'
115
+
116
+ '\u0627\u064a\b' : 'i' # NOTE 4 (3)
117
+ '\u0627\u06cc\b' : 'i'
118
+
119
+ # Farsi consonant characters
120
+
121
+ '\u0639' : '' # ع # new
122
+ '\u0642' : 'q' # ق
123
+ '\u0648' : 'w' # و
@@ -0,0 +1,36 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Office of the Director of National Intelligence Serbian Personal Names 2004 System
8
+ creation_date: 2005
9
+ confirmation_date: 2005
10
+ description: |
11
+ Office of the Director of National Intelligence Serbian Personal Names 2004 System
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: Гојко Митић
17
+ expected: Gojko Mitic
18
+ - source: Горња Ваганица
19
+ expected: Gornja Vaganica
20
+ - source: Довиђења
21
+ expected: Dovidjenja
22
+ - source: Ћао! Здраво!
23
+ expected: Cao! Zdravo!
24
+ - source: Кључ
25
+ expected: Kljuc
26
+ - source: Цигарете
27
+ expected: Cigarete
28
+ - source: Пролеће
29
+ expected: Prolece
30
+ - source: Понедељак
31
+ expected: Ponedeljak
32
+ - source: Горња Ваганица
33
+ expected: Gornja Vaganica
34
+
35
+ map:
36
+ inherit: odni-srp-Cyrl-Latn-2015
@@ -0,0 +1,170 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2015
4
+ language: iso-639-2:tuk
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Standards for the transliteration of Turkmen personal names in written reports and products
8
+ url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20T%20-%20Turkmen_Personal_Names_FLTS%20(U).pdf
9
+ source: ICS-630-01 Annex T
10
+ creation_date: 2015
11
+ confirmation_date: 2015
12
+ description: |
13
+ This system is the Intelligence Community standard for the transliteration of Turkmen personal
14
+ names that will be applied to all final written reports and products for IC consumers. It is not
15
+ intended to eliminate variations of a name that can contribute forensic information. Rather, it is to
16
+ provide an IC standard Romanized (English) transliteration from Turkmen that can then be linked
17
+ to forensic information in ways that will help identify the referent of the name.
18
+
19
+ In cases where an individual’s name has already been transliterated in a variant spelling, the IC
20
+ Standard spelling should appear first, followed by the variant spelling(s) in parentheses at the first
21
+ usage. In addition, if the original Cyrillic-script spelling is known, that spelling should also
22
+ appear in parentheses following the name, if possible, following best practices of the issuing
23
+ organization and taking into consideration information system capabilities. For example:
24
+ Azat Muhadov (also seen as Azat Muhadow, Азат Мухадов). This convention is designed to
25
+ ensure that vital forensic information is not lost.
26
+
27
+ For names of persons who are known to not be part of the Turkmen-speaking community, use the
28
+ relevant IC transliteration standard for names from that language (e.g., Yitzhak). A translator’s
29
+ note may be used to clarify the known origin of the person. Spell names of individuals from
30
+ languages that are written in Roman letters as they are spelled in those languages (e.g., George
31
+ Clooney, Jorge Garcia, Georges Pompidou).
32
+
33
+ In the case of active senior government officials in the on-line CIA World Factbook and the on-
34
+ line directory of Chiefs of State and Cabinet Members of Foreign Governments, the spellings
35
+ given in these on-line reference works should be used in place of the IC Standard. For any
36
+ individual who has at one time been listed in the Factbook or Chiefs of State directory but who no
37
+ longer appears in those resources (i.e. is no longer a government official), the IC Standard
38
+ spelling should appear first, with the spelling, if known, as it previously appeared in those
39
+ resources listed within parentheses at the first usage.
40
+
41
+ The primary goal is to produce a consistent Romanized transcription of names that is specifically
42
+ readable to the English-speaking non-specialist. The system uses the 26 letters of the standard
43
+ (English) Roman alphabet. Some ambiguities in the Romanized form will occur without the use
44
+ of diacritics. However, within the context of a report, where additional information about the
45
+ individual is provided, the referent will be clearly identified. This system will be used in
46
+ conjunction with on-line tools, name dictionaries, and lists containing conventional spellings of
47
+ names of well-known individuals.
48
+
49
+ notes:
50
+ - Transliterate double digraphs as a single digraph, i.e. шш -> sh, not shsh
51
+ - In the Roman, no distinction is made between digraphs such as 'sh' and single contiguous letters, (e.g. 's' followed by 'h').
52
+ - The Cyrillic ъ and ь are not transliterated, but instead are left out of the transliteration.
53
+
54
+ tests:
55
+ - source: Акгюль
56
+ expected: Akgyul
57
+ - source: Акгыз
58
+ expected: Akgyz
59
+ - source: Арсланбек
60
+ expected: Arslanbek
61
+ - source: Берди
62
+ expected: Berdi
63
+ - source: Дидар
64
+ expected: Didar
65
+ - source: Гөзел
66
+ expected: Gozel
67
+ - source: Гуля
68
+ expected: Gulya
69
+ - source: Гюля
70
+ expected: Gyulya
71
+ - source: Мәхри
72
+ expected: Mahri
73
+ - source: Майса
74
+ expected: Maysa
75
+ - source: Мырат
76
+ expected: Myrat
77
+ - source: Өвез
78
+ expected: Ovez
79
+ - source: Рашит
80
+ expected: Rashit
81
+ - source: Сапармырат
82
+ expected: Saparmyrat
83
+
84
+ map:
85
+ rules:
86
+ - pattern: "\u0448\u0448" # шш -> sh
87
+ result: sh
88
+ - pattern: "\u0428\u0448" # Шш -> Sh
89
+ result: Sh
90
+ - pattern: "\u0428\u0428" # ШШ -> SH
91
+ result: SH
92
+ - pattern: "\u0448\u0428" # шШ -> sH
93
+ result: sH
94
+ - pattern: "\u042A|\u044A|\u042C|\u044C" # remove Ъ, ъ, Ь and ь (neither sign is transliterated)
95
+ result: ''
96
+
97
+ characters:
98
+ '\u0410': 'A' # А
99
+ '\u0411': 'B' # Б
100
+ '\u0412': 'V' # В
101
+ '\u0413': 'G' # Г
102
+ '\u0414': 'D' # Д
103
+ '\u0415': 'E' # Е
104
+ '\u0401': 'Yo' # Ё
105
+ '\u0416': 'Zh' # Ж
106
+ '\u0496': 'J' # Җ
107
+ '\u0417': 'Z' # З
108
+ '\u0418': 'I' # И
109
+ '\u0419': 'Y' # Й
110
+ '\u041A': 'K' # К
111
+ '\u041B': 'L' # Л
112
+ '\u041C': 'M' # М
113
+ '\u041D': 'N' # Н
114
+ '\u04A2': 'Ng' # Ң
115
+ '\u041E': 'O' # О
116
+ '\u04E8': 'O' # Ө
117
+ '\u041F': 'P' # П
118
+ '\u0420': 'R' # Р
119
+ '\u0421': 'S' # С
120
+ '\u0422': 'T' # Т
121
+ '\u0423': 'U' # У
122
+ '\u04AE': 'U' # Ү
123
+ '\u0424': 'F' # Ф
124
+ '\u0425': 'H' # Х
125
+ '\u0426': 'Ts' # Ц
126
+ '\u0427': 'Ch' # Ч
127
+ '\u0428': 'Sh' # Ш
128
+ '\u0429': 'Shch' # Щ
129
+ '\u042B': 'Y' # Ы
130
+ '\u042D': 'E' # Э
131
+ '\u04D8': 'A' # Ә
132
+ '\u042E': 'Yu' # Ю
133
+ '\u042F': 'Ya' # Я
134
+
135
+ '\u0430': 'a' # а
136
+ '\u0431': 'b' # б
137
+ '\u0432': 'v' # в
138
+ '\u0433': 'g' # г
139
+ '\u0434': 'd' # д
140
+ '\u0435': 'e' # е
141
+ '\u0451': 'yo' # ё
142
+ '\u0436': 'zh' # ж
143
+ '\u0497': 'j' # җ
144
+ '\u0437': 'z' # з
145
+ '\u0438': 'i' # и
146
+ '\u0439': 'y' # й
147
+ '\u043A': 'k' # к
148
+ '\u043B': 'l' # л
149
+ '\u043C': 'm' # м
150
+ '\u043D': 'n' # н
151
+ '\u04A3': 'ng' # ң
152
+ '\u043E': 'o' # о
153
+ '\u04E9': 'o' # ө
154
+ '\u043F': 'p' # п
155
+ '\u0440': 'r' # р
156
+ '\u0441': 's' # с
157
+ '\u0442': 't' # т
158
+ '\u0443': 'u' # у
159
+ '\u04AF': 'u' # ү
160
+ '\u0444': 'f' # ф
161
+ '\u0445': 'h' # х
162
+ '\u0446': 'ts' # ц
163
+ '\u0447': 'ch' # ч
164
+ '\u0448': 'sh' # ш
165
+ '\u0449': 'shch' # щ
166
+ '\u044B': 'y' # ы
167
+ '\u044D': 'e' # э
168
+ '\u04D9': 'a' # ә
169
+ '\u044E': 'yu' # ю
170
+ '\u044F': 'ya' # я
@@ -82,6 +82,10 @@ tests:
82
82
  expected: Yaroshenko
83
83
  - source: Костянтин
84
84
  expected: Kostyantyn
85
+ - source: Новофедорівка
86
+ expected: Novofedorivka
87
+ - source: Гуляйгородок
88
+ expected: Hulyayhorodok
85
89
 
86
90
  map:
87
91
  rules:
@@ -168,7 +168,7 @@ map:
168
168
  result: ' ad͟h D͟h'
169
169
  - pattern : ' Al L' # الل
170
170
  result: ' al L'
171
- - pattern : ' an n' # الن
171
+ - pattern : ' An N' # الن
172
172
  result: ' an N'
173
173
  - pattern: " Al " # ال
174
174
  result: " al "
@@ -0,0 +1,223 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:asm
5
+ source_script: Beng
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Assamese Romanization, Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_as.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in
14
+ volume II of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Assamese (Asamīyā) uses an alphasyllabic script whereby each character represents a syllable rather
19
+ than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
20
+ and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous but the user
21
+ would have to recognize many ligatures not given in the original table. The system is mostly reversible but there
22
+ exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and consonants
23
+ (ligatures vs. character sequences).
24
+
25
+ References
26
+
27
+ Second United Nations Conference on the Standardization of Geographical Names.
28
+ London, 10–31 May 1972. Vol. II. Technical papers. United Nations. New York 1974, pp. 141–142.
29
+
30
+ Third United Nations Conference on the Standardization of Geographical Names. Athens,
31
+ 17 August – 7 September 1977. Vol. II, Technical papers, pp. 393 etc.
32
+
33
+ notes:
34
+ - |
35
+ ু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
36
+ - |
37
+ ূ Exceptions: রূ rū.
38
+ - |
39
+ ৃ Exceptions: হৃ hṛ.
40
+ - |
41
+ ্‌ Pronunciation without a vowel; special form: ৎ t.
42
+ - |
43
+ Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
44
+
45
+ tests:
46
+ - source: "অসমীয়া কবিতা"
47
+ expected: "asamīyā kabitā"
48
+ - source: "কবিৰ আজি জন্মদিন"
49
+ expected: "kabira āji janmadina"
50
+ - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
51
+ expected: "beruṭata emāhara pāchhate punara bhayaṁkara agnikāṇḍa"
52
+ - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
53
+ expected: "bhaṅāra biruddhe āvedana dākhila kaṁganāra"
54
+ - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
55
+ expected: "āpuni paṙhi bhāla pāba parā bātari"
56
+ - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
57
+ expected: "shrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
58
+ - source: "কেনে আছে প্ৰাক্তন"
59
+ expected: "kene āchhe prāktana"
60
+ - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
61
+ expected: "kamumbāira meyarara dehata kobhiḍa pajiṭibha"
62
+ - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
63
+ expected: "ṭuiṭāraj̱oge khoda sadarī kare ei kathā"
64
+ - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
65
+ expected: "lakhimapura jilāra nārāyaṇapurara barapathārata āji prashānti dhāma nāmere ekhana bṛddhāshramara shubhārambha karā haya"
66
+
67
+ map:
68
+ rules:
69
+ - pattern: ([ক]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
70
+ result: 'k'
71
+ - pattern: ([খ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
72
+ result: 'kh'
73
+ - pattern: ([গ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
74
+ result: 'g'
75
+ - pattern: ([ঘ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
76
+ result: 'gh'
77
+ - pattern: ([ঙ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
78
+ result: 'ṅ'
79
+ - pattern: ([চ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
80
+ result: 'ch'
81
+ - pattern: ([ছ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
82
+ result: 'chh'
83
+ - pattern: ([জ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
84
+ result: 'j'
85
+ - pattern: ([ঝ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
86
+ result: 'jh'
87
+ - pattern: ([ঞ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
88
+ result: 'ñ'
89
+ - pattern: ([ট]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
90
+ result: 'ṭ'
91
+ - pattern: ([ঠ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
92
+ result: 'ṭh'
93
+ - pattern: ([ড]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
94
+ result: 'ḍ'
95
+ - pattern: ([ঢ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
96
+ result: 'ḍh'
97
+ - pattern: ([ণ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
98
+ result: 'ṇ'
99
+ - pattern: ([ত]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
100
+ result: 't'
101
+ - pattern: ([থ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
102
+ result: 'th'
103
+ - pattern: ([দ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
104
+ result: 'd'
105
+ - pattern: ([ধ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
106
+ result: 'dh'
107
+ - pattern: ([ন]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
108
+ result: 'n'
109
+ - pattern: ([প]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
110
+ result: 'p'
111
+ - pattern: ([ফ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
112
+ result: 'ph'
113
+ - pattern: ([ব]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
114
+ result: 'b'
115
+ - pattern: ([ভ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
116
+ result: 'bh'
117
+ - pattern: ([ম]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
118
+ result: 'm'
119
+ - pattern: ([য]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
120
+ result: 'j̱'
121
+ - pattern: ([ৰ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
122
+ result: 'r'
123
+ - pattern: ([ল]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
124
+ result: 'l'
125
+ - pattern: ([ৱ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
126
+ result: 'v'
127
+ - pattern: ([শ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
128
+ result: 'sh'
129
+ - pattern: ([ষ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
130
+ result: 'ṣh'
131
+ - pattern: ([স]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
132
+ result: 's'
133
+ - pattern: ([হ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
134
+ result: 'h'
135
+ - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
136
+ result: 'ṙ'
137
+ - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
138
+ result: 'ṙh'
139
+ - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
140
+ result: 'y'
141
+ - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
142
+ result: 'ṙ'
143
+ - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
144
+ result: 'ṙh'
145
+ - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
146
+ result: 'y'
147
+
148
+ characters:
149
+
150
+ # I. Independent vowel characters
151
+ 'অ': 'a'
152
+ 'আ': 'ā'
153
+ 'ই': 'i'
154
+ 'ঈ': 'ī'
155
+ 'উ': 'u'
156
+ 'ঊ': 'ū'
157
+ 'ঋ': 'ṛ'
158
+ 'এ': 'e'
159
+ 'ঐ': 'ai'
160
+ 'ও': 'o'
161
+ 'ঔ': 'au'
162
+
163
+ # II. Abbreviated vowel characters
164
+ '\u09be': 'ā'
165
+ '\u09bf': 'i'
166
+ '\u09c0': 'ī'
167
+ '\u09c1': 'u'
168
+ '\u09c2': 'ū'
169
+ '\u09c3': 'ṛ'
170
+ '\u09c7': 'e'
171
+ '\u09c8': 'ai'
172
+ '\u09cb': 'o'
173
+ '\u09cc': 'au'
174
+
175
+ # III. Other symbols
176
+ '\u0982': 'ṁ'
177
+ '\u0981': 'm̐'
178
+ '\u0983': 'ḥ'
179
+ '\u09cd': ''
180
+
181
+ # IV. Consonant characters
182
+ 'ক': 'ka'
183
+ 'খ': 'kha'
184
+ 'গ': 'ga'
185
+ 'ঘ': 'gha'
186
+ 'ঙ': 'ṅa'
187
+ 'চ': 'cha'
188
+ 'ছ': 'chha'
189
+ 'জ': 'ja'
190
+ 'ঝ': 'jha'
191
+ 'ঞ': 'ña'
192
+ 'ট': 'ṭa'
193
+ 'ঠ': 'ṭha'
194
+ 'ড': 'ḍa'
195
+ 'ঢ': 'ḍha'
196
+ 'ণ': 'ṇa'
197
+ 'ত': 'ta'
198
+ 'থ': 'tha'
199
+ 'দ': 'da'
200
+ 'ধ': 'dha'
201
+ 'ন': 'na'
202
+ 'প': 'pa'
203
+ 'ফ': 'pha'
204
+ 'ব': 'ba'
205
+ 'ভ': 'bha'
206
+ 'ম': 'ma'
207
+ 'য': 'j̱a'
208
+ 'ৰ': 'ra'
209
+ 'ল': 'la'
210
+ 'ৱ': 'va'
211
+ 'শ': 'sha'
212
+ 'ষ': 'ṣha'
213
+ 'স': 'sa'
214
+ 'হ': 'ha'
215
+ 'ৎ': 't'
216
+
217
+ # Note V Dotted variants
218
+ 'ড়': 'ṙa'
219
+ 'ঢ়': 'ṙha'
220
+ 'য়': 'ya'
221
+ 'য়': 'ya'
222
+ 'ড়': 'ṙa'
223
+ 'ঢ়': 'ṙha'