interscript 0.1.7 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,21 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Standards for the transliteration of Macedonian personal names in written reports and products
8
+ creation_date: 2005
9
+ confirmation_date: 2005
10
+ description: |
11
+ Office of the Director Of National Intelligence Macedonian Personal Names 2004 System
12
+
13
+ tests:
14
+ - source: Билјана
15
+ expected: Biljana
16
+ - source: Душко
17
+ expected: Dushko
18
+
19
+ map:
20
+ inherit: odni-mkd-Cyrl-Latn-2015
21
+ rules:
@@ -0,0 +1,123 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-3:prs
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Intelligence Community (IC) Standard for the Transliteration of Dari Personal Names (2004)
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Farsi_(Persian)_%26_Dari_IC_Standards.doc
9
+ creation_date: 2004
10
+ confirmation_date: 2004-11
11
+ description: |
12
+
13
+ notes:
14
+ - This standard is intended only for those Afghan names
15
+ that have a common bond or similarity with Iranian or
16
+ Arabic names. They should not, for example, be used for
17
+ Pashto names, for which a separate standard should be used.
18
+ - Long/short vowels:- There is no distinction made in Roman
19
+ between long and short a:- E.g., Farhad (first a is short,
20
+ second is long).
21
+ - Double consonants:- Double consonants represented by the
22
+ tashdid are shown by doubling the Roman letter:- Mohammad.
23
+ Exceptions:- Consonants represented by Roman digraphs (
24
+ e.g., sh, ch) are not doubled:- Mobasher [not:- Mobashsher]
25
+ . Double letters are only used for tashdid (thus, Hosein [
26
+ not Hossein]) or to reflect the ‘sun letter’ assimilation (
27
+ see below).
28
+ - Hamzeh:- The hamzeh is represented name-internally by an
29
+ apostrophe, as is the ain. Name-initially, however,
30
+ neither hamzeh nor ain are indicated in transliteration (
31
+ e.g., Abdorrahman, not 'Abdorrahman).
32
+ - Digraphs:- No distinction is drawn in Roman between
33
+ digraphs such as sh and single contiguous letters (e.g., s
34
+ followed by h).
35
+ - Arabic definite article "al" ('the'):- Common in many
36
+ names borrowed from Arabic, the transliteration should show
37
+ the 'sun letter' assimilation rather than the “l” for the
38
+ lam. That is:- Abdorrahman. Note also that the "Abdol +
39
+ attribute of Allah" names are written as one unanalyzed
40
+ word, as are other names that contain the definite
41
+ article:- Shamsoddin (not Shams al-Din), Nezamoddin, etc.
42
+ - Diphthongs:- Diphthongs are written ei and ow
43
+ respectively:- Hosein; Khosrow.
44
+ - Yeh maqsura (final yeh pronounced as “a”):- should be
45
+ written as “a” as in “Musa”.
46
+
47
+ - Special Rules
48
+
49
+ - Hyphens:- A hyphen is used to indicate the ezafeh
50
+ construction:- Arshad-e Ameri
51
+ - Borrowed names that incorporate the name of God (Allah)
52
+ are transliterated as one word, with the letter "o":- E.g.,
53
+ Abdollah, Ayatollah, Azizollah.
54
+ - Foreign names borrowed or appearing in Dari are spelled
55
+ according to the standard Western tradition (even if there
56
+ is an Arabic or Dari version of the same name):- Joseph,
57
+ Michael.
58
+ - Common suffixes, such as gol, pur, mand, yar, zadeh,
59
+ etc., as well as nesbeh (‘relationship’ (to place of birth,
60
+ etc.)) names derived with these suffixes (e.g., abadi) are
61
+ written as part of the name:-
62
+
63
+ gol Parigol, Ziagol
64
+ pur Shahpur, Mehrpur
65
+ mand Gulahmand
66
+ yar Aminyar
67
+ zadeh Ismailzadeh, Karimzadeh
68
+
69
+ abadi Kamalabadi
70
+
71
+
72
+ tests:
73
+ - source: مُوسَى
74
+ expected: musa
75
+
76
+ - source: مُؤمِن
77
+ expected: momen
78
+
79
+ - source: رِضايي
80
+ expected: rezai
81
+
82
+ - source: مُبَشِّر
83
+ expected: mobasher
84
+
85
+ - source: حَسَّان
86
+ expected: hassan
87
+
88
+ - source: حَسَن
89
+ expected: hasan
90
+
91
+ - source: صَفَّار
92
+ expected: saffar
93
+
94
+ - source: صَفَر
95
+ expected: safar
96
+
97
+ map:
98
+ inherit: odni-fas-Arab-Latn-2004
99
+ characters:
100
+
101
+ '\u0626' : '' # ئ
102
+ '\u0624' : '' # ؤ
103
+
104
+ # shadda
105
+
106
+ '\u0642\u0651' : 'qq' # ق
107
+ '\u0648\u0651' : 'ww' # و
108
+
109
+ '\u0621': '' # ء
110
+
111
+ # FROM NOTES
112
+
113
+ '\u064a\u064a' : 'i' # NOTE 4 (2)
114
+ '\u06cc\u06cc' : 'i'
115
+
116
+ '\u0627\u064a\b' : 'i' # NOTE 4 (3)
117
+ '\u0627\u06cc\b' : 'i'
118
+
119
+ # Farsi consonant characters
120
+
121
+ '\u0639' : '' # ع # new
122
+ '\u0642' : 'q' # ق
123
+ '\u0648' : 'w' # و
@@ -0,0 +1,36 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2005
4
+ language: iso-639-2:srp
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Office of the Director Of National Intelligence Serbian Personal Names 2004 System
8
+ creation_date: 2005
9
+ confirmation_date: 2005
10
+ description: |
11
+ Office of the Director Of National Intelligence Serbian Personal Names 2004 System
12
+
13
+ notes:
14
+
15
+ tests:
16
+ - source: Гојко Митић
17
+ expected: Gojko Mitic
18
+ - source: Горња Ваганица
19
+ expected: Gornja Vaganica
20
+ - source: Довиђења
21
+ expected: Dovidjenja
22
+ - source: Ћао! Здраво!
23
+ expected: Cao! Zdravo!
24
+ - source: Кључ
25
+ expected: Kljuc
26
+ - source: Цигарете
27
+ expected: Cigarete
28
+ - source: Пролеће
29
+ expected: Prolece
30
+ - source: Понедељак
31
+ expected: Ponedeljak
32
+ - source: Горња Ваганица
33
+ expected: Gornja Vaganica
34
+
35
+ map:
36
+ inherit: odni-srp-Cyrl-Latn-2015
@@ -0,0 +1,170 @@
1
+ ---
2
+ authority_id: odni
3
+ id: 2015
4
+ language: iso-639-2:tuk
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Standards for the transliteration of Turkmen personal names in written reports and products
8
+ url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20T%20-%20Turkmen_Personal_Names_FLTS%20(U).pdf
9
+ source: ICS-630-01 Annex T
10
+ creation_date: 2015
11
+ confirmation_date: 2015
12
+ description: |
13
+ This system is the Intelligence Community standard for the transliteration of Turkmen person
14
+ names that will be applied to all final written reports and products for IC consumers. It is not
15
+ intended to eliminate variations of a name that can contribute forensic information. Rather, it is to
16
+ provide an IC standard Romanized (English) transliteration from Turkmen that can then be linked
17
+ to forensic information in ways that will help identify the referent of the name.
18
+
19
+ In cases where an individual’s name has already been transliterated in a variant spelling, the IC
20
+ Standard spelling should appear first, followed by the variant spelling(s) in parentheses at the first
21
+ usage. In addition, if the original Cyrillic-script spelling is known, that spelling should also
22
+ appear in parentheses following the name, if possible, following best practices of the issuing
23
+ organization and taking into consideration information system capabilities. For example:
24
+ Azat Muhadov (also seen as Azat Muhadow, Азат Мухадов). This convention is designed to
25
+ ensure that vital forensic information is not lost.
26
+
27
+ For names of persons who are known to not be part of the Turkmen-speaking community, use the
28
+ relevant IC transliteration standard for names from that language (e.g., Yitzhak). A translator’s
29
+ note may be used to clarify the known origin of the person. Spell names of individuals from
30
+ languages that are written in Roman letters as they are spelled in those languages (e.g., George
31
+ Clooney, Jorge Garcia, Georges Pompidou).
32
+
33
+ In the case of active senior government officials in the on-line CIA World Factbook and the on-
34
+ line directory of Chiefs of State and Cabinet Members of Foreign Governments, the spellings
35
+ given in these on-line reference works should be used in place of the IC Standard. For any
36
+ individual who has at one time been listed in the Factbook or Chiefs of State directory but who no
37
+ longer appears in those resources (i.e. is no longer a government official), the IC Standard
38
+ spelling should appear first, with the spelling, if known, as it previously appeared in those
39
+ resources listed within parentheses at the first usage.
40
+
41
+ The primary goal is to produce a consistent Romanized transcription of names that is specifically
42
+ readable to the English-speaking non-specialist. The system uses the 26 letters of the standard
43
+ (English) Roman alphabet. Some ambiguities in the Romanized form will occur without the use
44
+ of diacritics. However, within the context of a report, where additional information about the
45
+ individual is provided, the referent will be clearly identified. This system will be used in
46
+ conjunction with on-line tools, name dictionaries, and lists containing conventional spellings of
47
+ names of well-known individuals.
48
+
49
+ notes:
50
+ - Transliterate double digraphs as a single digraph, i.e. шш -> sh, not shsh
51
+ - In the Roman, no distinction is made between digraphs such as 'sh' and single contiguous letters, (e.g. 's' followed by 'h').
52
+ - The Cyrillic ъ and ь are not transliterated, but instead are left out of the transliteration.
53
+
54
+ tests:
55
+ - source: Акгюль
56
+ expected: Akgyul
57
+ - source: Акгыз
58
+ expected: Akgyz
59
+ - source: Арсланбек
60
+ expected: Arslanbek
61
+ - source: Берди
62
+ expected: Berdi
63
+ - source: Дидар
64
+ expected: Didar
65
+ - source: Гөзел
66
+ expected: Gozel
67
+ - source: Гуля
68
+ expected: Gulya
69
+ - source: Гюля
70
+ expected: Gyulya
71
+ - source: Мәхри
72
+ expected: Mahri
73
+ - source: Майса
74
+ expected: Maysa
75
+ - source: Мырат
76
+ expected: Myrat
77
+ - source: Өвез
78
+ expected: Ovez
79
+ - source: Рашит
80
+ expected: Rashit
81
+ - source: Сапармырат
82
+ expected: Saparmyrat
83
+
84
+ map:
85
+ rules:
86
+ - pattern: "\u0448\u0448" # шш -> sh
87
+ result: sh
88
+ - pattern: "\u0428\u0448" # Шш -> Sh
89
+ result: Sh
90
+ - pattern: "\u0428\u0428" # ШШ -> SH
91
+ result: SH
92
+ - pattern: "\u0448\u0428" # шШ -> sH
93
+ result: sH
94
+ - pattern: "\u042C|\u044C" # remove Ь and ь
95
+ result: ''
96
+
97
+ characters:
98
+ '\u0410': 'A' # А
99
+ '\u0411': 'B' # Б
100
+ '\u0412': 'V' # В
101
+ '\u0413': 'G' # Г
102
+ '\u0414': 'D' # Д
103
+ '\u0415': 'E' # Е
104
+ '\u0401': 'Yo' # Ё
105
+ '\u0416': 'Zh' # Ж
106
+ '\u0496': 'J' # Җ
107
+ '\u0417': 'Z' # З
108
+ '\u0418': 'I' # И
109
+ '\u0419': 'Y' # Й
110
+ '\u041A': 'K' # К
111
+ '\u041B': 'L' # Л
112
+ '\u041C': 'M' # М
113
+ '\u041D': 'N' # Н
114
+ '\u04A2': 'Ng' # Ң
115
+ '\u041E': 'O' # О
116
+ '\u04E8': 'O' # Ө
117
+ '\u041F': 'P' # П
118
+ '\u0420': 'R' # Р
119
+ '\u0421': 'S' # С
120
+ '\u0422': 'T' # Т
121
+ '\u0423': 'U' # У
122
+ '\u04AE': 'U' # Ү
123
+ '\u0424': 'F' # Ф
124
+ '\u0425': 'H' # Х
125
+ '\u0426': 'Ts' # Ц
126
+ '\u0427': 'Ch' # Ч
127
+ '\u0428': 'Sh' # Ш
128
+ '\u0429': 'Shch' # Щ
129
+ '\u042B': 'Y' # Ы
130
+ '\u042D': 'E' # Э
131
+ '\u04D8': 'A' # Ә
132
+ '\u042E': 'Yu' # Ю
133
+ '\u042F': 'Ya' # Я
134
+
135
+ '\u0430': 'a' # а
136
+ '\u0431': 'b' # б
137
+ '\u0432': 'v' # в
138
+ '\u0433': 'g' # г
139
+ '\u0434': 'd' # д
140
+ '\u0435': 'e' # е
141
+ '\u0451': 'yo' # ё
142
+ '\u0436': 'zh' # ж
143
+ '\u0497': 'j' # җ
144
+ '\u0437': 'z' # з
145
+ '\u0438': 'i' # и
146
+ '\u0439': 'y' # й
147
+ '\u043A': 'k' # к
148
+ '\u043B': 'l' # л
149
+ '\u043C': 'm' # м
150
+ '\u043D': 'n' # н
151
+ '\u04A3': 'ng' # ң
152
+ '\u043E': 'o' # о
153
+ '\u04E9': 'o' # ө
154
+ '\u043F': 'p' # п
155
+ '\u0440': 'r' # р
156
+ '\u0441': 's' # с
157
+ '\u0442': 't' # т
158
+ '\u0443': 'u' # у
159
+ '\u04AF': 'u' # ү
160
+ '\u0444': 'f' # ф
161
+ '\u0445': 'h' # х
162
+ '\u0446': 'ts' # ц
163
+ '\u0447': 'ch' # ч
164
+ '\u0448': 'sh' # ш
165
+ '\u0449': 'shch' # щ
166
+ '\u044B': 'y' # ы
167
+ '\u044D': 'e' # э
168
+ '\u04D9': 'a' # ә
169
+ '\u044E': 'yu' # ю
170
+ '\u044F': 'ya' # я
@@ -82,6 +82,10 @@ tests:
82
82
  expected: Yaroshenko
83
83
  - source: Костянтин
84
84
  expected: Kostyantyn
85
+ - source: Новофедорівка
86
+ expected: Novofedorivka
87
+ - source: Гуляйгородок
88
+ expected: Hulyayhorodok
85
89
 
86
90
  map:
87
91
  rules:
@@ -168,7 +168,7 @@ map:
168
168
  result: ' ad͟h D͟h'
169
169
  - pattern : ' Al L' # الل
170
170
  result: ' al L'
171
- - pattern : ' an n' # الن
171
+ - pattern : ' An N' # الن
172
172
  result: ' an N'
173
173
  - pattern: " Al " # ال
174
174
  result: " al "
@@ -0,0 +1,223 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:asm
5
+ source_script: Beng
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Assamese Romanization, Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_as.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in
14
+ volume II of the conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Assamese (Asamīyā) uses an alphasyllabic script whereby each character represents a syllable rather
19
+ than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
20
+ and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous but the user
21
+ would have to recognize many ligatures not given in the original table. The system is mostly reversible but there
22
+ exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and consonants
23
+ (ligatures vs. character sequences).
24
+
25
+ References
26
+
27
+ Second United Nations Conference on the Standardization of Geographical Names.
28
+ London, 10–31 May 1972. Vol. II. Technical papers. United Nations. New York 1974, pp. 141–142.
29
+
30
+ Third United Nations Conference on the Standardization of Geographical Names. Athens,
31
+ 17 August – 7 September 1977. Vol. II, Technical papers, pp. 393 etc.
32
+
33
+ notes:
34
+ - |
35
+ ু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
36
+ - |
37
+ ূ Exceptions: রূ rū.
38
+ - |
39
+ ৃ Exceptions: হৃ hṛ.
40
+ - |
41
+ ্‌ Pronunciation without a vowel; special form: ৎ t.
42
+ - |
43
+ Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
44
+
45
+ tests:
46
+ - source: "অসমীয়া কবিতা"
47
+ expected: "asamīyā kabitā"
48
+ - source: "কবিৰ আজি জন্মদিন"
49
+ expected: "kabira āji janmadina"
50
+ - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
51
+ expected: "beruṭata emāhara pāchhate punara bhayaṁkara agnikāṇḍa"
52
+ - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
53
+ expected: "bhaṅāra biruddhe āvedana dākhila kaṁganāra"
54
+ - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
55
+ expected: "āpuni paṙhi bhāla pāba parā bātari"
56
+ - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
57
+ expected: "shrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
58
+ - source: "কেনে আছে প্ৰাক্তন"
59
+ expected: "kene āchhe prāktana"
60
+ - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
61
+ expected: "kamumbāira meyarara dehata kobhiḍa pajiṭibha"
62
+ - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
63
+ expected: "ṭuiṭāraj̱oge khoda sadarī kare ei kathā"
64
+ - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
65
+ expected: "lakhimapura jilāra nārāyaṇapurara barapathārata āji prashānti dhāma nāmere ekhana bṛddhāshramara shubhārambha karā haya"
66
+
67
+ map:
68
+ rules:
69
+ - pattern: ([ক]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
70
+ result: 'k'
71
+ - pattern: ([খ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
72
+ result: 'kh'
73
+ - pattern: ([গ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
74
+ result: 'g'
75
+ - pattern: ([ঘ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
76
+ result: 'gh'
77
+ - pattern: ([ঙ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
78
+ result: 'ṅ'
79
+ - pattern: ([চ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
80
+ result: 'ch'
81
+ - pattern: ([ছ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
82
+ result: 'chh'
83
+ - pattern: ([জ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
84
+ result: 'j'
85
+ - pattern: ([ঝ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
86
+ result: 'jh'
87
+ - pattern: ([ঞ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
88
+ result: 'ñ'
89
+ - pattern: ([ট]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
90
+ result: 'ṭ'
91
+ - pattern: ([ঠ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
92
+ result: 'ṭh'
93
+ - pattern: ([ড]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
94
+ result: 'ḍ'
95
+ - pattern: ([ঢ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
96
+ result: 'ḍh'
97
+ - pattern: ([ণ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
98
+ result: 'ṇ'
99
+ - pattern: ([ত]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
100
+ result: 't'
101
+ - pattern: ([থ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
102
+ result: 'th'
103
+ - pattern: ([দ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
104
+ result: 'd'
105
+ - pattern: ([ধ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
106
+ result: 'dh'
107
+ - pattern: ([ন]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
108
+ result: 'n'
109
+ - pattern: ([প]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
110
+ result: 'p'
111
+ - pattern: ([ফ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
112
+ result: 'ph'
113
+ - pattern: ([ব]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
114
+ result: 'b'
115
+ - pattern: ([ভ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
116
+ result: 'bh'
117
+ - pattern: ([ম]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
118
+ result: 'm'
119
+ - pattern: ([য]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
120
+ result: 'j̱'
121
+ - pattern: ([ৰ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
122
+ result: 'r'
123
+ - pattern: ([ল]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
124
+ result: 'l'
125
+ - pattern: ([ৱ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
126
+ result: 'v'
127
+ - pattern: ([শ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
128
+ result: 'sh'
129
+ - pattern: ([ষ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
130
+ result: 'ṣh'
131
+ - pattern: ([স]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
132
+ result: 's'
133
+ - pattern: ([হ]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
134
+ result: 'h'
135
+ - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
136
+ result: 'ṙ'
137
+ - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
138
+ result: 'ṙh'
139
+ - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
140
+ result: 'y'
141
+ - pattern: ([ড়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
142
+ result: 'ṙ'
143
+ - pattern: ([ঢ়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
144
+ result: 'ṙh'
145
+ - pattern: ([য়]=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09cd])
146
+ result: 'y'
147
+
148
+ characters:
149
+
150
+ # I. Independent vowel characters
151
+ 'অ': 'a'
152
+ 'আ': 'ā'
153
+ 'ই': 'i'
154
+ 'ঈ': 'ī'
155
+ 'উ': 'u'
156
+ 'ঊ': 'ū'
157
+ 'ঋ': 'ṛ'
158
+ 'এ': 'e'
159
+ 'ঐ': 'ai'
160
+ 'ও': 'o'
161
+ 'ঔ': 'au'
162
+
163
+ # II. Abbreviated vowel characters
164
+ '\u09be': 'ā'
165
+ '\u09bf': 'i'
166
+ '\u09c0': 'ī'
167
+ '\u09c1': 'u'
168
+ '\u09c2': 'ū'
169
+ '\u09c3': 'ṛ'
170
+ '\u09c7': 'e'
171
+ '\u09c8': 'ai'
172
+ '\u09cb': 'o'
173
+ '\u09cc': 'au'
174
+
175
+ # III. Other symbols
176
+ '\u0982': 'ṁ'
177
+ '\u0981': 'm̐'
178
+ '\u0983': 'ḥ'
179
+ '\u09cd': ''
180
+
181
+ # IV. Consonant characters
182
+ 'ক': 'ka'
183
+ 'খ': 'kha'
184
+ 'গ': 'ga'
185
+ 'ঘ': 'gha'
186
+ 'ঙ': 'ṅa'
187
+ 'চ': 'cha'
188
+ 'ছ': 'chha'
189
+ 'জ': 'ja'
190
+ 'ঝ': 'jha'
191
+ 'ঞ': 'ña'
192
+ 'ট': 'ṭa'
193
+ 'ঠ': 'ṭha'
194
+ 'ড': 'ḍa'
195
+ 'ঢ': 'ḍha'
196
+ 'ণ': 'ṇa'
197
+ 'ত': 'ta'
198
+ 'থ': 'tha'
199
+ 'দ': 'da'
200
+ 'ধ': 'dha'
201
+ 'ন': 'na'
202
+ 'প': 'pa'
203
+ 'ফ': 'pha'
204
+ 'ব': 'ba'
205
+ 'ভ': 'bha'
206
+ 'ম': 'ma'
207
+ 'য': 'j̱a'
208
+ 'ৰ': 'ra'
209
+ 'ল': 'la'
210
+ 'ৱ': 'va'
211
+ 'শ': 'sha'
212
+ 'ষ': 'ṣha'
213
+ 'স': 'sa'
214
+ 'হ': 'ha'
215
+ 'ৎ': 't'
216
+
217
+ # Note V Dotted variants
218
+ 'ড়': 'ṙa'
219
+ 'ঢ়': 'ṙha'
220
+ 'য়': 'ya'
221
+ 'য়': 'ya'
222
+ 'ড়': 'ṙa'
223
+ 'ঢ়': 'ṙha'