interscript 0.1.5 → 2.1.0a8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +3 -39
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/exe/interscript +6 -0
  19. data/interscript.gemspec +31 -0
  20. data/lib/interscript.rb +81 -123
  21. data/lib/interscript/command.rb +5 -5
  22. data/lib/interscript/compiler.rb +22 -0
  23. data/lib/interscript/compiler/javascript.rb +292 -0
  24. data/lib/interscript/compiler/ruby.rb +262 -0
  25. data/lib/interscript/dsl.rb +67 -0
  26. data/lib/interscript/dsl/aliases.rb +23 -0
  27. data/lib/interscript/dsl/document.rb +46 -0
  28. data/lib/interscript/dsl/group.rb +45 -0
  29. data/lib/interscript/dsl/group/parallel.rb +6 -0
  30. data/lib/interscript/dsl/items.rb +89 -0
  31. data/lib/interscript/dsl/metadata.rb +26 -0
  32. data/lib/interscript/dsl/stage.rb +6 -0
  33. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  34. data/lib/interscript/dsl/tests.rb +12 -0
  35. data/lib/interscript/interpreter.rb +251 -0
  36. data/lib/interscript/node.rb +25 -0
  37. data/lib/interscript/node/alias_def.rb +15 -0
  38. data/lib/interscript/node/dependency.rb +13 -0
  39. data/lib/interscript/node/document.rb +45 -0
  40. data/lib/interscript/node/group.rb +34 -0
  41. data/lib/interscript/node/group/parallel.rb +9 -0
  42. data/lib/interscript/node/group/sequential.rb +2 -0
  43. data/lib/interscript/node/item.rb +52 -0
  44. data/lib/interscript/node/item/alias.rb +42 -0
  45. data/lib/interscript/node/item/any.rb +61 -0
  46. data/lib/interscript/node/item/capture.rb +50 -0
  47. data/lib/interscript/node/item/group.rb +51 -0
  48. data/lib/interscript/node/item/repeat.rb +40 -0
  49. data/lib/interscript/node/item/stage.rb +23 -0
  50. data/lib/interscript/node/item/string.rb +51 -0
  51. data/lib/interscript/node/metadata.rb +18 -0
  52. data/lib/interscript/node/rule.rb +6 -0
  53. data/lib/interscript/node/rule/funcall.rb +18 -0
  54. data/lib/interscript/node/rule/run.rb +15 -0
  55. data/lib/interscript/node/rule/sub.rb +65 -0
  56. data/lib/interscript/node/stage.rb +19 -0
  57. data/lib/interscript/node/tests.rb +15 -0
  58. data/lib/interscript/stdlib.rb +211 -0
  59. data/lib/interscript/utils/regexp_converter.rb +283 -0
  60. data/lib/interscript/version.rb +1 -1
  61. data/requirements.txt +1 -0
  62. metadata +73 -311
  63. data/README.adoc +0 -298
  64. data/bin/rspec +0 -29
  65. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript-opal.rb +0 -2
  68. data/lib/interscript/fs.rb +0 -69
  69. data/lib/interscript/mapping.rb +0 -142
  70. data/lib/interscript/opal.rb +0 -23
  71. data/lib/interscript/opal/maps.js.erb +0 -7
  72. data/lib/interscript/opal_map_translate.rb +0 -12
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38912
  76. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -141
  77. data/maps/alalc-bel-cyrl-latn-1997.yaml +0 -125
  78. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  79. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -94
  80. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -625
  81. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -628
  82. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -112
  83. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -146
  84. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -94
  85. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  86. data/maps/alalc-mkd-cyrl-latn-1997.yaml +0 -114
  87. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -222
  88. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  89. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -114
  90. data/maps/alalc-srp-cyrl-latn-2013.yaml +0 -135
  91. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -141
  92. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  93. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  94. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -175
  95. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  96. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -294
  97. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -31
  98. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  99. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  100. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  101. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  102. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -104
  103. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -184
  104. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +0 -285
  105. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -115
  106. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -38
  107. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -702
  108. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -20
  109. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  110. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -127
  111. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -43
  112. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  113. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  114. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  115. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -159
  116. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  117. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +0 -93
  118. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -314
  119. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -166
  120. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -163
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -208
  122. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  123. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -168
  124. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  125. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -685
  126. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -681
  127. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -20
  128. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -32
  129. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -89
  130. data/maps/gki-bel-cyrl-latn-1992.yaml +0 -33
  131. data/maps/gki-bel-cyrl-latn-2000.yaml +0 -201
  132. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +0 -186
  133. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  134. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -141
  135. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -122
  136. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  137. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  138. data/maps/icao-per-Arab-Latn-9303.yaml +0 -104
  139. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -118
  140. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  141. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -120
  142. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -610
  143. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -41
  144. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -62
  145. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -272
  146. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  147. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -901
  148. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  149. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  150. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -803
  151. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  152. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  153. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  154. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -110
  155. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  156. data/maps/nil-kor-Hang-Hang-jamo.yaml +0 -11193
  157. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  158. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  159. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  160. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -88
  161. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  162. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  163. data/maps/odni-mkd-cyrl-latn-2015.yaml +0 -122
  164. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  165. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  166. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  167. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  168. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  169. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -157
  170. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -167
  171. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  172. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -179
  173. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  174. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -76
  175. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24759
  176. data/maps/ses-ara-arab-latn-1930.yaml +0 -275
  177. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  178. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -193
  179. data/maps/un-ara-Arab-Latn-1971.yaml +0 -127
  180. data/maps/un-ara-Arab-Latn-1972.yaml +0 -152
  181. data/maps/un-ara-Arab-Latn-2017.yaml +0 -383
  182. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  183. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  184. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -32
  185. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -20
  186. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +0 -780
  187. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  188. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  189. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
  190. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  191. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  192. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  193. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  194. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -37
  195. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  196. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  197. data/maps/var-zho-Hani-Latn-1979.yaml +0 -38908
  198. data/spec/interscript/mapping_spec.rb +0 -42
  199. data/spec/interscript_spec.rb +0 -26
  200. data/spec/spec_helper.rb +0 -3
@@ -1,534 +0,0 @@
1
- ---
2
- authority_id: un
3
- id: 2016
4
- language: ben
5
- source_script: Beng
6
- destination_script: Latn
7
- name: Bengali Romanization, Version 4.0
8
- url: http://www.eki.ee/wgrs/rom1_bn.htm
9
- creation_date: 2016
10
- description: |
11
- The United Nations recommended system was approved in 1972 (II/11)
12
- and amended in 1977 (III/12), based on a report prepared by D. N.
13
- Sharma. The tables and their corrections were published in volume II of
14
- the conference reports1,2.
15
-
16
- There is no evidence of the use of the system either in Bangladesh,
17
- in India or in international cartographic products. The resolution
18
- IV/17 (1982) recommended association, inter alia, with Bangladesh, in
19
- carrying out further studies on the system.
20
-
21
- Bengali (Bānglā) uses an alphasyllabic script whereby each character
22
- represents a syllable rather than one sound. Vowels and diphthongs are
23
- marked in two ways: as independent characters (used syllable-initially)
24
- and in an abbreviated form, to denote vowels after consonants. The
25
- romanization table is unambiguous but the user would have to recognize
26
- many ligatures not given in the original table. The system is mostly
27
- reversible but there exist some ambiguities in the romanization of
28
- vowels (independent vs. abbreviated characters) and consonants
29
- (ligatures vs. character sequences).
30
-
31
- Other systems of romanization
32
-
33
- For differences between the UN system and the ISO transliteration
34
- standard ISO 15919: 2001 see the section on the romanization of Hindi.
35
-
36
- References
37
-
38
- Second United Nations Conference on the Standardization of
39
- Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers.
40
- United Nations. New York 1974, pp. 139–140.
41
-
42
- Third United Nations Conference on the Standardization of
43
- Geographical Names. Athens, 17 August – 7 September 1977. Vol. II,
44
- Technical papers, pp. 393 etc.
45
-
46
- notes:
47
- - |
48
- In the romanization system below character variations and the table of ligatures have been added.
49
-
50
- I. Independent vowel characters
51
-
52
- 1 অ a
53
- 2 আ ā
54
- 3 ই i
55
- 4 ঈ ī
56
- 5 উ u
57
- 6 ঊ ū
58
- 7 ঋ ṛ
59
- 8 এ e
60
- 9 ঐ ai
61
- 10 ও o
62
- 11 ঔ au
63
-
64
-
65
-
66
-
67
- - Where two Roman equivalents are given, the second (in brackets) is
68
- used for recording the pronunciation of place-names while the first
69
- form is for general use.
70
- - In the table only word-initial character variants are shown.
71
- Depending on the position in the word many variants of the characters
72
- are used as well as some ligatures. These features are not covered here.
73
- - For technical reasons the characters of the Mongolian script are
74
- turned 90˚ anti-clockwise.
75
-
76
- tests:
77
- - source: "র্ক"
78
- expected: "rka"
79
- - source: "গ্র"
80
- expected: "gra"
81
- - source: "ত্য"
82
- expected: "tya"
83
-
84
- - source: |
85
- আমার সোনার বাংলা, আমি তোমায় ভালোবাসি।
86
- চিরদিন তোমার আকাশ, তোমার বাতাস, আমার প্রাণে বাজায় বাঁশি॥
87
- ও মা, ফাগুনে তোর আমের বনে ঘ্রাণে পাগল করে, মরি হায়, হায় রে—
88
- ও মা, অঘ্রাণে তোর ভরা ক্ষেতে আমি কী দেখেছি মধুর হাসি॥
89
-
90
- কী শোভা, কী ছায়া গো, কী স্নেহ, কী মায়া গো—
91
- কী আঁচল বিছায়েছ বটের মূলে, নদীর কূলে কূলে।
92
- মা, তোর মুখের বাণী আমার কানে লাগে সুধার মতো,
93
- মরি হায়, হায় রে—
94
- মা, তোর বদনখানি মলিন হলে, ও মা, আমি নয়নজলে ভাসি॥
95
-
96
- # Note: There are still couple of improvements we can do in the
97
- # transliteration system, but for now this could work
98
- #
99
- # But please revisit this - especially the use case of `য়`, it's adding
100
- # some mixed character in the text.
101
- #
102
- expected: |
103
- āmaāra saonaāra baāṁlaā, āmai taomaāj̱aA় bhaālaobaāsai।
104
- chairadaina taomaāra ākaāsha, taomaāra baātaāsa, āmaāra praāṇae baājaāj̱aA় baām̐shai॥
105
- o maā, phaāgaunae taora āmaera banae ghraāṇae paāgala karae, marai haāj̱aA়, haāj̱aA় rae—
106
- o maā, aghraāṇae taora bharaā kṣhaetae āmai kaī daekhaechhai madhaura haāsai॥
107
-
108
- kaī shaobhaā, kaī chhaāj̱aA়ā gao, kaī snaeha, kaī maāj̱aA়ā gao—
109
- kaī ām̐chala baichhaāj̱aA়echha baṭaera maūlae, nadaīra kaūlae kaūlae।
110
- maā, taora maukhaera baāṇaī āmaāra kaānae laāgae saudhaāra matao,
111
- marai haāj̱aA়, haāj̱aA় rae—
112
- maā, taora badanakhaānai malaina halae, o maā, āmai naj̱aA়najalae bhaāsai॥
113
-
114
- map:
115
- characters:
116
-
117
- # I. Independent vowel characters
118
-
119
- 'অ': 'a' # 1
120
- 'আ': 'ā' # 2
121
- 'ই': 'i' # 3
122
- 'ঈ': 'ī' # 4
123
- 'উ': 'u' # 5
124
- 'ঊ': 'ū' # 6
125
- 'ঋ': 'ṛ' # 7
126
- 'এ': 'e' # 8
127
- 'ঐ': 'ai' # 9
128
- 'ও': 'o' # 10
129
- 'ঔ': 'au' # 11
130
-
131
- # II. Abbreviated vowel characters (ক stands for any consonant character)
132
-
133
- # 'ক': 'a' # 1
134
- '\u09be': 'ā' # 2 কা
135
- '\u09bf': 'i' # 3 কি
136
- '\u09c0': 'ī' # 4 কী
137
- '\u09c1': 'u' # 5 কু Exceptions: গু gu; রু ru; শু shu; হু hu; ন্তু ntu; স্তু stu.
138
- '\u09c2': 'ū' # 6 কূ Exception: রূ rū.
139
- '\u09c3': 'ṛ' # 7 কৃ Exception: হৃ hṛ.
140
- '\u09c7': 'e' # 8 কে
141
- '\u09c8': 'ai' # 9 কৈ
142
- '\u09cb': 'o' # 10 কো
143
- '\u09cc': 'au' # 11 কৌ
144
-
145
- # II 5 Exceptions
146
- 'গু': 'gu'
147
- 'রু': 'ru'
148
- 'শু': 'shu'
149
- 'হু': 'hu'
150
- 'ন্তু': 'ntu'
151
- 'স্তু': 'stu'
152
- # II 6 Exceptions
153
- 'রূ': 'rū'
154
- # II 7 Exceptions
155
- 'হৃ': 'hṛ'
156
-
157
- # III. Other symbols (ক stands for any consonant character)
158
-
159
- '\u0982': 'ṁ' # 1 কং
160
- '\u0981': 'm̐' # 2 কঁ
161
- '\u0983': 'ḥ' # 3 কঃ
162
- '\u09cd\u200c': '' # 4 ক্‌ Pronunciation without a vowel; special form: ৎ t.
163
-
164
- # III 4 special form
165
- 'ৎ': 't'
166
-
167
- # IV. Consonant characters
168
-
169
- 'ক': 'ka' # 1
170
- 'খ': 'kha' # 2
171
- 'গ': 'ga' # 3
172
- 'ঘ': 'gha' # 4
173
- 'ঙ': 'ṅa' # 5
174
- 'চ': 'cha' # 6
175
- 'ছ': 'chha' # 7
176
- 'জ': 'ja' # 8
177
- 'ঝ': 'jha' # 9
178
- 'ঞ': 'ña' # 10
179
- 'ট': 'ṭa' # 11
180
- 'ঠ': 'ṭha' # 12
181
- 'ড': 'ḍa' # 13 A Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
182
- 'ঢ': 'ḍha' # 14 A Dotted variants of the characters: ড় ṙa; ঢ় ṙha; য় ya.
183
- 'ণ': 'ṇa' # 15
184
- 'ত': 'ta' # 16
185
- 'থ': 'tha' # 17
186
- 'দ': 'da' # 18
187
- 'ধ': 'dha' # 19
188
- 'ন': 'na' # 20
189
- 'প': 'pa' # 21
190
- 'ফ': 'pha' # 22
191
- 'ব': 'ba' # 23
192
- 'ভ': 'bha' # 24
193
- 'ম': 'ma' # 25
194
- 'য': 'j̱aA' # 26
195
- 'র': 'ra' # 27
196
- 'ল': 'la' # 28
197
- 'শ': 'sha' # 29
198
- 'ষ': 'ṣha' # 30
199
- 'স': 'sa' # 31
200
- 'হ': 'ha' # 32
201
-
202
- # IV 13, 14
203
- 'ড়': 'ṙa'
204
- 'ঢ়': 'ṙha'
205
- 'য়': 'ya'
206
-
207
-
208
- # V. Ligatures
209
- # Adscript forms of some consonants
210
- #
211
- # We already implemented one to one mapping for most commonly used
212
- # combined letters - (Zuktabarna), so we can ignore these custom rules
213
- # for now.
214
- #
215
- # 'র্‍': 'r-:'
216
- # '‍্র': '-r:'
217
- # '‍্য': '-y:'
218
-
219
-
220
- # Other ligatures (the list is not complete)
221
-
222
- 'ক্ক': 'kka'
223
- 'ক্ট': 'kṭa'
224
- 'ক্ত': 'kta'
225
- 'ক্ন': 'kna'
226
- 'ক্ম': 'kma'
227
- 'ক্র': 'kra'
228
- 'ক্ল': 'kla'
229
- 'ক্ব': 'kva'
230
- 'ক্ষ': 'kṣha'
231
- 'ক্ষ্ন': 'kṣhna'
232
- 'ক্ষ্ম': 'kṣhma'
233
- 'ক্ষ্ব': 'kṣhva'
234
-
235
- 'ক্স': 'ksa'
236
- 'গ্গ': 'gga'
237
- 'গ্দ': 'gda'
238
- 'গ্ধ': 'gdha'
239
- 'গ্ন': 'gna'
240
- 'গ্ম': 'gma'
241
- 'গ্র': 'gra'
242
- 'গ্ল': 'gla'
243
- 'ঘ্র': 'ghra'
244
- 'ঙ্ক': 'ṅka'
245
- 'ঙ্গ': 'ṅga'
246
- 'চ্চ': 'chcha'
247
-
248
- 'চ্ছ': 'chchha'
249
- 'চ্ছ্ব': 'chchhva'
250
- 'চ্ঞ': 'chña'
251
- 'জ্জ': 'jja'
252
- 'জ্জ্ব': 'jjva'
253
- 'জ্ঝ': 'jjha'
254
- 'জ্ঞ': 'jña'
255
- 'জ্ব': 'jva'
256
- 'ঞ্চ': 'ñcha'
257
- 'ঞ্ছ': 'ñchha'
258
- 'ঞ্জ': 'ñja'
259
- 'ঞ্ঝ': 'ñjha'
260
-
261
- 'ট্ট': 'ṭṭa'
262
- 'ড্ড': 'ḍḍa'
263
- 'ণ্ট': 'ṇṭa'
264
- 'ণ্ঠ': 'ṇṭha'
265
- 'ণ্ড': 'ṇḍa'
266
- 'ত্ত': 'tta'
267
- 'ত্ত্ব': 'ttva'
268
- 'ত্থ': 'ttha'
269
- 'ত্ন': 'tna'
270
- 'ত্ম': 'tma'
271
- 'ত্র': 'tra'
272
- 'ত্ল': 'tla'
273
-
274
- 'ত্ব': 'tva'
275
- 'দ্দ': 'dda'
276
- 'দ্দ্ব': 'ddva'
277
- 'দ্ধ': 'ddha'
278
- 'দ্ধ্ব': 'ddhva'
279
- 'দ্ন': 'dna'
280
- 'দ্ব': 'dva'
281
- 'দ্ভ': 'dbha'
282
- 'দ্ম': 'dma'
283
- 'দ্র': 'dra'
284
- 'দ্ল': 'dla'
285
- 'ধ্র': 'dhra'
286
-
287
- 'ন্ঠ': 'nṭha'
288
- 'ন্ড': 'nḍa'
289
- 'ন্ক': 'nka'
290
- 'ন্ত': 'nta'
291
- 'ন্ত্র': 'ntra'
292
- 'ন্থ': 'ntha'
293
- 'ন্দ': 'nda'
294
- 'ন্দ্র': 'ndra'
295
- 'ন্ধ': 'ndha'
296
- 'ন্ন': 'nna'
297
- 'ন্ম': 'nma'
298
- 'ন্ব': 'nva'
299
-
300
- 'প্ন': 'pna'
301
- 'প্ত': 'pta'
302
- 'প্প': 'ppa'
303
- 'প্র': 'pra'
304
- 'প্ল': 'pla'
305
- 'ফ্র': 'phra'
306
- 'ব্জ': 'bja'
307
- 'ব্দ': 'bda'
308
- 'ব্ধ': 'bdha'
309
- 'ব্ব': 'bba'
310
- 'ব্র': 'bra'
311
- 'ভ্র': 'bhra'
312
- 'ম্প': 'mpa'
313
- 'ম্ব': 'mba'
314
- 'ম্ভ': 'mbha'
315
- 'ম্ভ্র': 'mbhra'
316
- 'ম্ম': 'mma'
317
- 'ম্র': 'mra'
318
- 'ম্ল': 'mla'
319
- 'ল্ক': 'lka'
320
- 'ল্ট': 'lṭa'
321
- 'ল্ড': 'lḍa'
322
- 'ল্ম': 'lma'
323
- 'ল্ল': 'lla'
324
-
325
- 'শ্চ': 'shcha'
326
- 'শ্ছ': 'shchha'
327
- 'শ্ত': 'shta'
328
- 'শ্ন': 'shna'
329
- 'শ্ম': 'shma'
330
- 'শ্র': 'shra'
331
- 'শ্ল': 'shla'
332
- 'শ্ব': 'shva'
333
- 'ষ্ক': 'ṣhka'
334
- 'ষ্ট': 'ṣhṭa'
335
- 'ষ্ট্র': 'ṣhṭra'
336
- 'ষ্ঠ': 'ṣhṭha'
337
-
338
- 'ষ্ঞ': 'ṣhña'
339
- 'ষ্প': 'ṣhpa'
340
- 'ষ্ফ': 'ṣhpha'
341
- 'স্ক': 'ska'
342
- 'স্ক্র': 'skra'
343
- 'স্খ': 'skha'
344
- 'স্ত': 'sta'
345
- 'স্ন': 'sna'
346
- 'স্ম': 'sma'
347
- 'স্র': 'sra'
348
- 'স্ব': 'sva'
349
- 'হ্ন': 'hna'
350
-
351
- 'হ্ম': 'hma'
352
- 'হ্র': 'hra'
353
- 'হ্ল': 'hla'
354
-
355
- # Zuktabarna - combined letters
356
- #
357
- # The following is not the official list, but it has been
358
- # collected and verified from a reliable source.
359
- # Source: https://www.somewhereinblog.net/blog/trivuzblog/28849694
360
- #
361
- 'ক্ট্র': 'kṭra'
362
- 'ক্ত্র': 'ktra'
363
- 'ক্য': 'kya'
364
- 'ক্ষ্ণ': 'kṣṇa'
365
- 'ক্ষ্ম': 'kṣma'
366
- 'খ্য': 'khaj̱a'
367
- 'খ্র': 'khra'
368
- 'গ্ন': 'gna'
369
- 'গ্‌ণ': 'gṇa'
370
- 'গ্ধ্য': 'gdhya'
371
- 'গ্ধ্র': 'gdhra'
372
- 'গ্ন্য': 'gnya'
373
- 'গ্ব': 'gva'
374
- 'গ্য': 'gya'
375
- 'গ্র্য': 'grya'
376
- 'ঘ্ন': 'ghna'
377
- 'ঘ্য': 'ghya'
378
- 'ঙ্‌ক্ত': 'ṅkata'
379
- 'ঙ্ক্য': 'ṅkaya'
380
- 'ঙ্ক্ষ': 'ṅkṣa'
381
- 'ঙ্খ': 'ṅkha'
382
- 'ঙ্গ্য': 'ṅgaya'
383
- 'ঙ্ঘ': 'ṅgha'
384
- 'ঙ্ঘ্য': 'ṅghya'
385
- 'ঙ্ঘ্র': 'ṅghra'
386
- 'ঙ্ম': 'ṅma'
387
- 'চ্ছ্র': 'cchra'
388
- 'চ্ব': 'cva'
389
- 'চ্য': 'cya'
390
- 'জ্য': 'jya'
391
- 'জ্র': 'jra'
392
- 'ট্ব': 'ṭva'
393
- 'ট্ম': 'ṭma'
394
- 'ট্য': 'ṭya'
395
- 'ট্র': 'ṭra'
396
- 'ড্ব': 'ḍva'
397
- 'ড্য': 'ḍya'
398
- 'ড্র': 'ḍra'
399
- 'ড়্গ': 'ḍga'
400
- 'ঢ্য': 'ḍhya'
401
- 'ঢ্র': 'ḍhra'
402
- 'ণ্ঠ্য': 'ṇṭhya'
403
- 'ণ্ড্য': 'ṇḍya'
404
- 'ণ্ড্র': 'ṇḍra'
405
- 'ণ্ঢ': 'ṇḍha'
406
- 'ণ্ণ': 'ṇṇa'
407
- 'ণ্ব': 'ṇva'
408
- 'ণ্ম': 'ṇma'
409
- 'ণ্য': 'ṇya'
410
- 'ৎক': 'tka'
411
- 'ত্ত্য': 'ttya'
412
- 'ত্ম্য': 'tmya'
413
- 'ত্য': 'tya'
414
- 'ত্র্য': 'trya'
415
- 'ৎল': 'tla'
416
- 'ৎস': 'tsa'
417
- 'থ্ব': 'thva'
418
- 'থ্য': 'thya'
419
- 'থ্র': 'thra'
420
- 'দ্গ': 'dga'
421
- 'দ্ঘ': 'dgha'
422
- 'দ্ভ্র': 'dbhra'
423
- 'দ্য': 'dya'
424
- 'দ্র্য': 'draya'
425
- 'ধ্ন': 'dhna'
426
- 'ধ্ব': 'dhva'
427
- 'ধ্ম': 'dhma'
428
- 'ধ্য': 'dya'
429
- 'ন্ট': 'nṭa'
430
- 'ন্ট্র': 'nṭra'
431
- 'ন্ড্র': 'nḍra'
432
- 'ন্ত্ব': 'ntva'
433
- 'ন্ত্য': 'ntaya'
434
- 'ন্ত্র্য': 'ntraya'
435
- 'ন্থ্র': 'nthra'
436
- 'ন্দ্য': 'ndya'
437
- 'ন্দ্ব': 'ndva'
438
- 'ন্ধ্য': 'ndhya'
439
- 'ন্ধ্র': 'ndhra'
440
- 'ন্য': 'nya'
441
- 'প্ট': 'pṭa'
442
- 'প্য': 'pya'
443
- 'প্র্য': 'praya'
444
- 'প্স': 'psa'
445
- 'ফ্ল': 'phla'
446
- 'ব্য': 'bya'
447
- 'ব্ল': 'bla'
448
- 'ভ্ব': 'bhva'
449
- 'ভ্য': 'bhya'
450
- 'ম্ন': 'mna'
451
- 'ম্প্র': 'mpra'
452
- 'ম্ফ': 'mpha'
453
- 'ম্ব্র': 'mvra'
454
- 'ম্য': 'mya'
455
- 'য্য': 'j̱aya'
456
- 'র্ক': 'rka'
457
- 'র্ক্য': 'rkya'
458
- 'র্গ্য': 'rgya'
459
- 'র্ঘ্য': 'rghya'
460
- 'র্চ্য': 'rchya'
461
- 'র্জ্য': 'rjya'
462
- 'র্ণ্য': 'rṇya'
463
- 'র্ত্য': 'rtya'
464
- 'র্থ্য': 'rthya'
465
- 'র্ব্য': 'rvya'
466
- 'র্ম্য': 'rmya'
467
- 'র্শ্য': 'rshya'
468
- 'র্ষ্য': 'rṣhya'
469
- 'র্হ্য': 'rhya'
470
- 'র্খ': 'rkha'
471
- 'র্গ': 'rga'
472
- 'র্গ্র': 'rgra'
473
- 'র্ঘ': 'rgha'
474
- 'র্চ': 'rcha'
475
- 'র্ছ': 'rchha'
476
- 'র্জ': 'rja'
477
- 'র্ঝ': 'rjha'
478
- 'র্ট': 'rṭa'
479
- 'র্ড': 'rḍa'
480
- 'র্ণ': 'rṇa'
481
- 'র্ত': 'rta'
482
- 'র্ত্র': 'rtra'
483
- 'র্থ': 'rtha'
484
- 'র্দ': 'rda'
485
- 'র্দ্ব': 'rdva'
486
- 'র্দ্র': 'rdra'
487
- 'র্ধ': 'rdha'
488
- 'র্ধ্ব': 'rdhba'
489
- 'র্ন': 'rna'
490
- 'র্প': 'rpa'
491
- 'র্ফ': 'rpha'
492
- 'র্ভ': 'rbha'
493
- 'র্ম': 'rma'
494
- 'র্য': 'rya'
495
- 'র্ল': 'rla'
496
- 'র্শ': 'rsha'
497
- 'র্শ্ব': 'rshba'
498
- 'র্ষ': 'rṣha'
499
- 'র্স': 'rsa'
500
- 'র্হ': 'rha'
501
- 'র্ঢ্য': 'rḍhya'
502
- 'ল্ক্য': 'lkaya'
503
- 'ল্গ': 'lga'
504
- 'ল্প': 'lpa'
505
- 'ল্‌ফ': 'lpha'
506
- 'ল্ফ': 'lpha'
507
- 'ল্ব': 'lba'
508
- 'ল্‌ভ': 'lbha'
509
- 'ল্য': 'lya'
510
- 'শ্য': 'sya'
511
- 'ষ্ক্র': 'ṣkra'
512
- 'ষ্ট্য': 'ṣṭya'
513
- 'ষ্ঠ্য': 'ṣṭhya'
514
- 'ষ্ণ': 'ṣṇa'
515
- 'ষ্প্র': 'ṣpra'
516
- 'ষ্ব': 'ṣva'
517
- 'ষ্ম': 'ṣma'
518
- 'ষ্য': 'ṣya'
519
- 'স্ট': 'sṭa'
520
- 'স্ট্র': 'sṭra'
521
- 'স্ত্ব': 'stva'
522
- 'স্ত্য': 'stṣya'
523
- 'স্ত্র': 'stra'
524
- 'স্থ': 'stha'
525
- 'স্থ্য': 'sthya'
526
- 'স্প': 'spa'
527
- 'স্প্র': 'spra'
528
- 'স্প্‌ল': 'spala'
529
- 'স্ফ': 'spha'
530
- 'স্য': 'sya'
531
- 'স্ল': 'sla'
532
- 'হ্ণ': 'hṇa'
533
- 'হ্ব': 'hva'
534
- 'হ্য': 'hya'