BanglaToEnglish 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bangla_to_english.rb +5 -5
- data/lib/patterns.rb +248 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 184a0886980108ca218354a4dd687c075de3e03ae61c8a802f5572ae5c31205f
|
4
|
+
data.tar.gz: d6d37e26661da9e7707df330823fcec66f2557b837efcb358cb6d1b8c0d501a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc3b248ca1d78d283057d0fb816a61400c53e9120423bb7221985e7bd6dfe84e226055985ad58cf4ee89fc1f42010677051e14a22b22fde73038277aafaa276c
|
7
|
+
data.tar.gz: 5f86ffbe96265ee0435c15055c740545a3309e15b74beb75fc54fcfe9e4ce1b73b2baaddac0459c7f8fdb0f7f075c66a94ed71120c3f36612a129d45368f2f6a
|
data/lib/bangla_to_english.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
|
1
|
+
require_relative ('patterns')
|
2
2
|
# Converts text containing Bangla characters into a Latin-script
# (phonetic) rendering, leaving every other character untouched.
#
# The per-character romanization itself is delegated to the top-level
# +english_font+ helper loaded from patterns.rb.
class BanglaToEnglish
  # Inclusive code point range this class treats as Bangla:
  # 2432..2543, i.e. U+0980..U+09EF.
  BANGLA_CODEPOINTS = (2432..2543).freeze

  attr_accessor :bangla_text

  # @param bangla_text [String, nil] the text to transliterate
  def initialize(bangla_text)
    @bangla_text = bangla_text
  end

  # Builds the transliterated string.
  #
  # Characters inside BANGLA_CODEPOINTS are replaced by whatever
  # +english_font+ returns for them; all other characters are copied as-is.
  #
  # NOTE(review): the text is processed one character at a time, so any
  # multi-character keys known to +english_font+ can never be matched from
  # here — confirm whether a longest-match lookup was intended.
  #
  # @return [String] the transliterated text; '' when bangla_text is nil
  def convert_to_english
    # `&.chars.map` only protected the first call and still raised on nil;
    # treat a missing text as empty input instead.
    return '' if @bangla_text.nil?

    # `+''` yields a mutable accumulator so `<<` can append in place rather
    # than allocating a new string per character.
    @bangla_text.each_char.with_object(+'') do |font, english_text|
      english_text << (bangla_font?(font) ? english_font(font) : font)
    end
  end

  private

  # @param font [String] a single character
  # @return [Boolean] true when the character's code point is in BANGLA_CODEPOINTS
  def bangla_font?(font)
    BANGLA_CODEPOINTS.cover?(font.ord)
  end
end
|
data/lib/patterns.rb
ADDED
@@ -0,0 +1,248 @@
|
|
1
|
+
# Lookup table from a Bangla character (or character sequence) to its
# Latin-script rendering.
#
# Built once and frozen instead of being re-created on every call.
# Where the original literal listed the same key twice, only the value that
# actually took effect (the later one) is kept, so lookups are unchanged and
# Ruby no longer warns about duplicated keys.
BANGLA_TO_ENGLISH_PATTERNS = {
  'ভ্ল' => 'bhl',
  'পশ' => 'psh',
  'ব্ধ' => 'bdh',
  'ব্জ' => 'bj',
  'ব্দ' => 'bd',
  'ব্ব' => 'bb',
  'ব্ল' => 'bl',
  'ভ' => 'bh',
  'ব' => 'b',
  'চ্ঞ' => 'cNG',
  'চ্ছ' => 'cch',
  'চ্চ' => 'cc',
  'ছ' => 'chh',
  'চ' => 'ch',
  'ধ্ন' => 'dhn',
  'ধ্ম' => 'dhm',
  'দ্ঘ' => 'dgh',
  'দ্ধ' => 'ddh',
  'দ্ভ' => 'dbh',
  'দ্ম' => 'dm',
  'ড্ড' => 'DD',
  'ঢ' => 'Dh',
  'ধ' => 'dh',
  'দ্গ' => 'dg',
  'দ্দ' => 'dd',
  'ড' => 'D',
  'দ' => 'd',
  '...' => '...',
  '.' => '.',
  '।।' => '..',
  '।' => '.',
  'ঘ্ন' => 'Ghn',
  'গ্ধ' => 'Gdh',
  'গ্ণ' => 'GN',
  'গ্ন' => 'gn',
  'গ্ম' => 'gm',
  'গ্ল' => 'Gl',
  'জ্ঞ' => 'gg',
  'ঘ' => 'Gh',
  'গ' => 'g',
  'হ্ণ' => 'hN',
  'হ্ন' => 'hn',
  'হ্ম' => 'hm',
  'হ্ল' => 'hl',
  'হ' => 'h',
  'জ্ঝ' => 'jjh',
  'ঝ' => 'jh',
  'জ্জ' => 'jj',
  'জ' => 'j',
  'ক্ষ্ণ' => 'kkhN',
  # This key was also listed with 'kkhm'; 'kxm' came later and therefore won.
  'ক্ষ্ম' => 'kxm',
  'ক্ষ' => 'kkh',
  'কশ' => 'ksh',
  'ক্ক' => 'kk',
  'ক্ট' => 'kT',
  'ক্ত' => 'kt',
  'ক্ল' => 'kl',
  'ক্স' => 'ks',
  'খ' => 'kh',
  'ক' => 'k',
  'ল্ভ' => 'lbh',
  'ল্ধ' => 'ldh',
  'লখ' => 'lkh',
  'লঘ' => 'lgh',
  'লফ' => 'lph',
  'ল্ক' => 'lk',
  'ল্গ' => 'lg',
  'ল্ট' => 'lT',
  'ল্ড' => 'lD',
  'ল্প' => 'lp',
  'ল্ম' => 'lm',
  'ল্ল' => 'll',
  'ল্ব' => 'lb',
  'ল' => 'l',
  'ম্থ' => 'mth',
  'ম্ফ' => 'mph',
  'ম্ভ' => 'mbh',
  'মপ্ল' => 'mpl',
  'ম্ন' => 'mn',
  'ম্প' => 'mp',
  'ম্ম' => 'mm',
  'ম্ল' => 'ml',
  'ম্ব' => 'mb',
  'ম' => 'm',
  '০' => 'shyuno',
  # Was written reversed as 'ak' => '1', which left the Bangla digit one
  # without an entry (it transliterated to '').
  '১' => 'ak',
  '২' => 'dui',
  '৩' => 'teen',
  '৪' => 'char',
  '৫' => 'panch',
  '৬' => 'choi',
  '৭' => 'saat',
  '৮' => 'aat',
  '৯' => 'nooi',
  'ঙ্ক্ষ' => 'NgkSh',
  'ঞ্ছ' => 'NGch',
  'ঙ্ঘ' => 'Nggh',
  'ঙ্খ' => 'Ngkh',
  'ঞ্ঝ' => 'NGjh',
  'ঙ্গৌ' => 'ngOU',
  'ঙ্গৈ' => 'ngOI',
  'ঞ্চ' => 'NGc',
  'ঙ্ক' => 'Ngk',
  'ঙ্ষ' => 'Ngx',
  'ঙ্গ' => 'Ngg',
  'ঙ্ম' => 'Ngm',
  'ঞ্জ' => 'NGj',
  'ন্ধ' => 'ndh',
  'ন্ঠ' => 'nTh',
  'ণ্ঠ' => 'NTh',
  'ন্থ' => 'nth',
  'ঙ্গা' => 'nga',
  'ঙ্গি' => 'ngi',
  'ঙ্গী' => 'ngI',
  'ঙ্গু' => 'ngu',
  'ঙ্গূ' => 'ngU',
  'ঙ্গে' => 'nge',
  'ঙ্গো' => 'ngO',
  'ণ্ঢ' => 'NDh',
  'নশ' => 'nsh',
  'ঙর' => 'Ngr',
  'ঞর' => 'NGr',
  'ংর' => 'ngr',
  'ঙ' => 'Ng',
  'ঞ' => 'NG',
  'ং' => 'ng',
  'ন্ন' => 'nn',
  'ণ্ণ' => 'NN',
  'ণ্ন' => 'Nn',
  'ন্ম' => 'nm',
  'ণ্ম' => 'Nm',
  'ন্দ' => 'nd',
  'ন্ট' => 'nT',
  'ণ্ট' => 'NT',
  'ন্ড' => 'nD',
  'ণ্ড' => 'ND',
  'ন্ত' => 'nt',
  'ন্স' => 'ns',
  'ন' => 'n',
  'ণ' => 'N',
  'ৈ' => 'OI',
  'ৌ' => 'OU',
  'ো' => 'O',
  'ঐ' => 'OI',
  'ঔ' => 'OU',
  'ও' => 'O',
  'ফ্ল' => 'phl',
  'প্ট' => 'pT',
  'প্ত' => 'pt',
  'প্ন' => 'pn',
  'প্প' => 'pp',
  'প্ল' => 'pl',
  'প্স' => 'ps',
  'ফ' => 'ph',
  'প' => 'p',
  'ৃ' => 'rri',
  'ঋ' => 'rri',
  'রর্য' => 'rrZ',
  'র্য' => 'rZ',
  '্র্য' => 'rZ',
  'রর' => 'rr',
  'র্' => 'rr',
  '্রর' => 'rr',
  'ড়্গ' => 'Rg',
  'ঢ়' => 'Rh',
  'ড়' => 'R',
  'র' => 'r',
  '্র' => 'r',
  'শ্ছ' => 'shch',
  'ষ্ঠ' => 'ShTh',
  'ষ্ফ' => 'Shph',
  'স্ক্ল' => 'skl',
  'স্খ' => 'skh',
  'স্থ' => 'sth',
  'স্ফ' => 'sph',
  'শ্চ' => 'shc',
  'শ্ত' => 'sht',
  'শ্ন' => 'shn',
  'শ্ম' => 'shm',
  'শ্ল' => 'shl',
  'ষ্ক' => 'Shk',
  'ষ্ট' => 'ShT',
  'ষ্ণ' => 'ShN',
  'ষ্প' => 'Shp',
  'ষ্ম' => 'Shm',
  'স্প্ল' => 'spl',
  'স্ক' => 'sk',
  'স্ট' => 'sT',
  'স্ত' => 'st',
  'স্ন' => 'sn',
  'স্প' => 'sp',
  'স্ম' => 'sm',
  'স্ল' => 'sl',
  'শ' => 'sh',
  'ষ' => 'Sh',
  'স' => 's',
  'অ্য' => 'oZ',
  'অ' => 'o',
  'ত্থ' => 'tth',
  'ৎ' => 'dth',
  'ট্ট' => 'TT',
  'ট্ম' => 'Tm',
  'ঠ' => 'Th',
  'ত্ন' => 'tn',
  'ত্ম' => 'tm',
  'থ' => 'th',
  'ত্ত' => 'tt',
  'ট' => 'T',
  'ত' => 't',
  'অ্যা' => 'aZ',
  # Also listed earlier with 'aya'; the later 'a' is the effective value.
  'া' => 'a',
  'আ' => 'a',
  'য়া' => 'a',
  'ি' => 'i',
  'ই' => 'i',
  # 'ু' and 'উ' were also listed earlier with 'oo'; 'u' is the effective value.
  'ু' => 'u',
  'উ' => 'u',
  'ূ' => 'U',
  'ঊ' => 'U',
  # 'ী' and 'ঈ' were also listed earlier with 'I'; 'ee' is the effective value.
  'ী' => 'ee',
  'ঈ' => 'ee',
  'ে' => 'e',
  'এ' => 'e',
  'য' => 'z',
  '্য' => 'Z',
  'য়' => 'y',
  'ইয়' => 'y',
  'ওয়' => 'w',
  '্ব' => 'w',
  'এক্স' => 'x',
  ':' => ':',
  'ঃ' => ':',
  '^' => '^',
  'ঁ' => '',
  '্' => '',
  '₹' => '$',
  ' ' => ' '
}.freeze

# Returns the Latin-script rendering for a Bangla character or sequence.
#
# @param bangla_font [String, nil] the key to look up in BANGLA_TO_ENGLISH_PATTERNS
# @return [String] the mapped text, or '' when the key is not in the table
def english_font(bangla_font)
  BANGLA_TO_ENGLISH_PATTERNS.fetch(bangla_font, '')
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: BanglaToEnglish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MD Tawab Alam Khan
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A simple gem to convert Bangla text to English based on phonetics matching
|
14
14
|
email: cs.antorkhan@gmail.com
|
@@ -17,6 +17,7 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- lib/bangla_to_english.rb
|
20
|
+
- lib/patterns.rb
|
20
21
|
homepage: https://github.com/antorkhan/bangla-to-english
|
21
22
|
licenses:
|
22
23
|
- MIT
|