language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 21726
|
2
|
+
� 4832
|
3
|
+
� 4080
|
4
|
+
� 3253
|
5
|
+
� 2775
|
6
|
+
� 2747
|
7
|
+
� 2642
|
8
|
+
� 2481
|
9
|
+
� 2373
|
10
|
+
� 2066
|
11
|
+
� 1929
|
12
|
+
� 1806
|
13
|
+
� 1797
|
14
|
+
� 1760
|
15
|
+
� 1605
|
16
|
+
� 1347
|
17
|
+
� 1345
|
18
|
+
� 1061
|
19
|
+
� 1060
|
20
|
+
�_ 971
|
21
|
+
�_ 965
|
22
|
+
�_ 916
|
23
|
+
_� 902
|
24
|
+
� 881
|
25
|
+
_� 869
|
26
|
+
�_ 847
|
27
|
+
� 843
|
28
|
+
�_ 810
|
29
|
+
� 760
|
30
|
+
_� 696
|
31
|
+
. 676
|
32
|
+
� 654
|
33
|
+
� 647
|
34
|
+
_� 623
|
35
|
+
" 621
|
36
|
+
_� 606
|
37
|
+
�� 582
|
38
|
+
_� 565
|
39
|
+
� 543
|
40
|
+
_� 514
|
41
|
+
�� 480
|
42
|
+
._ 476
|
43
|
+
�� 464
|
44
|
+
�� 459
|
45
|
+
צ 457
|
46
|
+
�_ 457
|
47
|
+
�_ 451
|
48
|
+
� 450
|
49
|
+
�_ 449
|
50
|
+
_� 443
|
51
|
+
� 438
|
52
|
+
�� 431
|
53
|
+
� 428
|
54
|
+
_� 419
|
55
|
+
�� 418
|
56
|
+
�� 414
|
57
|
+
�� 413
|
58
|
+
� 410
|
59
|
+
�� 400
|
60
|
+
�� 396
|
61
|
+
�_ 394
|
62
|
+
�� 377
|
63
|
+
�� 375
|
64
|
+
�� 373
|
65
|
+
�� 358
|
66
|
+
�� 356
|
67
|
+
�_ 354
|
68
|
+
� 354
|
69
|
+
_� 350
|
70
|
+
�� 346
|
71
|
+
, 343
|
72
|
+
�� 341
|
73
|
+
�� 340
|
74
|
+
�� 336
|
75
|
+
�� 333
|
76
|
+
�� 331
|
77
|
+
� 328
|
78
|
+
�� 326
|
79
|
+
,_ 323
|
80
|
+
�� 322
|
81
|
+
�� 321
|
82
|
+
�� 319
|
83
|
+
�� 318
|
84
|
+
�� 317
|
85
|
+
� 314
|
86
|
+
_�� 314
|
87
|
+
� 312
|
88
|
+
�� 310
|
89
|
+
�� 310
|
90
|
+
�� 305
|
91
|
+
� 295
|
92
|
+
�� 295
|
93
|
+
Φ 294
|
94
|
+
_�� 289
|
95
|
+
_�� 288
|
96
|
+
�� 288
|
97
|
+
� 287
|
98
|
+
�� 282
|
99
|
+
�� 282
|
100
|
+
��_ 280
|
101
|
+
�� 274
|
102
|
+
� 273
|
103
|
+
�� 268
|
104
|
+
_� 268
|
105
|
+
� 267
|
106
|
+
�� 266
|
107
|
+
"_ 264
|
108
|
+
��� 254
|
109
|
+
�� 252
|
110
|
+
�� 252
|
111
|
+
_" 251
|
112
|
+
��_ 251
|
113
|
+
��_ 249
|
114
|
+
�_ 248
|
115
|
+
� 246
|
116
|
+
��_ 245
|
117
|
+
I 245
|
118
|
+
�� 244
|
119
|
+
_� 241
|
120
|
+
�_ 239
|
121
|
+
_�� 239
|
122
|
+
�� 235
|
123
|
+
�� 235
|
124
|
+
� 233
|
125
|
+
_� 232
|
126
|
+
_�_ 228
|
127
|
+
�� 228
|
128
|
+
�� 228
|
129
|
+
�_ 226
|
130
|
+
�� 224
|
131
|
+
_�� 221
|
132
|
+
�� 219
|
133
|
+
_�� 218
|
134
|
+
�� 216
|
135
|
+
�� 215
|
136
|
+
�� 214
|
137
|
+
�� 213
|
138
|
+
_צ 210
|
139
|
+
�� 209
|
140
|
+
_� 207
|
141
|
+
�_ 206
|
142
|
+
�� 206
|
143
|
+
��_ 204
|
144
|
+
�� 203
|
145
|
+
_� 203
|
146
|
+
�_ 202
|
147
|
+
�� 201
|
148
|
+
̦ 200
|
149
|
+
� 199
|
150
|
+
���_ 199
|
151
|
+
� 197
|
152
|
+
��_ 196
|
153
|
+
�� 193
|
154
|
+
�� 193
|
155
|
+
_��_ 190
|
156
|
+
_� 190
|
157
|
+
�� 188
|
158
|
+
�� 187
|
159
|
+
_� 186
|
160
|
+
�� 186
|
161
|
+
�� 185
|
162
|
+
�� 184
|
163
|
+
��_ 184
|
164
|
+
_� 176
|
165
|
+
_�_ 174
|
166
|
+
_� 174
|
167
|
+
�� 173
|
168
|
+
�� 173
|
169
|
+
� 173
|
170
|
+
�� 170
|
171
|
+
_�� 169
|
172
|
+
�� 168
|
173
|
+
�� 167
|
174
|
+
� 166
|
175
|
+
�� 166
|
176
|
+
�� 163
|
177
|
+
� 163
|
178
|
+
Ҧ 163
|
179
|
+
� 163
|
180
|
+
� 162
|
181
|
+
��_ 161
|
182
|
+
�� 160
|
183
|
+
Φ_ 160
|
184
|
+
�� 160
|
185
|
+
��_ 159
|
186
|
+
צ� 159
|
187
|
+
_� 158
|
188
|
+
Ԧ 155
|
189
|
+
_�� 155
|
190
|
+
* 154
|
191
|
+
�� 153
|
192
|
+
�� 153
|
193
|
+
_* 152
|
194
|
+
��_ 152
|
195
|
+
��_ 152
|
196
|
+
�� 152
|
197
|
+
_�� 151
|
198
|
+
�_ 151
|
199
|
+
�� 151
|
200
|
+
*_ 151
|
201
|
+
�� 151
|
202
|
+
_*_ 150
|
203
|
+
� 149
|
204
|
+
_�_ 149
|
205
|
+
_�� 148
|
206
|
+
_� 147
|
207
|
+
Ħ 147
|
208
|
+
�� 146
|
209
|
+
�� 145
|
210
|
+
_��_ 144
|
211
|
+
��� 144
|
212
|
+
��_ 143
|
213
|
+
�� 142
|
214
|
+
_�� 141
|
215
|
+
�� 141
|
216
|
+
�� 140
|
217
|
+
�_ 139
|
218
|
+
�� 138
|
219
|
+
Ц 137
|
220
|
+
��� 137
|
221
|
+
�� 136
|
222
|
+
�� 136
|
223
|
+
_��_ 136
|
224
|
+
�� 135
|
225
|
+
æ 135
|
226
|
+
_� 134
|
227
|
+
_�� 133
|
228
|
+
�� 133
|
229
|
+
��_ 133
|
230
|
+
�� 132
|
231
|
+
�� 131
|
232
|
+
�� 130
|
233
|
+
�� 129
|
234
|
+
_�� 129
|
235
|
+
�� 129
|
236
|
+
_�� 129
|
237
|
+
�_ 128
|
238
|
+
�� 128
|
239
|
+
�� 128
|
240
|
+
ͦ 128
|
241
|
+
_�� 127
|
242
|
+
�� 126
|
243
|
+
�_ 126
|
244
|
+
�צ 123
|
245
|
+
�� 123
|
246
|
+
�� 122
|
247
|
+
˦ 121
|
248
|
+
: 119
|
249
|
+
_� 118
|
250
|
+
�� 118
|
251
|
+
�� 116
|
252
|
+
Ӧ 116
|
253
|
+
�� 115
|
254
|
+
ϧ 115
|
255
|
+
�� 115
|
256
|
+
��_ 115
|
257
|
+
��� 114
|
258
|
+
��_ 113
|
259
|
+
_� 113
|
260
|
+
��� 113
|
261
|
+
�� 113
|
262
|
+
��� 112
|
263
|
+
�� 112
|
264
|
+
? 111
|
265
|
+
�� 110
|
266
|
+
��� 109
|
267
|
+
_� 109
|
268
|
+
�� 109
|
269
|
+
�� 109
|
270
|
+
�� 109
|
271
|
+
��_ 108
|
272
|
+
�� 108
|
273
|
+
_� 106
|
274
|
+
� 106
|
275
|
+
��_ 106
|
276
|
+
�� 106
|
277
|
+
_צ� 106
|
278
|
+
�� 106
|
279
|
+
�� 105
|
280
|
+
¦ 105
|
281
|
+
��� 105
|
282
|
+
��� 104
|
283
|
+
�� 104
|
284
|
+
�� 104
|
285
|
+
��� 104
|
286
|
+
�� 103
|
287
|
+
�� 103
|
288
|
+
_� 102
|
289
|
+
_�� 102
|
290
|
+
�_ 102
|
291
|
+
��_ 102
|
292
|
+
��� 102
|
293
|
+
_� 101
|
294
|
+
_�� 101
|
295
|
+
��� 101
|
296
|
+
� 100
|
297
|
+
_�� 100
|
298
|
+
�� 99
|
299
|
+
���_ 99
|
300
|
+
��� 99
|
301
|
+
_��_ 98
|
302
|
+
��� 98
|
303
|
+
�� 98
|
304
|
+
��� 97
|
305
|
+
��_ 97
|
306
|
+
�� 96
|
307
|
+
�� 94
|
308
|
+
���_ 94
|
309
|
+
_� 94
|
310
|
+
�� 93
|
311
|
+
! 93
|
312
|
+
_�� 93
|
313
|
+
.. 93
|
314
|
+
��� 92
|
315
|
+
�� 92
|
316
|
+
��� 92
|
317
|
+
�� 92
|
318
|
+
_��� 91
|
319
|
+
�� 91
|
320
|
+
��� 91
|
321
|
+
_��� 91
|
322
|
+
�� 91
|
323
|
+
�� 89
|
324
|
+
� 89
|
325
|
+
�_ 88
|
326
|
+
���_ 88
|
327
|
+
�� 88
|
328
|
+
�� 86
|
329
|
+
�� 86
|
330
|
+
�� 86
|
331
|
+
���� 86
|
332
|
+
�� 85
|
333
|
+
�_ 85
|
334
|
+
�� 84
|
335
|
+
��� 84
|
336
|
+
��� 84
|
337
|
+
_���� 83
|
338
|
+
_Ц 83
|
339
|
+
_�� 82
|
340
|
+
�� 82
|
341
|
+
�� 82
|
342
|
+
� 81
|
343
|
+
� 81
|
344
|
+
_� 81
|
345
|
+
��_ 81
|
346
|
+
�� 81
|
347
|
+
�� 81
|
348
|
+
��� 81
|
349
|
+
��_ 80
|
350
|
+
_� 79
|
351
|
+
_� 79
|
352
|
+
�� 78
|
353
|
+
�� 78
|
354
|
+
�� 78
|
355
|
+
�� 78
|
356
|
+
�� 78
|
357
|
+
_��� 77
|
358
|
+
��_ 77
|
359
|
+
��_ 77
|
360
|
+
�� 77
|
361
|
+
�� 77
|
362
|
+
_� 77
|
363
|
+
_� 77
|
364
|
+
_�� 76
|
365
|
+
_�� 76
|
366
|
+
��_ 76
|
367
|
+
�� 76
|
368
|
+
�� 75
|
369
|
+
�� 75
|
370
|
+
��� 75
|
371
|
+
�� 74
|
372
|
+
��� 74
|
373
|
+
��� 74
|
374
|
+
���� 74
|
375
|
+
ϧ_ 73
|
376
|
+
_��� 73
|
377
|
+
��� 72
|
378
|
+
". 72
|
379
|
+
." 72
|
380
|
+
���� 72
|
381
|
+
�� 72
|
382
|
+
�� 71
|
383
|
+
��_ 71
|
384
|
+
�� 71
|
385
|
+
� 71
|
386
|
+
_�� 71
|
387
|
+
��� 71
|
388
|
+
��� 71
|
389
|
+
��_ 70
|
390
|
+
_� 70
|
391
|
+
��� 70
|
392
|
+
_�� 69
|
393
|
+
�Φ 69
|
394
|
+
�� 68
|
395
|
+
��� 68
|
396
|
+
���� 68
|
397
|
+
�� 68
|
398
|
+
��_ 68
|
399
|
+
����_ 68
|
400
|
+
��� 68
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 88044
|
2
|
+
n 17000
|
3
|
+
h 12823
|
4
|
+
t 9071
|
5
|
+
i 8490
|
6
|
+
c 8394
|
7
|
+
g 8035
|
8
|
+
ng 6718
|
9
|
+
_t 6352
|
10
|
+
_c 5234
|
11
|
+
a 5083
|
12
|
+
g_ 4883
|
13
|
+
ng_ 4882
|
14
|
+
_n 4379
|
15
|
+
n_ 4365
|
16
|
+
i_ 4365
|
17
|
+
u 4149
|
18
|
+
m 3648
|
19
|
+
� 3635
|
20
|
+
nh 3480
|
21
|
+
o 3451
|
22
|
+
� 3193
|
23
|
+
_� 3168
|
24
|
+
r 3011
|
25
|
+
l 2692
|
26
|
+
� 2659
|
27
|
+
, 2328
|
28
|
+
,_ 2295
|
29
|
+
c_ 2279
|
30
|
+
_l 2263
|
31
|
+
ch 2226
|
32
|
+
v 2161
|
33
|
+
th 2158
|
34
|
+
a_ 2132
|
35
|
+
_th 2113
|
36
|
+
y 2111
|
37
|
+
. 2107
|
38
|
+
t_ 2106
|
39
|
+
h_ 2074
|
40
|
+
_v 2057
|
41
|
+
_h 1888
|
42
|
+
_m 1834
|
43
|
+
_ch 1813
|
44
|
+
s 1749
|
45
|
+
nh_ 1715
|
46
|
+
u_ 1679
|
47
|
+
� 1668
|
48
|
+
�_ 1649
|
49
|
+
tr 1611
|
50
|
+
b 1589
|
51
|
+
_tr 1581
|
52
|
+
_nh 1541
|
53
|
+
_b 1530
|
54
|
+
m_ 1505
|
55
|
+
p 1483
|
56
|
+
._ 1455
|
57
|
+
k 1429
|
58
|
+
_s 1415
|
59
|
+
o_ 1380
|
60
|
+
y_ 1371
|
61
|
+
_k 1367
|
62
|
+
_ng 1348
|
63
|
+
� 1343
|
64
|
+
e 1296
|
65
|
+
� 1208
|
66
|
+
� 1199
|
67
|
+
� 1181
|
68
|
+
� 1139
|
69
|
+
� 1132
|
70
|
+
T 1126
|
71
|
+
hi 1121
|
72
|
+
� 1081
|
73
|
+
� 1058
|
74
|
+
� 1055
|
75
|
+
d 1040
|
76
|
+
_g 1039
|
77
|
+
kh 1034
|
78
|
+
_kh 1027
|
79
|
+
_T 994
|
80
|
+
�i 967
|
81
|
+
l� 943
|
82
|
+
_l� 941
|
83
|
+
" 926
|
84
|
+
� 916
|
85
|
+
ܩ 901
|
86
|
+
an 893
|
87
|
+
_d 888
|
88
|
+
� 877
|
89
|
+
�i_ 859
|
90
|
+
on 853
|
91
|
+
� 848
|
92
|
+
N 846
|
93
|
+
� 820
|
94
|
+
�_ 813
|
95
|
+
� 801
|
96
|
+
�n 795
|
97
|
+
ph 776
|
98
|
+
_p 772
|
99
|
+
_ph 752
|
100
|
+
� 752
|
101
|
+
� 734
|
102
|
+
h� 728
|
103
|
+
ho 715
|
104
|
+
v� 709
|
105
|
+
_v� 703
|
106
|
+
g� 702
|
107
|
+
� 700
|
108
|
+
_r 696
|
109
|
+
H 693
|
110
|
+
_l�_ 691
|
111
|
+
l�_ 691
|
112
|
+
ܩi 684
|
113
|
+
�n 679
|
114
|
+
�n 677
|
115
|
+
�ng 676
|
116
|
+
ha 672
|
117
|
+
gi 663
|
118
|
+
C 656
|
119
|
+
_gi 655
|
120
|
+
� 654
|
121
|
+
gܩi 646
|
122
|
+
gܩ 646
|
123
|
+
ܩi_ 624
|
124
|
+
�ng_ 610
|
125
|
+
ng� 610
|
126
|
+
_ng� 608
|
127
|
+
�c 606
|
128
|
+
� 601
|
129
|
+
ngܩ 600
|
130
|
+
ngܩi 600
|
131
|
+
_ngܩ 598
|
132
|
+
gܩi_ 594
|
133
|
+
�a 587
|
134
|
+
c� 586
|
135
|
+
_c� 585
|
136
|
+
�t 585
|
137
|
+
c�a 585
|
138
|
+
�n_ 585
|
139
|
+
_c�a 584
|
140
|
+
c� 583
|
141
|
+
�a_ 582
|
142
|
+
_c� 581
|
143
|
+
c�a_ 581
|
144
|
+
� 581
|
145
|
+
_c�a_ 580
|
146
|
+
�t_ 578
|
147
|
+
_N 574
|
148
|
+
c�_ 574
|
149
|
+
� 573
|
150
|
+
_c�_ 572
|
151
|
+
i� 568
|
152
|
+
� 562
|
153
|
+
� 562
|
154
|
+
m� 557
|
155
|
+
_C 553
|
156
|
+
_m� 551
|
157
|
+
p_ 540
|
158
|
+
� 540
|
159
|
+
m�t 538
|
160
|
+
m�t_ 534
|
161
|
+
_m�t 532
|
162
|
+
�n 528
|
163
|
+
_m�t_ 528
|
164
|
+
ti 526
|
165
|
+
i� 525
|
166
|
+
� 517
|
167
|
+
� 512
|
168
|
+
�nh 500
|
169
|
+
.. 500
|
170
|
+
� 497
|
171
|
+
v�_ 497
|
172
|
+
_v�_ 496
|
173
|
+
� 491
|
174
|
+
q 490
|
175
|
+
qu 490
|
176
|
+
_H 487
|
177
|
+
_q 484
|
178
|
+
_qu 484
|
179
|
+
ong 481
|
180
|
+
ong_ 471
|
181
|
+
h� 471
|
182
|
+
x 470
|
183
|
+
h� 468
|
184
|
+
� 466
|
185
|
+
_" 460
|
186
|
+
ܮ 456
|
187
|
+
ro 453
|
188
|
+
�nh_ 445
|
189
|
+
�_ 434
|
190
|
+
_x 434
|
191
|
+
�_ 427
|
192
|
+
� 423
|
193
|
+
_ti 423
|
194
|
+
in 422
|
195
|
+
�n 421
|
196
|
+
"_ 418
|
197
|
+
i� 415
|
198
|
+
�n 413
|
199
|
+
ron 412
|
200
|
+
V 411
|
201
|
+
rong 410
|
202
|
+
�i 410
|
203
|
+
rong_ 409
|
204
|
+
�c_ 405
|
205
|
+
� 400
|
206
|
+
�n 399
|
207
|
+
h�n 398
|
208
|
+
�i 395
|
209
|
+
ay 390
|
210
|
+
_V 387
|
211
|
+
h� 382
|
212
|
+
h� 380
|
213
|
+
�y 377
|
214
|
+
�t 376
|
215
|
+
uy 374
|
216
|
+
�� 374
|
217
|
+
_�� 373
|
218
|
+
�i_ 372
|
219
|
+
c� 371
|
220
|
+
_c� 367
|
221
|
+
n� 366
|
222
|
+
�_ 366
|
223
|
+
h� 366
|
224
|
+
_n� 365
|
225
|
+
ra 363
|
226
|
+
h�ng 362
|
227
|
+
ho_ 359
|
228
|
+
�n_ 356
|
229
|
+
�n 356
|
230
|
+
� 355
|
231
|
+
�i_ 354
|
232
|
+
ai 352
|
233
|
+
hu 352
|
234
|
+
cho 352
|
235
|
+
ܮc 351
|
236
|
+
�c 351
|
237
|
+
�n 351
|
238
|
+
_cho 349
|
239
|
+
tro 347
|
240
|
+
�t 347
|
241
|
+
�o 347
|
242
|
+
_tro 346
|
243
|
+
_tron 346
|
244
|
+
� 346
|
245
|
+
tron 346
|
246
|
+
trong 346
|
247
|
+
M 345
|
248
|
+
kh� 340
|
249
|
+
�u 338
|
250
|
+
_kh� 338
|
251
|
+
cho_ 337
|
252
|
+
_cho_ 336
|
253
|
+
h�ng_ 336
|
254
|
+
ay_ 333
|
255
|
+
ch_ 332
|
256
|
+
�� 331
|
257
|
+
�t_ 331
|
258
|
+
( 331
|
259
|
+
_( 330
|
260
|
+
� 329
|
261
|
+
_�� 329
|
262
|
+
�ܮ 328
|
263
|
+
_�ܮc 328
|
264
|
+
) 328
|
265
|
+
�ܮc 328
|
266
|
+
_�ܮ 328
|
267
|
+
kh�n 324
|
268
|
+
_� 324
|
269
|
+
_kh�n 322
|
270
|
+
��_ 322
|
271
|
+
_��_ 320
|
272
|
+
� 318
|
273
|
+
�n 318
|
274
|
+
kh�ng 316
|
275
|
+
�o_ 316
|
276
|
+
ܮc_ 316
|
277
|
+
�c_ 316
|
278
|
+
nh� 315
|
279
|
+
�_ 315
|
280
|
+
�ng 313
|
281
|
+
�ng_ 313
|
282
|
+
�n 313
|
283
|
+
_nh� 313
|
284
|
+
Th 312
|
285
|
+
h� 311
|
286
|
+
� 310
|
287
|
+
h�n 310
|
288
|
+
h�ng_ 310
|
289
|
+
h�ng 310
|
290
|
+
i� 309
|
291
|
+
gh 309
|
292
|
+
�u 307
|
293
|
+
ta 307
|
294
|
+
anh 307
|
295
|
+
� 307
|
296
|
+
ai_ 306
|
297
|
+
�ng 306
|
298
|
+
�y_ 304
|
299
|
+
�ܮc_ 298
|
300
|
+
�� 295
|
301
|
+
S 295
|
302
|
+
: 294
|
303
|
+
�t_ 294
|
304
|
+
e_ 294
|
305
|
+
:_ 290
|
306
|
+
�_ 289
|
307
|
+
ua 288
|
308
|
+
�n 288
|
309
|
+
�i 286
|
310
|
+
�u_ 285
|
311
|
+
_nh� 285
|
312
|
+
nh� 285
|
313
|
+
nh�ng 285
|
314
|
+
nh�n 285
|
315
|
+
_nh�n 285
|
316
|
+
_Th 285
|
317
|
+
�n_ 284
|
318
|
+
�n 283
|
319
|
+
� 281
|
320
|
+
_M 281
|
321
|
+
A 281
|
322
|
+
�c 278
|
323
|
+
L 277
|
324
|
+
� 277
|
325
|
+
��n 276
|
326
|
+
_ha 276
|
327
|
+
n, 275
|
328
|
+
�ng 275
|
329
|
+
��ng 275
|
330
|
+
an_ 272
|
331
|
+
�i_ 271
|
332
|
+
i�u 269
|
333
|
+
s� 269
|
334
|
+
n,_ 268
|
335
|
+
�_ 268
|
336
|
+
on_ 267
|
337
|
+
_s� 267
|
338
|
+
c�c 265
|
339
|
+
�ng_ 265
|
340
|
+
_c�c 264
|
341
|
+
anh_ 264
|
342
|
+
ngh 264
|
343
|
+
_ta 263
|
344
|
+
hi_ 262
|
345
|
+
h�n 261
|
346
|
+
�_ 261
|
347
|
+
�u_ 261
|
348
|
+
�n 260
|
349
|
+
� 260
|
350
|
+
_ngh 257
|
351
|
+
ia 255
|
352
|
+
� 252
|
353
|
+
... 252
|
354
|
+
�u 251
|
355
|
+
�c 251
|
356
|
+
i, 249
|
357
|
+
i�u_ 248
|
358
|
+
nhi 247
|
359
|
+
B 246
|
360
|
+
i,_ 245
|
361
|
+
�ng 244
|
362
|
+
�_ 244
|
363
|
+
co 244
|
364
|
+
_nhi 244
|
365
|
+
� 244
|
366
|
+
ܧ 244
|
367
|
+
i�n 243
|
368
|
+
D 243
|
369
|
+
Tr 241
|
370
|
+
_S 240
|
371
|
+
h�_ 239
|
372
|
+
�n 237
|
373
|
+
h� 236
|
374
|
+
h� 233
|
375
|
+
K 233
|
376
|
+
�ng_ 232
|
377
|
+
�i 232
|
378
|
+
��ng_ 232
|
379
|
+
�n_ 231
|
380
|
+
_co 231
|
381
|
+
�n_ 229
|
382
|
+
h� 229
|
383
|
+
�i 229
|
384
|
+
y� 229
|
385
|
+
�n 229
|
386
|
+
�n 228
|
387
|
+
�ng 228
|
388
|
+
_L 227
|
389
|
+
�ng_ 226
|
390
|
+
inh 226
|
391
|
+
�i 225
|
392
|
+
m� 225
|
393
|
+
_�i 225
|
394
|
+
ng, 224
|
395
|
+
ang 224
|
396
|
+
P 224
|
397
|
+
�n 224
|
398
|
+
g, 224
|
399
|
+
g,_ 223
|
400
|
+
_v� 223
|