language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 26104
|
2
|
+
a 6496
|
3
|
+
r 6044
|
4
|
+
n 5160
|
5
|
+
i 5123
|
6
|
+
s 3987
|
7
|
+
e 3891
|
8
|
+
u 3582
|
9
|
+
t 3300
|
10
|
+
� 3126
|
11
|
+
l 3071
|
12
|
+
g 2726
|
13
|
+
m 2459
|
14
|
+
k 2256
|
15
|
+
f 2230
|
16
|
+
r_ 1967
|
17
|
+
v 1641
|
18
|
+
ar 1472
|
19
|
+
�_ 1420
|
20
|
+
_s 1332
|
21
|
+
� 1248
|
22
|
+
o 1221
|
23
|
+
� 1197
|
24
|
+
a_ 1155
|
25
|
+
in 1150
|
26
|
+
i_ 1114
|
27
|
+
h 1023
|
28
|
+
j 975
|
29
|
+
d 974
|
30
|
+
st 929
|
31
|
+
a� 925
|
32
|
+
. 922
|
33
|
+
n_ 875
|
34
|
+
._ 875
|
35
|
+
_v 874
|
36
|
+
m_ 854
|
37
|
+
nn 842
|
38
|
+
_f 840
|
39
|
+
ur 824
|
40
|
+
_a 806
|
41
|
+
� 785
|
42
|
+
_h 782
|
43
|
+
� 779
|
44
|
+
i� 771
|
45
|
+
er 765
|
46
|
+
um 727
|
47
|
+
g_ 716
|
48
|
+
y 711
|
49
|
+
_e 709
|
50
|
+
� 672
|
51
|
+
b 672
|
52
|
+
ir 671
|
53
|
+
ri 670
|
54
|
+
an 667
|
55
|
+
� 650
|
56
|
+
_� 648
|
57
|
+
a�_ 645
|
58
|
+
u_ 633
|
59
|
+
na 631
|
60
|
+
�_ 625
|
61
|
+
ar_ 622
|
62
|
+
_� 608
|
63
|
+
�_ 606
|
64
|
+
_� 595
|
65
|
+
ta 567
|
66
|
+
ei 566
|
67
|
+
la 558
|
68
|
+
_m 549
|
69
|
+
_�_ 549
|
70
|
+
um_ 547
|
71
|
+
t_ 535
|
72
|
+
ti 529
|
73
|
+
_o 524
|
74
|
+
ur_ 523
|
75
|
+
_a� 519
|
76
|
+
i�_ 512
|
77
|
+
�i 496
|
78
|
+
ve 494
|
79
|
+
og 494
|
80
|
+
� 482
|
81
|
+
og_ 479
|
82
|
+
_og 478
|
83
|
+
_og_ 478
|
84
|
+
nd 469
|
85
|
+
p 464
|
86
|
+
ra 455
|
87
|
+
un 454
|
88
|
+
ir_ 452
|
89
|
+
_a�_ 451
|
90
|
+
ni 439
|
91
|
+
en 439
|
92
|
+
�u 439
|
93
|
+
_�_ 436
|
94
|
+
ng 434
|
95
|
+
il 404
|
96
|
+
ga 395
|
97
|
+
_t 395
|
98
|
+
nu 393
|
99
|
+
ki 392
|
100
|
+
ja 383
|
101
|
+
inn 379
|
102
|
+
_b 375
|
103
|
+
sk 352
|
104
|
+
s_ 350
|
105
|
+
vi 349
|
106
|
+
r� 347
|
107
|
+
�a 345
|
108
|
+
ef 339
|
109
|
+
ag 336
|
110
|
+
_u 330
|
111
|
+
se 324
|
112
|
+
lu 324
|
113
|
+
af 321
|
114
|
+
_ve 320
|
115
|
+
tu 318
|
116
|
+
em 307
|
117
|
+
eg 304
|
118
|
+
nn_ 303
|
119
|
+
_l 303
|
120
|
+
va 301
|
121
|
+
_k 296
|
122
|
+
, 295
|
123
|
+
ns 292
|
124
|
+
re 292
|
125
|
+
tt 291
|
126
|
+
,_ 291
|
127
|
+
l_ 286
|
128
|
+
am 286
|
129
|
+
es 285
|
130
|
+
yr 285
|
131
|
+
al 281
|
132
|
+
da 277
|
133
|
+
S 275
|
134
|
+
gu 273
|
135
|
+
_se 271
|
136
|
+
ver 268
|
137
|
+
_g 266
|
138
|
+
ing 266
|
139
|
+
_n 262
|
140
|
+
is 258
|
141
|
+
_er 257
|
142
|
+
sa 256
|
143
|
+
�ur 255
|
144
|
+
le 255
|
145
|
+
_st 255
|
146
|
+
_S 254
|
147
|
+
sem 254
|
148
|
+
ll 254
|
149
|
+
me 253
|
150
|
+
ha 251
|
151
|
+
li 249
|
152
|
+
kk 249
|
153
|
+
rs 247
|
154
|
+
_vi 247
|
155
|
+
rn 246
|
156
|
+
sl 244
|
157
|
+
gi 243
|
158
|
+
ss 242
|
159
|
+
rf 241
|
160
|
+
fy 240
|
161
|
+
�i_ 240
|
162
|
+
mi 238
|
163
|
+
ka 237
|
164
|
+
ma 231
|
165
|
+
ld 230
|
166
|
+
� 229
|
167
|
+
rir 227
|
168
|
+
sta 227
|
169
|
+
fyr 227
|
170
|
+
v� 227
|
171
|
+
di 226
|
172
|
+
ru 224
|
173
|
+
var 224
|
174
|
+
_fy 222
|
175
|
+
ku 221
|
176
|
+
em_ 221
|
177
|
+
nar 220
|
178
|
+
_sem_ 220
|
179
|
+
_sem 220
|
180
|
+
sem_ 220
|
181
|
+
he 219
|
182
|
+
yri 217
|
183
|
+
_fyr 216
|
184
|
+
si 216
|
185
|
+
yrir 215
|
186
|
+
au 212
|
187
|
+
er_ 212
|
188
|
+
ek 211
|
189
|
+
_ha 210
|
190
|
+
�e 209
|
191
|
+
fyri 207
|
192
|
+
fyrir 207
|
193
|
+
_�e 205
|
194
|
+
fi 204
|
195
|
+
fr 203
|
196
|
+
ge 201
|
197
|
+
or 200
|
198
|
+
ne 200
|
199
|
+
ann 198
|
200
|
+
j� 198
|
201
|
+
_va 196
|
202
|
+
_fyri 196
|
203
|
+
_ver 194
|
204
|
+
fl 192
|
205
|
+
_er_ 191
|
206
|
+
_um 189
|
207
|
+
�k 188
|
208
|
+
til 187
|
209
|
+
_he 186
|
210
|
+
fa 186
|
211
|
+
il_ 182
|
212
|
+
_ti 178
|
213
|
+
_til 177
|
214
|
+
gar 176
|
215
|
+
_var 176
|
216
|
+
na_ 176
|
217
|
+
� 175
|
218
|
+
e� 171
|
219
|
+
fu 170
|
220
|
+
nni 169
|
221
|
+
_me 168
|
222
|
+
ki_ 167
|
223
|
+
vi� 166
|
224
|
+
ey 165
|
225
|
+
fn 165
|
226
|
+
arf 164
|
227
|
+
til_ 163
|
228
|
+
st_ 162
|
229
|
+
_til_ 162
|
230
|
+
�a 161
|
231
|
+
num 161
|
232
|
+
_�a 161
|
233
|
+
as 160
|
234
|
+
_vi� 160
|
235
|
+
rt 159
|
236
|
+
el 158
|
237
|
+
u� 156
|
238
|
+
inn_ 155
|
239
|
+
_um_ 154
|
240
|
+
ra_ 153
|
241
|
+
b� 153
|
242
|
+
tar 151
|
243
|
+
ta_ 151
|
244
|
+
er� 151
|
245
|
+
�r 148
|
246
|
+
and 148
|
247
|
+
_sa 146
|
248
|
+
ig 146
|
249
|
+
_en 146
|
250
|
+
nga 145
|
251
|
+
rir_ 145
|
252
|
+
us 144
|
253
|
+
jar 143
|
254
|
+
et 143
|
255
|
+
�r 142
|
256
|
+
_sk 140
|
257
|
+
ndi 140
|
258
|
+
�� 139
|
259
|
+
var_ 139
|
260
|
+
_r 138
|
261
|
+
av 138
|
262
|
+
�k 137
|
263
|
+
nna 137
|
264
|
+
�ur_ 136
|
265
|
+
�l 136
|
266
|
+
ko 135
|
267
|
+
nin 135
|
268
|
+
vi�_ 135
|
269
|
+
�l 135
|
270
|
+
ins 134
|
271
|
+
ik 133
|
272
|
+
E 133
|
273
|
+
K 133
|
274
|
+
yrir_ 133
|
275
|
+
ns_ 133
|
276
|
+
on 133
|
277
|
+
ein 132
|
278
|
+
_vi�_ 132
|
279
|
+
�g 132
|
280
|
+
j� 132
|
281
|
+
� 132
|
282
|
+
�l 132
|
283
|
+
�s 132
|
284
|
+
_mi 131
|
285
|
+
f_ 131
|
286
|
+
s� 131
|
287
|
+
sj 131
|
288
|
+
stu 131
|
289
|
+
nda 130
|
290
|
+
_var_ 130
|
291
|
+
gr 129
|
292
|
+
�s 128
|
293
|
+
t� 127
|
294
|
+
ri_ 126
|
295
|
+
haf 126
|
296
|
+
_s� 125
|
297
|
+
v�k 124
|
298
|
+
rin 124
|
299
|
+
te 124
|
300
|
+
r. 124
|
301
|
+
r._ 123
|
302
|
+
H 123
|
303
|
+
nes 123
|
304
|
+
�t 123
|
305
|
+
ru_ 123
|
306
|
+
kr 122
|
307
|
+
F 122
|
308
|
+
�r 121
|
309
|
+
num_ 121
|
310
|
+
k_ 121
|
311
|
+
a. 121
|
312
|
+
_H 121
|
313
|
+
_fr 120
|
314
|
+
_ge 120
|
315
|
+
r� 120
|
316
|
+
_E 120
|
317
|
+
_� 120
|
318
|
+
ug 120
|
319
|
+
ngu 119
|
320
|
+
an_ 119
|
321
|
+
inga 118
|
322
|
+
_K 118
|
323
|
+
_haf 118
|
324
|
+
enn 117
|
325
|
+
ars 117
|
326
|
+
ri� 117
|
327
|
+
en_ 117
|
328
|
+
sin 116
|
329
|
+
kur 116
|
330
|
+
it 116
|
331
|
+
�a_ 116
|
332
|
+
ti_ 115
|
333
|
+
r�i 114
|
334
|
+
tj 114
|
335
|
+
ni_ 114
|
336
|
+
at 114
|
337
|
+
tarf 114
|
338
|
+
br 113
|
339
|
+
slu 113
|
340
|
+
kki 113
|
341
|
+
rg 113
|
342
|
+
m� 113
|
343
|
+
kv 113
|
344
|
+
_en_ 113
|
345
|
+
a._ 112
|
346
|
+
gar_ 112
|
347
|
+
du 112
|
348
|
+
ju 110
|
349
|
+
ei� 110
|
350
|
+
und 110
|
351
|
+
lag 110
|
352
|
+
tur 110
|
353
|
+
ega 109
|
354
|
+
h� 109
|
355
|
+
�� 109
|
356
|
+
gn 109
|
357
|
+
hef 109
|
358
|
+
kj 109
|
359
|
+
_hef 109
|
360
|
+
_sta 108
|
361
|
+
B 108
|
362
|
+
V 108
|
363
|
+
sam 107
|
364
|
+
_ei 106
|
365
|
+
_B 106
|
366
|
+
ft 106
|
367
|
+
ga_ 106
|
368
|
+
G 106
|
369
|
+
_G 105
|
370
|
+
l� 105
|
371
|
+
kki_ 105
|
372
|
+
star 104
|
373
|
+
in_ 104
|
374
|
+
R 104
|
375
|
+
me� 104
|
376
|
+
_�r 103
|
377
|
+
_me� 103
|
378
|
+
ekk 103
|
379
|
+
inu 103
|
380
|
+
t� 103
|
381
|
+
_V 103
|
382
|
+
m. 103
|
383
|
+
a�i 103
|
384
|
+
j� 102
|
385
|
+
�j 102
|
386
|
+
_b� 102
|
387
|
+
ess 102
|
388
|
+
h�s 101
|
389
|
+
ut 101
|
390
|
+
gs 101
|
391
|
+
av� 101
|
392
|
+
mu 101
|
393
|
+
_R 101
|
394
|
+
_� 101
|
395
|
+
rst 100
|
396
|
+
�jar 100
|
397
|
+
leg 100
|
398
|
+
�ja 100
|
399
|
+
ja_ 99
|
400
|
+
av�k 99
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 19406
|
2
|
+
a 10666
|
3
|
+
n 5455
|
4
|
+
e 4535
|
5
|
+
i 4387
|
6
|
+
r 2936
|
7
|
+
t 2902
|
8
|
+
an 2853
|
9
|
+
u 2841
|
10
|
+
k 2761
|
11
|
+
s 2311
|
12
|
+
m 2178
|
13
|
+
d 2134
|
14
|
+
g 2105
|
15
|
+
l 1780
|
16
|
+
a_ 1506
|
17
|
+
n_ 1476
|
18
|
+
ng 1449
|
19
|
+
p 1397
|
20
|
+
b 1275
|
21
|
+
an_ 1270
|
22
|
+
o 1246
|
23
|
+
h 1130
|
24
|
+
i_ 1108
|
25
|
+
er 1038
|
26
|
+
ka 1032
|
27
|
+
_d 1006
|
28
|
+
y 997
|
29
|
+
, 951
|
30
|
+
en 941
|
31
|
+
ar 914
|
32
|
+
,_ 900
|
33
|
+
_m 880
|
34
|
+
ya 842
|
35
|
+
ta 838
|
36
|
+
ang 797
|
37
|
+
di 787
|
38
|
+
da 773
|
39
|
+
. 754
|
40
|
+
la 742
|
41
|
+
._ 738
|
42
|
+
me 732
|
43
|
+
ak 728
|
44
|
+
_s 718
|
45
|
+
at 690
|
46
|
+
ra 688
|
47
|
+
ga 683
|
48
|
+
_k 671
|
49
|
+
_me 650
|
50
|
+
in 628
|
51
|
+
ah 601
|
52
|
+
_t 583
|
53
|
+
_p 570
|
54
|
+
g_ 551
|
55
|
+
_b 551
|
56
|
+
_di 548
|
57
|
+
ng_ 544
|
58
|
+
ma 536
|
59
|
+
se 526
|
60
|
+
tu 511
|
61
|
+
na 506
|
62
|
+
al 500
|
63
|
+
ri 490
|
64
|
+
as 483
|
65
|
+
k_ 482
|
66
|
+
j 480
|
67
|
+
si 470
|
68
|
+
ny 467
|
69
|
+
h_ 457
|
70
|
+
sa 452
|
71
|
+
ang_ 439
|
72
|
+
it 424
|
73
|
+
kan 423
|
74
|
+
ti 418
|
75
|
+
_se 417
|
76
|
+
pe 412
|
77
|
+
S 409
|
78
|
+
ba 407
|
79
|
+
ke 407
|
80
|
+
em 405
|
81
|
+
men 405
|
82
|
+
be 403
|
83
|
+
un 401
|
84
|
+
te 401
|
85
|
+
am 396
|
86
|
+
pa 395
|
87
|
+
nya 390
|
88
|
+
_men 374
|
89
|
+
el 374
|
90
|
+
t_ 371
|
91
|
+
_a 360
|
92
|
+
_i 356
|
93
|
+
u_ 355
|
94
|
+
kan_ 345
|
95
|
+
_ke 339
|
96
|
+
is 335
|
97
|
+
ah_ 324
|
98
|
+
_S 324
|
99
|
+
eng 321
|
100
|
+
nga 320
|
101
|
+
ia 318
|
102
|
+
_pe 316
|
103
|
+
ha 313
|
104
|
+
ap 311
|
105
|
+
r_ 308
|
106
|
+
w 305
|
107
|
+
li 301
|
108
|
+
_da 300
|
109
|
+
s_ 299
|
110
|
+
P 296
|
111
|
+
nd 290
|
112
|
+
_be 287
|
113
|
+
ik 283
|
114
|
+
ja 281
|
115
|
+
yan 281
|
116
|
+
ad 275
|
117
|
+
ek 273
|
118
|
+
uk 272
|
119
|
+
di_ 270
|
120
|
+
bu 269
|
121
|
+
ya_ 268
|
122
|
+
yang 268
|
123
|
+
ak_ 266
|
124
|
+
ber 265
|
125
|
+
_y 265
|
126
|
+
_ya 264
|
127
|
+
_P 263
|
128
|
+
ru 260
|
129
|
+
K 259
|
130
|
+
yang_ 256
|
131
|
+
_yang 256
|
132
|
+
_yan 256
|
133
|
+
nt 255
|
134
|
+
de 253
|
135
|
+
_te 250
|
136
|
+
wa 249
|
137
|
+
et 247
|
138
|
+
at_ 246
|
139
|
+
ara 245
|
140
|
+
gan 243
|
141
|
+
A 237
|
142
|
+
ari 235
|
143
|
+
ala 230
|
144
|
+
itu 229
|
145
|
+
c 225
|
146
|
+
ol 225
|
147
|
+
ni 225
|
148
|
+
us 225
|
149
|
+
dan 224
|
150
|
+
_K 224
|
151
|
+
M 224
|
152
|
+
B 223
|
153
|
+
ata 222
|
154
|
+
ai 221
|
155
|
+
ur 219
|
156
|
+
nya_ 217
|
157
|
+
ua 215
|
158
|
+
_ka 214
|
159
|
+
_ber 210
|
160
|
+
eb 209
|
161
|
+
ran 206
|
162
|
+
D 206
|
163
|
+
ela 206
|
164
|
+
_di_ 205
|
165
|
+
_l 204
|
166
|
+
ngan 204
|
167
|
+
ter 203
|
168
|
+
re 201
|
169
|
+
- 199
|
170
|
+
aka 198
|
171
|
+
l_ 194
|
172
|
+
_A 191
|
173
|
+
era 191
|
174
|
+
a, 191
|
175
|
+
e_ 190
|
176
|
+
ir 187
|
177
|
+
I 186
|
178
|
+
tan 185
|
179
|
+
_B 184
|
180
|
+
ut 184
|
181
|
+
ku 183
|
182
|
+
a,_ 183
|
183
|
+
il 182
|
184
|
+
J 181
|
185
|
+
um 180
|
186
|
+
_it 180
|
187
|
+
_itu 180
|
188
|
+
_ta 179
|
189
|
+
su 179
|
190
|
+
dan_ 177
|
191
|
+
es 177
|
192
|
+
on 177
|
193
|
+
or 177
|
194
|
+
_dan 176
|
195
|
+
lu 174
|
196
|
+
_M 172
|
197
|
+
tu_ 172
|
198
|
+
_dan_ 172
|
199
|
+
enga 171
|
200
|
+
mb 169
|
201
|
+
R 169
|
202
|
+
si_ 168
|
203
|
+
per 168
|
204
|
+
gan_ 168
|
205
|
+
ngan_ 165
|
206
|
+
" 162
|
207
|
+
_ter 162
|
208
|
+
a. 161
|
209
|
+
man 161
|
210
|
+
gk 160
|
211
|
+
a._ 160
|
212
|
+
asi 160
|
213
|
+
ngk 160
|
214
|
+
ep 160
|
215
|
+
ag 159
|
216
|
+
ul 158
|
217
|
+
da_ 157
|
218
|
+
m_ 155
|
219
|
+
du 155
|
220
|
+
ada 153
|
221
|
+
ki 153
|
222
|
+
rt 150
|
223
|
+
mp 150
|
224
|
+
T 150
|
225
|
+
ama 148
|
226
|
+
ing 148
|
227
|
+
na_ 147
|
228
|
+
_J 147
|
229
|
+
_D 145
|
230
|
+
ung 145
|
231
|
+
ana 145
|
232
|
+
n, 144
|
233
|
+
ju 144
|
234
|
+
ud 144
|
235
|
+
rin 143
|
236
|
+
gi 143
|
237
|
+
aw 141
|
238
|
+
lah 138
|
239
|
+
lan 138
|
240
|
+
_sa 136
|
241
|
+
ri_ 136
|
242
|
+
meng 136
|
243
|
+
_meng 135
|
244
|
+
_ma 134
|
245
|
+
n,_ 134
|
246
|
+
awa 134
|
247
|
+
st 134
|
248
|
+
eka 133
|
249
|
+
mi 133
|
250
|
+
mu 132
|
251
|
+
_T 132
|
252
|
+
po 131
|
253
|
+
ge 131
|
254
|
+
ar_ 130
|
255
|
+
id 129
|
256
|
+
ko 129
|
257
|
+
le 128
|
258
|
+
_h 128
|
259
|
+
ena 127
|
260
|
+
_j 126
|
261
|
+
emb 126
|
262
|
+
ina 125
|
263
|
+
_r 124
|
264
|
+
itu_ 124
|
265
|
+
ay 123
|
266
|
+
ngg 123
|
267
|
+
gg 123
|
268
|
+
rang 123
|
269
|
+
pi 120
|
270
|
+
nan 120
|
271
|
+
_ba 119
|
272
|
+
_la 119
|
273
|
+
apa 119
|
274
|
+
_I 118
|
275
|
+
p_ 118
|
276
|
+
bi 117
|
277
|
+
ai_ 117
|
278
|
+
ta_ 116
|
279
|
+
san 116
|
280
|
+
Ke 116
|
281
|
+
ro 115
|
282
|
+
eri 114
|
283
|
+
kar 113
|
284
|
+
lah_ 113
|
285
|
+
_itu_ 112
|
286
|
+
aya 111
|
287
|
+
i, 110
|
288
|
+
an, 110
|
289
|
+
ra_ 110
|
290
|
+
_per 110
|
291
|
+
im 110
|
292
|
+
ika 109
|
293
|
+
isi 109
|
294
|
+
mem 109
|
295
|
+
tah 108
|
296
|
+
_Ke 108
|
297
|
+
ian 108
|
298
|
+
_mem 108
|
299
|
+
akan 108
|
300
|
+
Se 108
|
301
|
+
to 107
|
302
|
+
ab 107
|
303
|
+
ngka 106
|
304
|
+
rs 106
|
305
|
+
gka 106
|
306
|
+
uk_ 105
|
307
|
+
seb 104
|
308
|
+
_de 104
|
309
|
+
pu 104
|
310
|
+
i,_ 104
|
311
|
+
ita 104
|
312
|
+
nda 103
|
313
|
+
_ti 103
|
314
|
+
ni_ 103
|
315
|
+
ca 103
|
316
|
+
_Se 103
|
317
|
+
ers 103
|
318
|
+
pen 103
|
319
|
+
ini 102
|
320
|
+
an,_ 102
|
321
|
+
angk 101
|
322
|
+
uh 101
|
323
|
+
han 101
|
324
|
+
nta 100
|
325
|
+
_in 99
|
326
|
+
f 99
|
327
|
+
nj 99
|
328
|
+
ok 99
|
329
|
+
aga 99
|
330
|
+
_R 98
|
331
|
+
as_ 98
|
332
|
+
tr 98
|
333
|
+
mer 97
|
334
|
+
lam 97
|
335
|
+
and 97
|
336
|
+
end 96
|
337
|
+
anga 96
|
338
|
+
ne 96
|
339
|
+
Sa 96
|
340
|
+
ka_ 96
|
341
|
+
arin 95
|
342
|
+
gu 95
|
343
|
+
_ha 94
|
344
|
+
Z 94
|
345
|
+
al_ 94
|
346
|
+
ga_ 94
|
347
|
+
_Z 93
|
348
|
+
_pen 93
|
349
|
+
dar 93
|
350
|
+
Ad 93
|
351
|
+
i._ 93
|
352
|
+
ada_ 93
|
353
|
+
atan 93
|
354
|
+
tak 93
|
355
|
+
i. 93
|
356
|
+
ia_ 92
|
357
|
+
asa 92
|
358
|
+
ap_ 92
|
359
|
+
ari_ 92
|
360
|
+
kat 92
|
361
|
+
_seb 92
|
362
|
+
_Za 91
|
363
|
+
Za 91
|
364
|
+
den 91
|
365
|
+
n. 90
|
366
|
+
_u 90
|
367
|
+
_Ad 90
|
368
|
+
engan 89
|
369
|
+
ib 89
|
370
|
+
any 89
|
371
|
+
n._ 89
|
372
|
+
o_ 89
|
373
|
+
Zar 88
|
374
|
+
Zari 88
|
375
|
+
rina 88
|
376
|
+
_Zar 88
|
377
|
+
_Zari 88
|
378
|
+
Zarin 88
|
379
|
+
arina 88
|
380
|
+
ks 88
|
381
|
+
angka 87
|
382
|
+
oli 87
|
383
|
+
eg 87
|
384
|
+
kt 86
|
385
|
+
_Sa 86
|
386
|
+
hu 85
|
387
|
+
ih 85
|
388
|
+
us_ 85
|
389
|
+
adi 85
|
390
|
+
om 85
|
391
|
+
eba 85
|
392
|
+
anya 85
|
393
|
+
_bu 84
|
394
|
+
denga 83
|
395
|
+
L 83
|
396
|
+
ed 83
|
397
|
+
dak 83
|
398
|
+
deng 83
|
399
|
+
ma_ 82
|
400
|
+
asi_ 82
|