language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 26104
|
2
|
+
a 6496
|
3
|
+
r 6044
|
4
|
+
n 5160
|
5
|
+
i 5123
|
6
|
+
s 3987
|
7
|
+
e 3891
|
8
|
+
u 3582
|
9
|
+
t 3300
|
10
|
+
� 3126
|
11
|
+
l 3071
|
12
|
+
g 2726
|
13
|
+
m 2459
|
14
|
+
k 2256
|
15
|
+
f 2230
|
16
|
+
r_ 1967
|
17
|
+
v 1641
|
18
|
+
ar 1472
|
19
|
+
�_ 1420
|
20
|
+
_s 1332
|
21
|
+
� 1248
|
22
|
+
o 1221
|
23
|
+
� 1197
|
24
|
+
a_ 1155
|
25
|
+
in 1150
|
26
|
+
i_ 1114
|
27
|
+
h 1023
|
28
|
+
j 975
|
29
|
+
d 974
|
30
|
+
st 929
|
31
|
+
a� 925
|
32
|
+
. 922
|
33
|
+
n_ 875
|
34
|
+
._ 875
|
35
|
+
_v 874
|
36
|
+
m_ 854
|
37
|
+
nn 842
|
38
|
+
_f 840
|
39
|
+
ur 824
|
40
|
+
_a 806
|
41
|
+
� 785
|
42
|
+
_h 782
|
43
|
+
� 779
|
44
|
+
i� 771
|
45
|
+
er 765
|
46
|
+
um 727
|
47
|
+
g_ 716
|
48
|
+
y 711
|
49
|
+
_e 709
|
50
|
+
� 672
|
51
|
+
b 672
|
52
|
+
ir 671
|
53
|
+
ri 670
|
54
|
+
an 667
|
55
|
+
� 650
|
56
|
+
_� 648
|
57
|
+
a�_ 645
|
58
|
+
u_ 633
|
59
|
+
na 631
|
60
|
+
�_ 625
|
61
|
+
ar_ 622
|
62
|
+
_� 608
|
63
|
+
�_ 606
|
64
|
+
_� 595
|
65
|
+
ta 567
|
66
|
+
ei 566
|
67
|
+
la 558
|
68
|
+
_m 549
|
69
|
+
_�_ 549
|
70
|
+
um_ 547
|
71
|
+
t_ 535
|
72
|
+
ti 529
|
73
|
+
_o 524
|
74
|
+
ur_ 523
|
75
|
+
_a� 519
|
76
|
+
i�_ 512
|
77
|
+
�i 496
|
78
|
+
ve 494
|
79
|
+
og 494
|
80
|
+
� 482
|
81
|
+
og_ 479
|
82
|
+
_og 478
|
83
|
+
_og_ 478
|
84
|
+
nd 469
|
85
|
+
p 464
|
86
|
+
ra 455
|
87
|
+
un 454
|
88
|
+
ir_ 452
|
89
|
+
_a�_ 451
|
90
|
+
ni 439
|
91
|
+
en 439
|
92
|
+
�u 439
|
93
|
+
_�_ 436
|
94
|
+
ng 434
|
95
|
+
il 404
|
96
|
+
ga 395
|
97
|
+
_t 395
|
98
|
+
nu 393
|
99
|
+
ki 392
|
100
|
+
ja 383
|
101
|
+
inn 379
|
102
|
+
_b 375
|
103
|
+
sk 352
|
104
|
+
s_ 350
|
105
|
+
vi 349
|
106
|
+
r� 347
|
107
|
+
�a 345
|
108
|
+
ef 339
|
109
|
+
ag 336
|
110
|
+
_u 330
|
111
|
+
se 324
|
112
|
+
lu 324
|
113
|
+
af 321
|
114
|
+
_ve 320
|
115
|
+
tu 318
|
116
|
+
em 307
|
117
|
+
eg 304
|
118
|
+
nn_ 303
|
119
|
+
_l 303
|
120
|
+
va 301
|
121
|
+
_k 296
|
122
|
+
, 295
|
123
|
+
ns 292
|
124
|
+
re 292
|
125
|
+
tt 291
|
126
|
+
,_ 291
|
127
|
+
l_ 286
|
128
|
+
am 286
|
129
|
+
es 285
|
130
|
+
yr 285
|
131
|
+
al 281
|
132
|
+
da 277
|
133
|
+
S 275
|
134
|
+
gu 273
|
135
|
+
_se 271
|
136
|
+
ver 268
|
137
|
+
_g 266
|
138
|
+
ing 266
|
139
|
+
_n 262
|
140
|
+
is 258
|
141
|
+
_er 257
|
142
|
+
sa 256
|
143
|
+
�ur 255
|
144
|
+
le 255
|
145
|
+
_st 255
|
146
|
+
_S 254
|
147
|
+
sem 254
|
148
|
+
ll 254
|
149
|
+
me 253
|
150
|
+
ha 251
|
151
|
+
li 249
|
152
|
+
kk 249
|
153
|
+
rs 247
|
154
|
+
_vi 247
|
155
|
+
rn 246
|
156
|
+
sl 244
|
157
|
+
gi 243
|
158
|
+
ss 242
|
159
|
+
rf 241
|
160
|
+
fy 240
|
161
|
+
�i_ 240
|
162
|
+
mi 238
|
163
|
+
ka 237
|
164
|
+
ma 231
|
165
|
+
ld 230
|
166
|
+
� 229
|
167
|
+
rir 227
|
168
|
+
sta 227
|
169
|
+
fyr 227
|
170
|
+
v� 227
|
171
|
+
di 226
|
172
|
+
ru 224
|
173
|
+
var 224
|
174
|
+
_fy 222
|
175
|
+
ku 221
|
176
|
+
em_ 221
|
177
|
+
nar 220
|
178
|
+
_sem_ 220
|
179
|
+
_sem 220
|
180
|
+
sem_ 220
|
181
|
+
he 219
|
182
|
+
yri 217
|
183
|
+
_fyr 216
|
184
|
+
si 216
|
185
|
+
yrir 215
|
186
|
+
au 212
|
187
|
+
er_ 212
|
188
|
+
ek 211
|
189
|
+
_ha 210
|
190
|
+
�e 209
|
191
|
+
fyri 207
|
192
|
+
fyrir 207
|
193
|
+
_�e 205
|
194
|
+
fi 204
|
195
|
+
fr 203
|
196
|
+
ge 201
|
197
|
+
or 200
|
198
|
+
ne 200
|
199
|
+
ann 198
|
200
|
+
j� 198
|
201
|
+
_va 196
|
202
|
+
_fyri 196
|
203
|
+
_ver 194
|
204
|
+
fl 192
|
205
|
+
_er_ 191
|
206
|
+
_um 189
|
207
|
+
�k 188
|
208
|
+
til 187
|
209
|
+
_he 186
|
210
|
+
fa 186
|
211
|
+
il_ 182
|
212
|
+
_ti 178
|
213
|
+
_til 177
|
214
|
+
gar 176
|
215
|
+
_var 176
|
216
|
+
na_ 176
|
217
|
+
� 175
|
218
|
+
e� 171
|
219
|
+
fu 170
|
220
|
+
nni 169
|
221
|
+
_me 168
|
222
|
+
ki_ 167
|
223
|
+
vi� 166
|
224
|
+
ey 165
|
225
|
+
fn 165
|
226
|
+
arf 164
|
227
|
+
til_ 163
|
228
|
+
st_ 162
|
229
|
+
_til_ 162
|
230
|
+
�a 161
|
231
|
+
num 161
|
232
|
+
_�a 161
|
233
|
+
as 160
|
234
|
+
_vi� 160
|
235
|
+
rt 159
|
236
|
+
el 158
|
237
|
+
u� 156
|
238
|
+
inn_ 155
|
239
|
+
_um_ 154
|
240
|
+
ra_ 153
|
241
|
+
b� 153
|
242
|
+
tar 151
|
243
|
+
ta_ 151
|
244
|
+
er� 151
|
245
|
+
�r 148
|
246
|
+
and 148
|
247
|
+
_sa 146
|
248
|
+
ig 146
|
249
|
+
_en 146
|
250
|
+
nga 145
|
251
|
+
rir_ 145
|
252
|
+
us 144
|
253
|
+
jar 143
|
254
|
+
et 143
|
255
|
+
�r 142
|
256
|
+
_sk 140
|
257
|
+
ndi 140
|
258
|
+
�� 139
|
259
|
+
var_ 139
|
260
|
+
_r 138
|
261
|
+
av 138
|
262
|
+
�k 137
|
263
|
+
nna 137
|
264
|
+
�ur_ 136
|
265
|
+
�l 136
|
266
|
+
ko 135
|
267
|
+
nin 135
|
268
|
+
vi�_ 135
|
269
|
+
�l 135
|
270
|
+
ins 134
|
271
|
+
ik 133
|
272
|
+
E 133
|
273
|
+
K 133
|
274
|
+
yrir_ 133
|
275
|
+
ns_ 133
|
276
|
+
on 133
|
277
|
+
ein 132
|
278
|
+
_vi�_ 132
|
279
|
+
�g 132
|
280
|
+
j� 132
|
281
|
+
� 132
|
282
|
+
�l 132
|
283
|
+
�s 132
|
284
|
+
_mi 131
|
285
|
+
f_ 131
|
286
|
+
s� 131
|
287
|
+
sj 131
|
288
|
+
stu 131
|
289
|
+
nda 130
|
290
|
+
_var_ 130
|
291
|
+
gr 129
|
292
|
+
�s 128
|
293
|
+
t� 127
|
294
|
+
ri_ 126
|
295
|
+
haf 126
|
296
|
+
_s� 125
|
297
|
+
v�k 124
|
298
|
+
rin 124
|
299
|
+
te 124
|
300
|
+
r. 124
|
301
|
+
r._ 123
|
302
|
+
H 123
|
303
|
+
nes 123
|
304
|
+
�t 123
|
305
|
+
ru_ 123
|
306
|
+
kr 122
|
307
|
+
F 122
|
308
|
+
�r 121
|
309
|
+
num_ 121
|
310
|
+
k_ 121
|
311
|
+
a. 121
|
312
|
+
_H 121
|
313
|
+
_fr 120
|
314
|
+
_ge 120
|
315
|
+
r� 120
|
316
|
+
_E 120
|
317
|
+
_� 120
|
318
|
+
ug 120
|
319
|
+
ngu 119
|
320
|
+
an_ 119
|
321
|
+
inga 118
|
322
|
+
_K 118
|
323
|
+
_haf 118
|
324
|
+
enn 117
|
325
|
+
ars 117
|
326
|
+
ri� 117
|
327
|
+
en_ 117
|
328
|
+
sin 116
|
329
|
+
kur 116
|
330
|
+
it 116
|
331
|
+
�a_ 116
|
332
|
+
ti_ 115
|
333
|
+
r�i 114
|
334
|
+
tj 114
|
335
|
+
ni_ 114
|
336
|
+
at 114
|
337
|
+
tarf 114
|
338
|
+
br 113
|
339
|
+
slu 113
|
340
|
+
kki 113
|
341
|
+
rg 113
|
342
|
+
m� 113
|
343
|
+
kv 113
|
344
|
+
_en_ 113
|
345
|
+
a._ 112
|
346
|
+
gar_ 112
|
347
|
+
du 112
|
348
|
+
ju 110
|
349
|
+
ei� 110
|
350
|
+
und 110
|
351
|
+
lag 110
|
352
|
+
tur 110
|
353
|
+
ega 109
|
354
|
+
h� 109
|
355
|
+
�� 109
|
356
|
+
gn 109
|
357
|
+
hef 109
|
358
|
+
kj 109
|
359
|
+
_hef 109
|
360
|
+
_sta 108
|
361
|
+
B 108
|
362
|
+
V 108
|
363
|
+
sam 107
|
364
|
+
_ei 106
|
365
|
+
_B 106
|
366
|
+
ft 106
|
367
|
+
ga_ 106
|
368
|
+
G 106
|
369
|
+
_G 105
|
370
|
+
l� 105
|
371
|
+
kki_ 105
|
372
|
+
star 104
|
373
|
+
in_ 104
|
374
|
+
R 104
|
375
|
+
me� 104
|
376
|
+
_�r 103
|
377
|
+
_me� 103
|
378
|
+
ekk 103
|
379
|
+
inu 103
|
380
|
+
t� 103
|
381
|
+
_V 103
|
382
|
+
m. 103
|
383
|
+
a�i 103
|
384
|
+
j� 102
|
385
|
+
�j 102
|
386
|
+
_b� 102
|
387
|
+
ess 102
|
388
|
+
h�s 101
|
389
|
+
ut 101
|
390
|
+
gs 101
|
391
|
+
av� 101
|
392
|
+
mu 101
|
393
|
+
_R 101
|
394
|
+
_� 101
|
395
|
+
rst 100
|
396
|
+
�jar 100
|
397
|
+
leg 100
|
398
|
+
�ja 100
|
399
|
+
ja_ 99
|
400
|
+
av�k 99
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 19406
|
2
|
+
a 10666
|
3
|
+
n 5455
|
4
|
+
e 4535
|
5
|
+
i 4387
|
6
|
+
r 2936
|
7
|
+
t 2902
|
8
|
+
an 2853
|
9
|
+
u 2841
|
10
|
+
k 2761
|
11
|
+
s 2311
|
12
|
+
m 2178
|
13
|
+
d 2134
|
14
|
+
g 2105
|
15
|
+
l 1780
|
16
|
+
a_ 1506
|
17
|
+
n_ 1476
|
18
|
+
ng 1449
|
19
|
+
p 1397
|
20
|
+
b 1275
|
21
|
+
an_ 1270
|
22
|
+
o 1246
|
23
|
+
h 1130
|
24
|
+
i_ 1108
|
25
|
+
er 1038
|
26
|
+
ka 1032
|
27
|
+
_d 1006
|
28
|
+
y 997
|
29
|
+
, 951
|
30
|
+
en 941
|
31
|
+
ar 914
|
32
|
+
,_ 900
|
33
|
+
_m 880
|
34
|
+
ya 842
|
35
|
+
ta 838
|
36
|
+
ang 797
|
37
|
+
di 787
|
38
|
+
da 773
|
39
|
+
. 754
|
40
|
+
la 742
|
41
|
+
._ 738
|
42
|
+
me 732
|
43
|
+
ak 728
|
44
|
+
_s 718
|
45
|
+
at 690
|
46
|
+
ra 688
|
47
|
+
ga 683
|
48
|
+
_k 671
|
49
|
+
_me 650
|
50
|
+
in 628
|
51
|
+
ah 601
|
52
|
+
_t 583
|
53
|
+
_p 570
|
54
|
+
g_ 551
|
55
|
+
_b 551
|
56
|
+
_di 548
|
57
|
+
ng_ 544
|
58
|
+
ma 536
|
59
|
+
se 526
|
60
|
+
tu 511
|
61
|
+
na 506
|
62
|
+
al 500
|
63
|
+
ri 490
|
64
|
+
as 483
|
65
|
+
k_ 482
|
66
|
+
j 480
|
67
|
+
si 470
|
68
|
+
ny 467
|
69
|
+
h_ 457
|
70
|
+
sa 452
|
71
|
+
ang_ 439
|
72
|
+
it 424
|
73
|
+
kan 423
|
74
|
+
ti 418
|
75
|
+
_se 417
|
76
|
+
pe 412
|
77
|
+
S 409
|
78
|
+
ba 407
|
79
|
+
ke 407
|
80
|
+
em 405
|
81
|
+
men 405
|
82
|
+
be 403
|
83
|
+
un 401
|
84
|
+
te 401
|
85
|
+
am 396
|
86
|
+
pa 395
|
87
|
+
nya 390
|
88
|
+
_men 374
|
89
|
+
el 374
|
90
|
+
t_ 371
|
91
|
+
_a 360
|
92
|
+
_i 356
|
93
|
+
u_ 355
|
94
|
+
kan_ 345
|
95
|
+
_ke 339
|
96
|
+
is 335
|
97
|
+
ah_ 324
|
98
|
+
_S 324
|
99
|
+
eng 321
|
100
|
+
nga 320
|
101
|
+
ia 318
|
102
|
+
_pe 316
|
103
|
+
ha 313
|
104
|
+
ap 311
|
105
|
+
r_ 308
|
106
|
+
w 305
|
107
|
+
li 301
|
108
|
+
_da 300
|
109
|
+
s_ 299
|
110
|
+
P 296
|
111
|
+
nd 290
|
112
|
+
_be 287
|
113
|
+
ik 283
|
114
|
+
ja 281
|
115
|
+
yan 281
|
116
|
+
ad 275
|
117
|
+
ek 273
|
118
|
+
uk 272
|
119
|
+
di_ 270
|
120
|
+
bu 269
|
121
|
+
ya_ 268
|
122
|
+
yang 268
|
123
|
+
ak_ 266
|
124
|
+
ber 265
|
125
|
+
_y 265
|
126
|
+
_ya 264
|
127
|
+
_P 263
|
128
|
+
ru 260
|
129
|
+
K 259
|
130
|
+
yang_ 256
|
131
|
+
_yang 256
|
132
|
+
_yan 256
|
133
|
+
nt 255
|
134
|
+
de 253
|
135
|
+
_te 250
|
136
|
+
wa 249
|
137
|
+
et 247
|
138
|
+
at_ 246
|
139
|
+
ara 245
|
140
|
+
gan 243
|
141
|
+
A 237
|
142
|
+
ari 235
|
143
|
+
ala 230
|
144
|
+
itu 229
|
145
|
+
c 225
|
146
|
+
ol 225
|
147
|
+
ni 225
|
148
|
+
us 225
|
149
|
+
dan 224
|
150
|
+
_K 224
|
151
|
+
M 224
|
152
|
+
B 223
|
153
|
+
ata 222
|
154
|
+
ai 221
|
155
|
+
ur 219
|
156
|
+
nya_ 217
|
157
|
+
ua 215
|
158
|
+
_ka 214
|
159
|
+
_ber 210
|
160
|
+
eb 209
|
161
|
+
ran 206
|
162
|
+
D 206
|
163
|
+
ela 206
|
164
|
+
_di_ 205
|
165
|
+
_l 204
|
166
|
+
ngan 204
|
167
|
+
ter 203
|
168
|
+
re 201
|
169
|
+
- 199
|
170
|
+
aka 198
|
171
|
+
l_ 194
|
172
|
+
_A 191
|
173
|
+
era 191
|
174
|
+
a, 191
|
175
|
+
e_ 190
|
176
|
+
ir 187
|
177
|
+
I 186
|
178
|
+
tan 185
|
179
|
+
_B 184
|
180
|
+
ut 184
|
181
|
+
ku 183
|
182
|
+
a,_ 183
|
183
|
+
il 182
|
184
|
+
J 181
|
185
|
+
um 180
|
186
|
+
_it 180
|
187
|
+
_itu 180
|
188
|
+
_ta 179
|
189
|
+
su 179
|
190
|
+
dan_ 177
|
191
|
+
es 177
|
192
|
+
on 177
|
193
|
+
or 177
|
194
|
+
_dan 176
|
195
|
+
lu 174
|
196
|
+
_M 172
|
197
|
+
tu_ 172
|
198
|
+
_dan_ 172
|
199
|
+
enga 171
|
200
|
+
mb 169
|
201
|
+
R 169
|
202
|
+
si_ 168
|
203
|
+
per 168
|
204
|
+
gan_ 168
|
205
|
+
ngan_ 165
|
206
|
+
" 162
|
207
|
+
_ter 162
|
208
|
+
a. 161
|
209
|
+
man 161
|
210
|
+
gk 160
|
211
|
+
a._ 160
|
212
|
+
asi 160
|
213
|
+
ngk 160
|
214
|
+
ep 160
|
215
|
+
ag 159
|
216
|
+
ul 158
|
217
|
+
da_ 157
|
218
|
+
m_ 155
|
219
|
+
du 155
|
220
|
+
ada 153
|
221
|
+
ki 153
|
222
|
+
rt 150
|
223
|
+
mp 150
|
224
|
+
T 150
|
225
|
+
ama 148
|
226
|
+
ing 148
|
227
|
+
na_ 147
|
228
|
+
_J 147
|
229
|
+
_D 145
|
230
|
+
ung 145
|
231
|
+
ana 145
|
232
|
+
n, 144
|
233
|
+
ju 144
|
234
|
+
ud 144
|
235
|
+
rin 143
|
236
|
+
gi 143
|
237
|
+
aw 141
|
238
|
+
lah 138
|
239
|
+
lan 138
|
240
|
+
_sa 136
|
241
|
+
ri_ 136
|
242
|
+
meng 136
|
243
|
+
_meng 135
|
244
|
+
_ma 134
|
245
|
+
n,_ 134
|
246
|
+
awa 134
|
247
|
+
st 134
|
248
|
+
eka 133
|
249
|
+
mi 133
|
250
|
+
mu 132
|
251
|
+
_T 132
|
252
|
+
po 131
|
253
|
+
ge 131
|
254
|
+
ar_ 130
|
255
|
+
id 129
|
256
|
+
ko 129
|
257
|
+
le 128
|
258
|
+
_h 128
|
259
|
+
ena 127
|
260
|
+
_j 126
|
261
|
+
emb 126
|
262
|
+
ina 125
|
263
|
+
_r 124
|
264
|
+
itu_ 124
|
265
|
+
ay 123
|
266
|
+
ngg 123
|
267
|
+
gg 123
|
268
|
+
rang 123
|
269
|
+
pi 120
|
270
|
+
nan 120
|
271
|
+
_ba 119
|
272
|
+
_la 119
|
273
|
+
apa 119
|
274
|
+
_I 118
|
275
|
+
p_ 118
|
276
|
+
bi 117
|
277
|
+
ai_ 117
|
278
|
+
ta_ 116
|
279
|
+
san 116
|
280
|
+
Ke 116
|
281
|
+
ro 115
|
282
|
+
eri 114
|
283
|
+
kar 113
|
284
|
+
lah_ 113
|
285
|
+
_itu_ 112
|
286
|
+
aya 111
|
287
|
+
i, 110
|
288
|
+
an, 110
|
289
|
+
ra_ 110
|
290
|
+
_per 110
|
291
|
+
im 110
|
292
|
+
ika 109
|
293
|
+
isi 109
|
294
|
+
mem 109
|
295
|
+
tah 108
|
296
|
+
_Ke 108
|
297
|
+
ian 108
|
298
|
+
_mem 108
|
299
|
+
akan 108
|
300
|
+
Se 108
|
301
|
+
to 107
|
302
|
+
ab 107
|
303
|
+
ngka 106
|
304
|
+
rs 106
|
305
|
+
gka 106
|
306
|
+
uk_ 105
|
307
|
+
seb 104
|
308
|
+
_de 104
|
309
|
+
pu 104
|
310
|
+
i,_ 104
|
311
|
+
ita 104
|
312
|
+
nda 103
|
313
|
+
_ti 103
|
314
|
+
ni_ 103
|
315
|
+
ca 103
|
316
|
+
_Se 103
|
317
|
+
ers 103
|
318
|
+
pen 103
|
319
|
+
ini 102
|
320
|
+
an,_ 102
|
321
|
+
angk 101
|
322
|
+
uh 101
|
323
|
+
han 101
|
324
|
+
nta 100
|
325
|
+
_in 99
|
326
|
+
f 99
|
327
|
+
nj 99
|
328
|
+
ok 99
|
329
|
+
aga 99
|
330
|
+
_R 98
|
331
|
+
as_ 98
|
332
|
+
tr 98
|
333
|
+
mer 97
|
334
|
+
lam 97
|
335
|
+
and 97
|
336
|
+
end 96
|
337
|
+
anga 96
|
338
|
+
ne 96
|
339
|
+
Sa 96
|
340
|
+
ka_ 96
|
341
|
+
arin 95
|
342
|
+
gu 95
|
343
|
+
_ha 94
|
344
|
+
Z 94
|
345
|
+
al_ 94
|
346
|
+
ga_ 94
|
347
|
+
_Z 93
|
348
|
+
_pen 93
|
349
|
+
dar 93
|
350
|
+
Ad 93
|
351
|
+
i._ 93
|
352
|
+
ada_ 93
|
353
|
+
atan 93
|
354
|
+
tak 93
|
355
|
+
i. 93
|
356
|
+
ia_ 92
|
357
|
+
asa 92
|
358
|
+
ap_ 92
|
359
|
+
ari_ 92
|
360
|
+
kat 92
|
361
|
+
_seb 92
|
362
|
+
_Za 91
|
363
|
+
Za 91
|
364
|
+
den 91
|
365
|
+
n. 90
|
366
|
+
_u 90
|
367
|
+
_Ad 90
|
368
|
+
engan 89
|
369
|
+
ib 89
|
370
|
+
any 89
|
371
|
+
n._ 89
|
372
|
+
o_ 89
|
373
|
+
Zar 88
|
374
|
+
Zari 88
|
375
|
+
rina 88
|
376
|
+
_Zar 88
|
377
|
+
_Zari 88
|
378
|
+
Zarin 88
|
379
|
+
arina 88
|
380
|
+
ks 88
|
381
|
+
angka 87
|
382
|
+
oli 87
|
383
|
+
eg 87
|
384
|
+
kt 86
|
385
|
+
_Sa 86
|
386
|
+
hu 85
|
387
|
+
ih 85
|
388
|
+
us_ 85
|
389
|
+
adi 85
|
390
|
+
om 85
|
391
|
+
eba 85
|
392
|
+
anya 85
|
393
|
+
_bu 84
|
394
|
+
denga 83
|
395
|
+
L 83
|
396
|
+
ed 83
|
397
|
+
dak 83
|
398
|
+
deng 83
|
399
|
+
ma_ 82
|
400
|
+
asi_ 82
|