language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 35328
|
2
|
+
a 10423
|
3
|
+
e 10132
|
4
|
+
o 8919
|
5
|
+
s 6795
|
6
|
+
r 6033
|
7
|
+
i 5443
|
8
|
+
n 4588
|
9
|
+
d 4531
|
10
|
+
t 4217
|
11
|
+
m 3476
|
12
|
+
u 3404
|
13
|
+
o_ 3240
|
14
|
+
a_ 3029
|
15
|
+
e_ 2879
|
16
|
+
c 2756
|
17
|
+
s_ 2461
|
18
|
+
_d 2379
|
19
|
+
l 2307
|
20
|
+
p 2242
|
21
|
+
_a 1753
|
22
|
+
de 1751
|
23
|
+
, 1660
|
24
|
+
,_ 1658
|
25
|
+
_e 1454
|
26
|
+
es 1447
|
27
|
+
os 1412
|
28
|
+
ra 1343
|
29
|
+
_p 1328
|
30
|
+
nt 1302
|
31
|
+
_de 1248
|
32
|
+
do 1215
|
33
|
+
en 1176
|
34
|
+
re 1150
|
35
|
+
as 1123
|
36
|
+
v 1115
|
37
|
+
m_ 1113
|
38
|
+
de_ 1096
|
39
|
+
er 1082
|
40
|
+
g 1053
|
41
|
+
_c 1047
|
42
|
+
da 1008
|
43
|
+
co 986
|
44
|
+
os_ 975
|
45
|
+
te 974
|
46
|
+
ar 950
|
47
|
+
or 943
|
48
|
+
q 938
|
49
|
+
qu 938
|
50
|
+
_s 908
|
51
|
+
ta 902
|
52
|
+
_de_ 901
|
53
|
+
_o 858
|
54
|
+
se 841
|
55
|
+
ue 831
|
56
|
+
to 799
|
57
|
+
ad 777
|
58
|
+
. 761
|
59
|
+
que 752
|
60
|
+
em 751
|
61
|
+
an 748
|
62
|
+
f 746
|
63
|
+
r_ 745
|
64
|
+
b 732
|
65
|
+
st 718
|
66
|
+
is 716
|
67
|
+
al 712
|
68
|
+
_qu 706
|
69
|
+
_q 706
|
70
|
+
in 701
|
71
|
+
as_ 696
|
72
|
+
� 695
|
73
|
+
do_ 685
|
74
|
+
ent 678
|
75
|
+
�o 677
|
76
|
+
_n 671
|
77
|
+
_co 660
|
78
|
+
_a_ 654
|
79
|
+
_m 646
|
80
|
+
on 645
|
81
|
+
� 624
|
82
|
+
ri 623
|
83
|
+
_que 619
|
84
|
+
ma 602
|
85
|
+
po 581
|
86
|
+
ia 580
|
87
|
+
�o_ 575
|
88
|
+
._ 573
|
89
|
+
na 572
|
90
|
+
me 564
|
91
|
+
ro 554
|
92
|
+
_t 544
|
93
|
+
pa 533
|
94
|
+
da_ 528
|
95
|
+
h 523
|
96
|
+
ue_ 515
|
97
|
+
ca 511
|
98
|
+
que_ 509
|
99
|
+
nte 503
|
100
|
+
no 499
|
101
|
+
tr 498
|
102
|
+
am 496
|
103
|
+
em_ 491
|
104
|
+
_que_ 487
|
105
|
+
_se 485
|
106
|
+
om 471
|
107
|
+
io 460
|
108
|
+
_do 459
|
109
|
+
ti 448
|
110
|
+
ci 445
|
111
|
+
_da 444
|
112
|
+
nd 442
|
113
|
+
ei 435
|
114
|
+
ra_ 435
|
115
|
+
pr 427
|
116
|
+
_r 423
|
117
|
+
_e_ 420
|
118
|
+
_f 420
|
119
|
+
ss 412
|
120
|
+
es_ 412
|
121
|
+
el 407
|
122
|
+
id 406
|
123
|
+
_o_ 399
|
124
|
+
_pa 390
|
125
|
+
um 379
|
126
|
+
pe 378
|
127
|
+
_po 376
|
128
|
+
la 374
|
129
|
+
ir 371
|
130
|
+
� 371
|
131
|
+
ic 362
|
132
|
+
di 362
|
133
|
+
li 359
|
134
|
+
� 359
|
135
|
+
_re 353
|
136
|
+
ve 353
|
137
|
+
mo 350
|
138
|
+
s, 349
|
139
|
+
s,_ 349
|
140
|
+
ou 347
|
141
|
+
com 340
|
142
|
+
sa 338
|
143
|
+
si 338
|
144
|
+
men 337
|
145
|
+
rt 331
|
146
|
+
_i 330
|
147
|
+
con 330
|
148
|
+
o, 327
|
149
|
+
_da_ 326
|
150
|
+
o,_ 326
|
151
|
+
se_ 325
|
152
|
+
_com 325
|
153
|
+
ado 323
|
154
|
+
to_ 322
|
155
|
+
ai 322
|
156
|
+
it 320
|
157
|
+
A 319
|
158
|
+
ec 316
|
159
|
+
dos 316
|
160
|
+
_em 312
|
161
|
+
��o 310
|
162
|
+
a� 310
|
163
|
+
�� 310
|
164
|
+
ara 305
|
165
|
+
so 299
|
166
|
+
tu 299
|
167
|
+
res 297
|
168
|
+
im 296
|
169
|
+
_pr 295
|
170
|
+
mi 293
|
171
|
+
ua 292
|
172
|
+
nto 291
|
173
|
+
ment 290
|
174
|
+
� 290
|
175
|
+
par 288
|
176
|
+
_do_ 287
|
177
|
+
ce 286
|
178
|
+
est 286
|
179
|
+
u_ 284
|
180
|
+
ente 284
|
181
|
+
S 278
|
182
|
+
l_ 278
|
183
|
+
_u 278
|
184
|
+
" 276
|
185
|
+
ni 276
|
186
|
+
z 274
|
187
|
+
sta 273
|
188
|
+
nc 272
|
189
|
+
_em_ 270
|
190
|
+
P 269
|
191
|
+
��o_ 267
|
192
|
+
_v 267
|
193
|
+
at 267
|
194
|
+
dos_ 266
|
195
|
+
_es 262
|
196
|
+
� 259
|
197
|
+
_� 259
|
198
|
+
te_ 258
|
199
|
+
� 257
|
200
|
+
va 255
|
201
|
+
le 252
|
202
|
+
ur 252
|
203
|
+
_um 252
|
204
|
+
vi 251
|
205
|
+
_par 250
|
206
|
+
a, 247
|
207
|
+
a,_ 247
|
208
|
+
_con 247
|
209
|
+
ant 242
|
210
|
+
lo 240
|
211
|
+
ia_ 240
|
212
|
+
gu 237
|
213
|
+
ar_ 235
|
214
|
+
ac 235
|
215
|
+
e,_ 234
|
216
|
+
e, 234
|
217
|
+
no_ 232
|
218
|
+
eg 232
|
219
|
+
il 232
|
220
|
+
ns 232
|
221
|
+
er_ 231
|
222
|
+
_ma 230
|
223
|
+
por 230
|
224
|
+
_in 228
|
225
|
+
_l 226
|
226
|
+
� 225
|
227
|
+
ont 224
|
228
|
+
_no 223
|
229
|
+
_P 222
|
230
|
+
tra 220
|
231
|
+
E 219
|
232
|
+
ida 218
|
233
|
+
is_ 217
|
234
|
+
ol 216
|
235
|
+
a�� 215
|
236
|
+
ter 215
|
237
|
+
a��o 215
|
238
|
+
_A 211
|
239
|
+
un 211
|
240
|
+
- 210
|
241
|
+
_te 210
|
242
|
+
or_ 209
|
243
|
+
ma_ 208
|
244
|
+
_pe 208
|
245
|
+
ara_ 208
|
246
|
+
C 206
|
247
|
+
ist 202
|
248
|
+
para 202
|
249
|
+
nta 201
|
250
|
+
ais 201
|
251
|
+
ut 198
|
252
|
+
nte_ 198
|
253
|
+
j 197
|
254
|
+
dad 196
|
255
|
+
_na 195
|
256
|
+
am_ 195
|
257
|
+
ade 193
|
258
|
+
ica 191
|
259
|
+
x 190
|
260
|
+
al_ 189
|
261
|
+
O 188
|
262
|
+
des 187
|
263
|
+
_para 187
|
264
|
+
ada 187
|
265
|
+
nh 186
|
266
|
+
_se_ 186
|
267
|
+
mp 185
|
268
|
+
ndo 184
|
269
|
+
R 183
|
270
|
+
_por 181
|
271
|
+
a��o_ 181
|
272
|
+
para_ 179
|
273
|
+
eir 177
|
274
|
+
ui 177
|
275
|
+
vo 177
|
276
|
+
ou_ 177
|
277
|
+
ta_ 177
|
278
|
+
M 176
|
279
|
+
ria 175
|
280
|
+
tos 175
|
281
|
+
rr 174
|
282
|
+
D 174
|
283
|
+
io_ 174
|
284
|
+
br 174
|
285
|
+
_di 173
|
286
|
+
� 173
|
287
|
+
�e 173
|
288
|
+
fo 173
|
289
|
+
I 172
|
290
|
+
�es 172
|
291
|
+
_C 171
|
292
|
+
mo_ 171
|
293
|
+
ov 170
|
294
|
+
pro 169
|
295
|
+
_os_ 169
|
296
|
+
_os 169
|
297
|
+
das 167
|
298
|
+
iv 166
|
299
|
+
uma 165
|
300
|
+
gr 165
|
301
|
+
su 164
|
302
|
+
fi 164
|
303
|
+
um_ 162
|
304
|
+
na_ 162
|
305
|
+
ga 162
|
306
|
+
ais_ 161
|
307
|
+
_S 161
|
308
|
+
lh 159
|
309
|
+
ort 159
|
310
|
+
cia 158
|
311
|
+
.. 157
|
312
|
+
_est 156
|
313
|
+
cont 156
|
314
|
+
ig 155
|
315
|
+
�_ 154
|
316
|
+
ran 154
|
317
|
+
�a 154
|
318
|
+
om_ 153
|
319
|
+
_en 152
|
320
|
+
dade 152
|
321
|
+
_as 152
|
322
|
+
ho 152
|
323
|
+
ntr 151
|
324
|
+
nto_ 151
|
325
|
+
fe 150
|
326
|
+
N 149
|
327
|
+
das_ 149
|
328
|
+
uma_ 149
|
329
|
+
ess 149
|
330
|
+
�_ 148
|
331
|
+
ndo_ 147
|
332
|
+
ob 147
|
333
|
+
�_ 147
|
334
|
+
ul 146
|
335
|
+
ente_ 146
|
336
|
+
go 146
|
337
|
+
ento 144
|
338
|
+
ver 144
|
339
|
+
_des 144
|
340
|
+
gi 144
|
341
|
+
ha 142
|
342
|
+
cu 142
|
343
|
+
idad 142
|
344
|
+
av 141
|
345
|
+
�es_ 141
|
346
|
+
_pro 141
|
347
|
+
ura 141
|
348
|
+
ap 139
|
349
|
+
_com_ 139
|
350
|
+
_ca 139
|
351
|
+
com_ 139
|
352
|
+
ao 139
|
353
|
+
ne 138
|
354
|
+
od 138
|
355
|
+
_" 137
|
356
|
+
_M 137
|
357
|
+
pre 137
|
358
|
+
ras 136
|
359
|
+
_me 136
|
360
|
+
_ao 136
|
361
|
+
_no_ 134
|
362
|
+
oc 134
|
363
|
+
str 133
|
364
|
+
tes 133
|
365
|
+
_b 133
|
366
|
+
and 133
|
367
|
+
_g 133
|
368
|
+
ro_ 133
|
369
|
+
omo 133
|
370
|
+
_dos 132
|
371
|
+
_fo 132
|
372
|
+
_dos_ 132
|
373
|
+
rn 132
|
374
|
+
mento 131
|
375
|
+
ito 131
|
376
|
+
ev 131
|
377
|
+
rio 130
|
378
|
+
ass 130
|
379
|
+
eu 130
|
380
|
+
be 128
|
381
|
+
os, 128
|
382
|
+
os,_ 128
|
383
|
+
sp 127
|
384
|
+
_uma 127
|
385
|
+
ep 126
|
386
|
+
tad 125
|
387
|
+
s. 125
|
388
|
+
_uma_ 125
|
389
|
+
_E 125
|
390
|
+
idade 124
|
391
|
+
_um_ 124
|
392
|
+
n� 124
|
393
|
+
n�o 124
|
394
|
+
ct 123
|
395
|
+
ram 123
|
396
|
+
ado_ 123
|
397
|
+
ela 123
|
398
|
+
omo_ 121
|
399
|
+
iz 121
|
400
|
+
_an 121
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 5766
|
2
|
+
a 4900
|
3
|
+
n 1941
|
4
|
+
i 1666
|
5
|
+
u 1384
|
6
|
+
s 1032
|
7
|
+
t 995
|
8
|
+
y 939
|
9
|
+
h 929
|
10
|
+
k 915
|
11
|
+
q 909
|
12
|
+
p 882
|
13
|
+
a_ 847
|
14
|
+
an 821
|
15
|
+
r 783
|
16
|
+
m 740
|
17
|
+
c 705
|
18
|
+
l 695
|
19
|
+
ta 637
|
20
|
+
ch 613
|
21
|
+
ay 587
|
22
|
+
qa 557
|
23
|
+
pa 490
|
24
|
+
ha 486
|
25
|
+
e 474
|
26
|
+
ma 457
|
27
|
+
o 441
|
28
|
+
na 434
|
29
|
+
ku 411
|
30
|
+
j 409
|
31
|
+
un 367
|
32
|
+
w 358
|
33
|
+
in 353
|
34
|
+
, 345
|
35
|
+
,_ 344
|
36
|
+
cha 318
|
37
|
+
ar 317
|
38
|
+
n_ 315
|
39
|
+
as 291
|
40
|
+
wa 289
|
41
|
+
ta_ 269
|
42
|
+
ll 259
|
43
|
+
man 255
|
44
|
+
_k 248
|
45
|
+
._ 243
|
46
|
+
. 243
|
47
|
+
nt 227
|
48
|
+
am 224
|
49
|
+
pi 222
|
50
|
+
la 222
|
51
|
+
ka 217
|
52
|
+
ac 214
|
53
|
+
ni 214
|
54
|
+
at 213
|
55
|
+
aq 213
|
56
|
+
i_ 208
|
57
|
+
ri 207
|
58
|
+
qa_ 204
|
59
|
+
una 204
|
60
|
+
y_ 192
|
61
|
+
aj 192
|
62
|
+
_p 192
|
63
|
+
is 188
|
64
|
+
_m 181
|
65
|
+
lla 175
|
66
|
+
ach 174
|
67
|
+
rq 173
|
68
|
+
us 172
|
69
|
+
an_ 171
|
70
|
+
_ka 171
|
71
|
+
ata 169
|
72
|
+
rqa 165
|
73
|
+
sq 163
|
74
|
+
hu 162
|
75
|
+
sp 161
|
76
|
+
_w 157
|
77
|
+
nk 157
|
78
|
+
hay 157
|
79
|
+
_s 156
|
80
|
+
sqa 155
|
81
|
+
ki 153
|
82
|
+
kun 152
|
83
|
+
_c 152
|
84
|
+
al 150
|
85
|
+
nta 149
|
86
|
+
ap 147
|
87
|
+
ant 146
|
88
|
+
yk 146
|
89
|
+
ay_ 144
|
90
|
+
spa 141
|
91
|
+
hi 137
|
92
|
+
_ch 136
|
93
|
+
_n 136
|
94
|
+
ya 135
|
95
|
+
' 134
|
96
|
+
j_ 133
|
97
|
+
uy 132
|
98
|
+
ra 132
|
99
|
+
a,_ 132
|
100
|
+
a, 132
|
101
|
+
ti 130
|
102
|
+
_a 125
|
103
|
+
nc 125
|
104
|
+
kuna 122
|
105
|
+
s_ 121
|
106
|
+
su 121
|
107
|
+
ak 121
|
108
|
+
_ma 118
|
109
|
+
ana 118
|
110
|
+
ari 115
|
111
|
+
_t 114
|
112
|
+
ama 114
|
113
|
+
chi 114
|
114
|
+
� 113
|
115
|
+
a._ 111
|
116
|
+
a. 111
|
117
|
+
nch 111
|
118
|
+
iy 111
|
119
|
+
all 110
|
120
|
+
aw 110
|
121
|
+
_r 110
|
122
|
+
anta 109
|
123
|
+
ayk 109
|
124
|
+
na_ 109
|
125
|
+
chay 108
|
126
|
+
sa 104
|
127
|
+
_wa 104
|
128
|
+
si 103
|
129
|
+
chu 102
|
130
|
+
pa_ 101
|
131
|
+
acha 101
|
132
|
+
_cha 101
|
133
|
+
pi_ 101
|
134
|
+
qan 100
|
135
|
+
_pa 99
|
136
|
+
_q 97
|
137
|
+
aj_ 97
|
138
|
+
awa 97
|
139
|
+
ank 95
|
140
|
+
nku 95
|
141
|
+
im 94
|
142
|
+
q_ 92
|
143
|
+
uk 92
|
144
|
+
C 92
|
145
|
+
mu 90
|
146
|
+
tu 89
|
147
|
+
J 89
|
148
|
+
_ni 88
|
149
|
+
taj 87
|
150
|
+
_J 87
|
151
|
+
nin 86
|
152
|
+
_chay 86
|
153
|
+
u_ 86
|
154
|
+
_C 84
|
155
|
+
wan 83
|
156
|
+
nta_ 81
|
157
|
+
_j 81
|
158
|
+
mant 80
|
159
|
+
ut 79
|
160
|
+
in_ 79
|
161
|
+
ik 79
|
162
|
+
manta 79
|
163
|
+
asq 79
|
164
|
+
yt 78
|
165
|
+
n, 78
|
166
|
+
asqa 78
|
167
|
+
n,_ 78
|
168
|
+
pay 78
|
169
|
+
li 77
|
170
|
+
yn 77
|
171
|
+
nq 76
|
172
|
+
yta 76
|
173
|
+
ic 76
|
174
|
+
up 76
|
175
|
+
_Ch 75
|
176
|
+
yku 75
|
177
|
+
Ch 75
|
178
|
+
he 75
|
179
|
+
hay_ 74
|
180
|
+
nan 74
|
181
|
+
ina 74
|
182
|
+
ur 73
|
183
|
+
er 72
|
184
|
+
S 72
|
185
|
+
arq 72
|
186
|
+
or 72
|
187
|
+
_l 71
|
188
|
+
_u 71
|
189
|
+
aq_ 70
|
190
|
+
os 70
|
191
|
+
yp 70
|
192
|
+
anc 69
|
193
|
+
man_ 69
|
194
|
+
mi 69
|
195
|
+
ich 68
|
196
|
+
_i 68
|
197
|
+
st 67
|
198
|
+
_S 67
|
199
|
+
arqa 66
|
200
|
+
it 66
|
201
|
+
anch 66
|
202
|
+
ru 66
|
203
|
+
aku 65
|
204
|
+
pu 65
|
205
|
+
�a 65
|
206
|
+
alla 64
|
207
|
+
mp 64
|
208
|
+
sqa_ 64
|
209
|
+
'a 64
|
210
|
+
ayku 63
|
211
|
+
es 63
|
212
|
+
A 63
|
213
|
+
ia 63
|
214
|
+
_man 63
|
215
|
+
_Cha 61
|
216
|
+
Cha 61
|
217
|
+
taj_ 60
|
218
|
+
api 60
|
219
|
+
_ll 60
|
220
|
+
wi 60
|
221
|
+
ayp 60
|
222
|
+
aman 59
|
223
|
+
g 58
|
224
|
+
anku 58
|
225
|
+
yki 57
|
226
|
+
ima 57
|
227
|
+
yni 57
|
228
|
+
oj 57
|
229
|
+
mana 57
|
230
|
+
anta_ 57
|
231
|
+
_su 57
|
232
|
+
uc 56
|
233
|
+
isp 56
|
234
|
+
ispa 56
|
235
|
+
uch 56
|
236
|
+
M 56
|
237
|
+
ir 56
|
238
|
+
_h 55
|
239
|
+
nqa 55
|
240
|
+
kuy 55
|
241
|
+
ayt 54
|
242
|
+
_M 54
|
243
|
+
b 54
|
244
|
+
_y 53
|
245
|
+
_mana 53
|
246
|
+
: 53
|
247
|
+
uku 53
|
248
|
+
:_ 53
|
249
|
+
nm 53
|
250
|
+
au 52
|
251
|
+
ayta 52
|
252
|
+
io 52
|
253
|
+
qo 51
|
254
|
+
an,_ 51
|
255
|
+
apa 51
|
256
|
+
spa_ 51
|
257
|
+
erq 51
|
258
|
+
_wi 51
|
259
|
+
erqa 51
|
260
|
+
_sa 51
|
261
|
+
an, 51
|
262
|
+
el 50
|
263
|
+
um 50
|
264
|
+
ana_ 50
|
265
|
+
han 50
|
266
|
+
il 50
|
267
|
+
on 49
|
268
|
+
chay_ 49
|
269
|
+
sta 49
|
270
|
+
_D 49
|
271
|
+
D 49
|
272
|
+
iku 49
|
273
|
+
aqa 49
|
274
|
+
che 48
|
275
|
+
en 48
|
276
|
+
yta_ 48
|
277
|
+
Ma 47
|
278
|
+
P 47
|
279
|
+
_lla 47
|
280
|
+
_Je 47
|
281
|
+
yq 47
|
282
|
+
Je 47
|
283
|
+
ita 47
|
284
|
+
rqan 47
|
285
|
+
ypi 46
|
286
|
+
har 46
|
287
|
+
Jes 46
|
288
|
+
_Jes 46
|
289
|
+
ios 46
|
290
|
+
ayq 46
|
291
|
+
Dio 46
|
292
|
+
un_ 46
|
293
|
+
kus 46
|
294
|
+
_Dio 46
|
295
|
+
taq 46
|
296
|
+
_Dios 46
|
297
|
+
_Ma 46
|
298
|
+
_Di 46
|
299
|
+
Dios 46
|
300
|
+
Di 46
|
301
|
+
d 46
|
302
|
+
kan 45
|
303
|
+
Chay 45
|
304
|
+
oq 45
|
305
|
+
_Chay 45
|
306
|
+
_pay 45
|
307
|
+
upa 45
|
308
|
+
mun 45
|
309
|
+
ata_ 44
|
310
|
+
_tu 44
|
311
|
+
nis 44
|
312
|
+
re 44
|
313
|
+
paq 44
|
314
|
+
yo 44
|
315
|
+
ej 44
|
316
|
+
qay 43
|
317
|
+
ncha 43
|
318
|
+
ha_ 43
|
319
|
+
_A 43
|
320
|
+
I 43
|
321
|
+
_kan 43
|
322
|
+
_nis 43
|
323
|
+
_P 43
|
324
|
+
nman 43
|
325
|
+
nma 43
|
326
|
+
ataj 42
|
327
|
+
ara 42
|
328
|
+
ku_ 42
|
329
|
+
nata 42
|
330
|
+
nat 42
|
331
|
+
i, 41
|
332
|
+
tin 41
|
333
|
+
qh 41
|
334
|
+
t' 41
|
335
|
+
orq 41
|
336
|
+
nki 41
|
337
|
+
_ru 41
|
338
|
+
_ku 41
|
339
|
+
i,_ 41
|
340
|
+
ip 40
|
341
|
+
ham 40
|
342
|
+
usq 40
|
343
|
+
_ya 40
|
344
|
+
qank 39
|
345
|
+
orqa 39
|
346
|
+
ayn 39
|
347
|
+
mana_ 39
|
348
|
+
ray 39
|
349
|
+
ym 39
|
350
|
+
uma 39
|
351
|
+
_pu 39
|
352
|
+
par 39
|
353
|
+
kay 39
|
354
|
+
n. 38
|
355
|
+
qa,_ 38
|
356
|
+
n._ 38
|
357
|
+
sus 38
|
358
|
+
aypi 38
|
359
|
+
usqa 38
|
360
|
+
qanku 38
|
361
|
+
ill 38
|
362
|
+
qa, 38
|
363
|
+
was 38
|
364
|
+
pa, 38
|
365
|
+
pa,_ 38
|
366
|
+
asp 38
|
367
|
+
qa._ 37
|
368
|
+
_mu 37
|
369
|
+
paj 37
|
370
|
+
amp 37
|
371
|
+
hin 37
|
372
|
+
uti 37
|
373
|
+
rin 37
|
374
|
+
_im 37
|
375
|
+
_ima 37
|
376
|
+
ja 37
|
377
|
+
_ri 37
|
378
|
+
rqa_ 37
|
379
|
+
taq_ 37
|
380
|
+
qa. 37
|
381
|
+
sh 36
|
382
|
+
spa,_ 36
|
383
|
+
cha_ 36
|
384
|
+
spa, 36
|
385
|
+
achi 36
|
386
|
+
una_ 36
|
387
|
+
rqank 36
|
388
|
+
jt 36
|
389
|
+
K 36
|
390
|
+
amu 36
|
391
|
+
aspa 35
|
392
|
+
_Jesu 35
|
393
|
+
Jesus 35
|
394
|
+
nispa 35
|
395
|
+
ki_ 35
|
396
|
+
waw 35
|
397
|
+
ko 35
|
398
|
+
ne 35
|
399
|
+
esus 35
|
400
|
+
int 35
|