language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 9464
|
2
|
+
a 2787
|
3
|
+
i 2108
|
4
|
+
e 2077
|
5
|
+
o 2018
|
6
|
+
j 1396
|
7
|
+
n 1328
|
8
|
+
s 1170
|
9
|
+
u 1010
|
10
|
+
r 988
|
11
|
+
d 957
|
12
|
+
a_ 889
|
13
|
+
e_ 833
|
14
|
+
t 774
|
15
|
+
je 771
|
16
|
+
k 756
|
17
|
+
l 743
|
18
|
+
m 702
|
19
|
+
v 685
|
20
|
+
p 604
|
21
|
+
c 538
|
22
|
+
i_ 538
|
23
|
+
_s 522
|
24
|
+
u_ 476
|
25
|
+
z 463
|
26
|
+
_p 450
|
27
|
+
o_ 433
|
28
|
+
,_ 381
|
29
|
+
, 381
|
30
|
+
_i 369
|
31
|
+
_n 358
|
32
|
+
b 349
|
33
|
+
_d 349
|
34
|
+
. 346
|
35
|
+
na 341
|
36
|
+
je_ 337
|
37
|
+
._ 321
|
38
|
+
_j 307
|
39
|
+
g 299
|
40
|
+
ra 292
|
41
|
+
st 283
|
42
|
+
ko 278
|
43
|
+
_je 274
|
44
|
+
ij 254
|
45
|
+
_o 245
|
46
|
+
ni 240
|
47
|
+
_k 236
|
48
|
+
an 228
|
49
|
+
oj 227
|
50
|
+
da 226
|
51
|
+
_u 222
|
52
|
+
pr 221
|
53
|
+
no 219
|
54
|
+
ma 219
|
55
|
+
la 211
|
56
|
+
ri 206
|
57
|
+
_je_ 203
|
58
|
+
po 203
|
59
|
+
ci 196
|
60
|
+
_pr 191
|
61
|
+
os 190
|
62
|
+
od 187
|
63
|
+
ka 186
|
64
|
+
im 185
|
65
|
+
ti 184
|
66
|
+
li 182
|
67
|
+
vo 178
|
68
|
+
_po 174
|
69
|
+
ja 171
|
70
|
+
_i_ 169
|
71
|
+
ov 169
|
72
|
+
al 168
|
73
|
+
re 167
|
74
|
+
ne 167
|
75
|
+
m_ 164
|
76
|
+
ta 160
|
77
|
+
na_ 158
|
78
|
+
ed 157
|
79
|
+
_m 157
|
80
|
+
_na 157
|
81
|
+
se 156
|
82
|
+
_u_ 154
|
83
|
+
en 153
|
84
|
+
ic 151
|
85
|
+
sa 151
|
86
|
+
_b 149
|
87
|
+
ak 141
|
88
|
+
va 140
|
89
|
+
ad 137
|
90
|
+
h 136
|
91
|
+
ju 135
|
92
|
+
su 134
|
93
|
+
dj 133
|
94
|
+
ije 133
|
95
|
+
ar 127
|
96
|
+
ca 127
|
97
|
+
_z 126
|
98
|
+
nj 125
|
99
|
+
ji 124
|
100
|
+
da_ 124
|
101
|
+
_ko 123
|
102
|
+
_da 122
|
103
|
+
il 121
|
104
|
+
" 120
|
105
|
+
av 120
|
106
|
+
_t 118
|
107
|
+
aj 116
|
108
|
+
ob 115
|
109
|
+
ro 114
|
110
|
+
am 114
|
111
|
+
vi 114
|
112
|
+
_su 113
|
113
|
+
om 112
|
114
|
+
dje 112
|
115
|
+
za 112
|
116
|
+
at 111
|
117
|
+
le 111
|
118
|
+
di 110
|
119
|
+
su_ 107
|
120
|
+
iz 107
|
121
|
+
ve 107
|
122
|
+
lj 106
|
123
|
+
_se 105
|
124
|
+
ev 105
|
125
|
+
is 105
|
126
|
+
es 103
|
127
|
+
se_ 103
|
128
|
+
do 101
|
129
|
+
ih 100
|
130
|
+
a, 99
|
131
|
+
_su_ 99
|
132
|
+
a,_ 99
|
133
|
+
on 98
|
134
|
+
bi 98
|
135
|
+
in 97
|
136
|
+
voj 97
|
137
|
+
az 96
|
138
|
+
ac 95
|
139
|
+
_da_ 95
|
140
|
+
la_ 95
|
141
|
+
_r 93
|
142
|
+
_g 93
|
143
|
+
jev 92
|
144
|
+
ma_ 92
|
145
|
+
er 92
|
146
|
+
or 91
|
147
|
+
h_ 90
|
148
|
+
_sa 90
|
149
|
+
e, 90
|
150
|
+
e,_ 90
|
151
|
+
ba 90
|
152
|
+
ima 89
|
153
|
+
a. 88
|
154
|
+
ol 88
|
155
|
+
_do 87
|
156
|
+
dn 87
|
157
|
+
it 87
|
158
|
+
ko_ 86
|
159
|
+
ne_ 86
|
160
|
+
ost 85
|
161
|
+
ek 85
|
162
|
+
to 85
|
163
|
+
d_ 84
|
164
|
+
as 84
|
165
|
+
ju_ 84
|
166
|
+
ao 84
|
167
|
+
ih_ 84
|
168
|
+
a._ 84
|
169
|
+
te 83
|
170
|
+
evo 83
|
171
|
+
koj 83
|
172
|
+
pri 82
|
173
|
+
jevo 82
|
174
|
+
ce 81
|
175
|
+
_se_ 81
|
176
|
+
og 80
|
177
|
+
go 80
|
178
|
+
jevoj 79
|
179
|
+
de 79
|
180
|
+
uc 79
|
181
|
+
evoj 79
|
182
|
+
_od 78
|
183
|
+
_za 78
|
184
|
+
tr 78
|
185
|
+
S 77
|
186
|
+
_koj 76
|
187
|
+
ke 75
|
188
|
+
_v 75
|
189
|
+
ao_ 75
|
190
|
+
_dje 74
|
191
|
+
_bi 74
|
192
|
+
sta 74
|
193
|
+
_dj 74
|
194
|
+
cij 74
|
195
|
+
ik 74
|
196
|
+
djev 73
|
197
|
+
sl 73
|
198
|
+
_djev 72
|
199
|
+
ga 72
|
200
|
+
djevo 72
|
201
|
+
_ka 71
|
202
|
+
rij 71
|
203
|
+
_iz 71
|
204
|
+
P 71
|
205
|
+
_pri 70
|
206
|
+
_a 69
|
207
|
+
us 68
|
208
|
+
_S 68
|
209
|
+
mo 67
|
210
|
+
el 67
|
211
|
+
sk 66
|
212
|
+
me 66
|
213
|
+
zi 66
|
214
|
+
ija 65
|
215
|
+
n_ 65
|
216
|
+
ku 64
|
217
|
+
im_ 63
|
218
|
+
_st 63
|
219
|
+
ica 63
|
220
|
+
_na_ 62
|
221
|
+
_ne 62
|
222
|
+
em 61
|
223
|
+
edn 61
|
224
|
+
jk 61
|
225
|
+
io 61
|
226
|
+
li_ 60
|
227
|
+
ojk 60
|
228
|
+
evojk 60
|
229
|
+
_" 60
|
230
|
+
zn 60
|
231
|
+
vojk 60
|
232
|
+
pro 59
|
233
|
+
lo 59
|
234
|
+
ije_ 59
|
235
|
+
jed 58
|
236
|
+
ke_ 58
|
237
|
+
om_ 58
|
238
|
+
jen 58
|
239
|
+
sti 57
|
240
|
+
_im 57
|
241
|
+
le_ 57
|
242
|
+
_ra 56
|
243
|
+
e. 56
|
244
|
+
ze 55
|
245
|
+
_pro 55
|
246
|
+
nu 55
|
247
|
+
nje 55
|
248
|
+
ti_ 55
|
249
|
+
ec 55
|
250
|
+
pre 55
|
251
|
+
oc 54
|
252
|
+
aci 54
|
253
|
+
no_ 54
|
254
|
+
et 54
|
255
|
+
oji 53
|
256
|
+
si 53
|
257
|
+
ara 53
|
258
|
+
ama 53
|
259
|
+
z_ 53
|
260
|
+
pos 52
|
261
|
+
rad 52
|
262
|
+
ran 52
|
263
|
+
ima_ 52
|
264
|
+
ru 52
|
265
|
+
_P 52
|
266
|
+
tu 52
|
267
|
+
mu 51
|
268
|
+
e._ 51
|
269
|
+
ja_ 50
|
270
|
+
_pre 50
|
271
|
+
sa_ 49
|
272
|
+
io_ 49
|
273
|
+
od_ 48
|
274
|
+
ni_ 48
|
275
|
+
_nj 48
|
276
|
+
j_ 48
|
277
|
+
_pos 47
|
278
|
+
_c 47
|
279
|
+
ila 47
|
280
|
+
K 46
|
281
|
+
_sa_ 46
|
282
|
+
uz 46
|
283
|
+
N 46
|
284
|
+
_ni 45
|
285
|
+
zna 45
|
286
|
+
U 45
|
287
|
+
za_ 45
|
288
|
+
_no 45
|
289
|
+
ako 45
|
290
|
+
u, 44
|
291
|
+
lu 44
|
292
|
+
ali 44
|
293
|
+
u,_ 44
|
294
|
+
sto 44
|
295
|
+
ste 44
|
296
|
+
ve_ 44
|
297
|
+
ani 44
|
298
|
+
oli 44
|
299
|
+
aka 44
|
300
|
+
_jed 43
|
301
|
+
i,_ 43
|
302
|
+
ji_ 43
|
303
|
+
uci 43
|
304
|
+
i, 43
|
305
|
+
ci_ 43
|
306
|
+
osti 43
|
307
|
+
_N 42
|
308
|
+
dr 42
|
309
|
+
so 42
|
310
|
+
ust 41
|
311
|
+
ila_ 41
|
312
|
+
B 41
|
313
|
+
- 41
|
314
|
+
red 41
|
315
|
+
jke 41
|
316
|
+
sv 41
|
317
|
+
_go 41
|
318
|
+
bar 41
|
319
|
+
g_ 41
|
320
|
+
est 40
|
321
|
+
D 40
|
322
|
+
iv 40
|
323
|
+
vojke 40
|
324
|
+
aju 40
|
325
|
+
ta_ 40
|
326
|
+
A 40
|
327
|
+
lje 40
|
328
|
+
jedn 40
|
329
|
+
bil 40
|
330
|
+
ojke 40
|
331
|
+
ova 40
|
332
|
+
ati 39
|
333
|
+
_mu 39
|
334
|
+
pa 39
|
335
|
+
M 39
|
336
|
+
_ba 39
|
337
|
+
ca_ 39
|
338
|
+
O 39
|
339
|
+
ka_ 39
|
340
|
+
_a_ 38
|
341
|
+
_B 38
|
342
|
+
_ima 38
|
343
|
+
sn 38
|
344
|
+
nu_ 38
|
345
|
+
T 38
|
346
|
+
to_ 38
|
347
|
+
eg 38
|
348
|
+
ava 38
|
349
|
+
ros 37
|
350
|
+
ir 37
|
351
|
+
ala 37
|
352
|
+
og_ 37
|
353
|
+
osl 37
|
354
|
+
ovi 37
|
355
|
+
koji 37
|
356
|
+
_sv 37
|
357
|
+
dv 36
|
358
|
+
ric 36
|
359
|
+
_za_ 36
|
360
|
+
br 36
|
361
|
+
_on 36
|
362
|
+
odi 36
|
363
|
+
_koji 36
|
364
|
+
_jedn 35
|
365
|
+
nik 35
|
366
|
+
dno 35
|
367
|
+
_D 35
|
368
|
+
jo 35
|
369
|
+
tra 35
|
370
|
+
_M 35
|
371
|
+
sp 35
|
372
|
+
iz_ 35
|
373
|
+
oz 35
|
374
|
+
vr 35
|
375
|
+
u. 35
|
376
|
+
eri 35
|
377
|
+
I 35
|
378
|
+
eko 35
|
379
|
+
ale 35
|
380
|
+
_ma 34
|
381
|
+
lik 34
|
382
|
+
_bil 34
|
383
|
+
c_ 34
|
384
|
+
ut 34
|
385
|
+
je,_ 34
|
386
|
+
u._ 34
|
387
|
+
str 34
|
388
|
+
je, 34
|
389
|
+
adi 34
|
390
|
+
tit 34
|
391
|
+
_iz_ 34
|
392
|
+
iti 34
|
393
|
+
i. 33
|
394
|
+
_rad 33
|
395
|
+
ici 33
|
396
|
+
rost 33
|
397
|
+
aju_ 33
|
398
|
+
va_ 33
|
399
|
+
_ob 33
|
400
|
+
nog 33
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 21447
|
2
|
+
e 6375
|
3
|
+
a 5414
|
4
|
+
n 3228
|
5
|
+
r 3039
|
6
|
+
o 2968
|
7
|
+
t 2392
|
8
|
+
i 1812
|
9
|
+
h 1751
|
10
|
+
u 1650
|
11
|
+
l 1630
|
12
|
+
d 1506
|
13
|
+
a_ 1352
|
14
|
+
z 1319
|
15
|
+
t_ 1310
|
16
|
+
_e 1168
|
17
|
+
_a 1168
|
18
|
+
e_ 1133
|
19
|
+
m 1105
|
20
|
+
s 1100
|
21
|
+
g 1090
|
22
|
+
r_ 998
|
23
|
+
k 997
|
24
|
+
n_ 958
|
25
|
+
et 941
|
26
|
+
v 888
|
27
|
+
_d 868
|
28
|
+
an 859
|
29
|
+
. 846
|
30
|
+
' 841
|
31
|
+
en 836
|
32
|
+
b 757
|
33
|
+
, 749
|
34
|
+
,_ 743
|
35
|
+
._ 716
|
36
|
+
ar 703
|
37
|
+
ou 700
|
38
|
+
et_ 689
|
39
|
+
c 686
|
40
|
+
ez 572
|
41
|
+
'h 572
|
42
|
+
_g 565
|
43
|
+
er 555
|
44
|
+
p 553
|
45
|
+
_k 535
|
46
|
+
c'h 530
|
47
|
+
c' 530
|
48
|
+
nt 513
|
49
|
+
_h 505
|
50
|
+
re 505
|
51
|
+
ra 478
|
52
|
+
ha 466
|
53
|
+
� 458
|
54
|
+
ne 456
|
55
|
+
oa 454
|
56
|
+
_o 442
|
57
|
+
_b 434
|
58
|
+
- 432
|
59
|
+
zh 422
|
60
|
+
ar_ 415
|
61
|
+
_m 414
|
62
|
+
_e_ 414
|
63
|
+
nn 384
|
64
|
+
el 376
|
65
|
+
_a_ 356
|
66
|
+
ur 350
|
67
|
+
o_ 346
|
68
|
+
h_ 345
|
69
|
+
ve 340
|
70
|
+
nt_ 340
|
71
|
+
w 339
|
72
|
+
ke 338
|
73
|
+
de 333
|
74
|
+
a� 332
|
75
|
+
_p 332
|
76
|
+
s_ 327
|
77
|
+
he 325
|
78
|
+
on 318
|
79
|
+
le 318
|
80
|
+
ga 316
|
81
|
+
ma 315
|
82
|
+
_ar 312
|
83
|
+
eu 312
|
84
|
+
_n 310
|
85
|
+
an_ 298
|
86
|
+
ant 296
|
87
|
+
enn 285
|
88
|
+
z_ 282
|
89
|
+
_ar_ 281
|
90
|
+
be 280
|
91
|
+
_v 276
|
92
|
+
_r 272
|
93
|
+
al 270
|
94
|
+
en_ 268
|
95
|
+
_ke 267
|
96
|
+
l_ 264
|
97
|
+
em 264
|
98
|
+
_c 263
|
99
|
+
�_ 262
|
100
|
+
da 262
|
101
|
+
_s 261
|
102
|
+
ho 260
|
103
|
+
di 259
|
104
|
+
_ha 252
|
105
|
+
ll 250
|
106
|
+
tr 248
|
107
|
+
oa_ 247
|
108
|
+
me 246
|
109
|
+
us 242
|
110
|
+
_ga 234
|
111
|
+
la 231
|
112
|
+
ket 227
|
113
|
+
ant_ 219
|
114
|
+
_da 219
|
115
|
+
_l 216
|
116
|
+
ur_ 216
|
117
|
+
_oa 215
|
118
|
+
in 214
|
119
|
+
ket_ 211
|
120
|
+
gan 211
|
121
|
+
_c' 207
|
122
|
+
_u 207
|
123
|
+
_c'h 207
|
124
|
+
ad 207
|
125
|
+
a�_ 207
|
126
|
+
ao 204
|
127
|
+
_ma 204
|
128
|
+
_t 204
|
129
|
+
_ket 201
|
130
|
+
_an 199
|
131
|
+
_di 197
|
132
|
+
ezh 196
|
133
|
+
� 196
|
134
|
+
o� 196
|
135
|
+
_de 195
|
136
|
+
ev 193
|
137
|
+
? 192
|
138
|
+
st 192
|
139
|
+
ro 192
|
140
|
+
P 192
|
141
|
+
_ket_ 188
|
142
|
+
er_ 188
|
143
|
+
f 186
|
144
|
+
na 186
|
145
|
+
ue 185
|
146
|
+
da_ 184
|
147
|
+
?_ 184
|
148
|
+
_gan 184
|
149
|
+
_da_ 184
|
150
|
+
_ne 183
|
151
|
+
ed 182
|
152
|
+
_P 180
|
153
|
+
g_ 180
|
154
|
+
pe 179
|
155
|
+
m_ 178
|
156
|
+
A 177
|
157
|
+
ri 176
|
158
|
+
us_ 175
|
159
|
+
ta 174
|
160
|
+
ze 174
|
161
|
+
gant 174
|
162
|
+
ka 174
|
163
|
+
i_ 172
|
164
|
+
d_ 171
|
165
|
+
G 167
|
166
|
+
te 167
|
167
|
+
ae 166
|
168
|
+
zh_ 164
|
169
|
+
ha_ 163
|
170
|
+
_ha_ 163
|
171
|
+
_he 161
|
172
|
+
_gant 159
|
173
|
+
do 159
|
174
|
+
oue 159
|
175
|
+
_G 158
|
176
|
+
eus 158
|
177
|
+
eo 158
|
178
|
+
'h_ 157
|
179
|
+
_en 157
|
180
|
+
go 157
|
181
|
+
am 157
|
182
|
+
c'h_ 157
|
183
|
+
_be 156
|
184
|
+
we 156
|
185
|
+
iz 154
|
186
|
+
_an_ 151
|
187
|
+
_A 150
|
188
|
+
eus_ 147
|
189
|
+
sk 147
|
190
|
+
li 146
|
191
|
+
as 146
|
192
|
+
_pe 146
|
193
|
+
j 146
|
194
|
+
_oa_ 146
|
195
|
+
av 144
|
196
|
+
gant_ 143
|
197
|
+
ut 142
|
198
|
+
no 141
|
199
|
+
vez 140
|
200
|
+
va 140
|
201
|
+
_ra 140
|
202
|
+
ge 138
|
203
|
+
ez_ 138
|
204
|
+
bo 137
|
205
|
+
� 137
|
206
|
+
_ur 136
|
207
|
+
lo 134
|
208
|
+
he_ 134
|
209
|
+
o�_ 133
|
210
|
+
�_ 133
|
211
|
+
_ur_ 132
|
212
|
+
es 130
|
213
|
+
'ho 129
|
214
|
+
ni 129
|
215
|
+
uz 129
|
216
|
+
tra 127
|
217
|
+
se 126
|
218
|
+
it 125
|
219
|
+
ra_ 125
|
220
|
+
out 125
|
221
|
+
is 125
|
222
|
+
at 125
|
223
|
+
hi 125
|
224
|
+
eg 125
|
225
|
+
ig 124
|
226
|
+
ko 124
|
227
|
+
io 123
|
228
|
+
k_ 123
|
229
|
+
ch 123
|
230
|
+
_w 121
|
231
|
+
or 121
|
232
|
+
Pe 121
|
233
|
+
_ma_ 119
|
234
|
+
ma_ 119
|
235
|
+
gw 118
|
236
|
+
_em 118
|
237
|
+
_Pe 118
|
238
|
+
un 118
|
239
|
+
eme 117
|
240
|
+
ne_ 117
|
241
|
+
nn_ 117
|
242
|
+
c'ho 117
|
243
|
+
ol 116
|
244
|
+
ag 116
|
245
|
+
M 115
|
246
|
+
'ha 115
|
247
|
+
_en_ 115
|
248
|
+
iv 115
|
249
|
+
vi 113
|
250
|
+
_ka 113
|
251
|
+
K 113
|
252
|
+
ud 112
|
253
|
+
_he_ 111
|
254
|
+
ont 110
|
255
|
+
oc 110
|
256
|
+
vo 110
|
257
|
+
ec 109
|
258
|
+
wa 109
|
259
|
+
.. 107
|
260
|
+
_M 107
|
261
|
+
_z 107
|
262
|
+
br 107
|
263
|
+
om 106
|
264
|
+
to 105
|
265
|
+
_f 105
|
266
|
+
N 105
|
267
|
+
_c'ho 104
|
268
|
+
ti 104
|
269
|
+
ut_ 104
|
270
|
+
D 104
|
271
|
+
_o_ 103
|
272
|
+
_la 103
|
273
|
+
_go 101
|
274
|
+
az 101
|
275
|
+
out_ 101
|
276
|
+
ba 101
|
277
|
+
enn_ 101
|
278
|
+
c'ha 101
|
279
|
+
our 100
|
280
|
+
oc'h 100
|
281
|
+
ell 100
|
282
|
+
oc' 100
|
283
|
+
etr 99
|
284
|
+
el_ 99
|
285
|
+
_K 99
|
286
|
+
_D 99
|
287
|
+
: 99
|
288
|
+
:_ 99
|
289
|
+
eve 98
|
290
|
+
_d' 97
|
291
|
+
all 97
|
292
|
+
d' 97
|
293
|
+
E 97
|
294
|
+
_ne_ 97
|
295
|
+
_me 95
|
296
|
+
eo_ 95
|
297
|
+
ak 95
|
298
|
+
bet 95
|
299
|
+
_eu 95
|
300
|
+
rc 94
|
301
|
+
_do 94
|
302
|
+
_gw 94
|
303
|
+
zi 93
|
304
|
+
oz 93
|
305
|
+
aou 93
|
306
|
+
etra 92
|
307
|
+
pa 91
|
308
|
+
ab 90
|
309
|
+
on_ 90
|
310
|
+
ei 90
|
311
|
+
tra_ 90
|
312
|
+
n, 89
|
313
|
+
zo 89
|
314
|
+
ag_ 89
|
315
|
+
_ev 88
|
316
|
+
ul 88
|
317
|
+
'e 88
|
318
|
+
n' 88
|
319
|
+
n,_ 88
|
320
|
+
ouz 87
|
321
|
+
v_ 86
|
322
|
+
_n' 86
|
323
|
+
_eus 84
|
324
|
+
H 83
|
325
|
+
za 83
|
326
|
+
S 83
|
327
|
+
etra_ 83
|
328
|
+
_eo 82
|
329
|
+
t,_ 82
|
330
|
+
t, 82
|
331
|
+
il 81
|
332
|
+
ent 81
|
333
|
+
fe 81
|
334
|
+
rc'h 81
|
335
|
+
rc' 81
|
336
|
+
_eus_ 80
|
337
|
+
ie 80
|
338
|
+
_bo 79
|
339
|
+
ele 79
|
340
|
+
_ve 79
|
341
|
+
mp 79
|
342
|
+
_bet 78
|
343
|
+
B 78
|
344
|
+
it_ 77
|
345
|
+
_vo 77
|
346
|
+
'a 77
|
347
|
+
n. 76
|
348
|
+
_S 76
|
349
|
+
hag 76
|
350
|
+
hoa 75
|
351
|
+
_hag 75
|
352
|
+
len 75
|
353
|
+
_N 75
|
354
|
+
'hoa 74
|
355
|
+
_E 74
|
356
|
+
ir 74
|
357
|
+
hag_ 74
|
358
|
+
_hag_ 74
|
359
|
+
ma� 74
|
360
|
+
as_ 73
|
361
|
+
eze 73
|
362
|
+
ont_ 73
|
363
|
+
_. 73
|
364
|
+
i� 73
|
365
|
+
r, 72
|
366
|
+
pr 72
|
367
|
+
ed_ 72
|
368
|
+
n._ 72
|
369
|
+
re_ 72
|
370
|
+
in_ 72
|
371
|
+
r,_ 72
|
372
|
+
_H 71
|
373
|
+
'he 70
|
374
|
+
t. 70
|
375
|
+
gou 70
|
376
|
+
em_ 70
|
377
|
+
_br 70
|
378
|
+
rae 70
|
379
|
+
rez 69
|
380
|
+
t._ 69
|
381
|
+
bet_ 69
|
382
|
+
net 69
|
383
|
+
dr 68
|
384
|
+
_eo_ 68
|
385
|
+
ll_ 68
|
386
|
+
mo 67
|
387
|
+
po 67
|
388
|
+
oul 67
|
389
|
+
rou 67
|
390
|
+
c'hoa 67
|
391
|
+
a- 67
|
392
|
+
vel 67
|
393
|
+
oc'h_ 67
|
394
|
+
nna 66
|
395
|
+
_B 66
|
396
|
+
met 65
|
397
|
+
ec' 65
|
398
|
+
ec'h 65
|
399
|
+
R 64
|
400
|
+
den 64
|