language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 9464
|
2
|
+
a 2787
|
3
|
+
i 2108
|
4
|
+
e 2077
|
5
|
+
o 2018
|
6
|
+
j 1396
|
7
|
+
n 1328
|
8
|
+
s 1170
|
9
|
+
u 1010
|
10
|
+
r 988
|
11
|
+
d 957
|
12
|
+
a_ 889
|
13
|
+
e_ 833
|
14
|
+
t 774
|
15
|
+
je 771
|
16
|
+
k 756
|
17
|
+
l 743
|
18
|
+
m 702
|
19
|
+
v 685
|
20
|
+
p 604
|
21
|
+
c 538
|
22
|
+
i_ 538
|
23
|
+
_s 522
|
24
|
+
u_ 476
|
25
|
+
z 463
|
26
|
+
_p 450
|
27
|
+
o_ 433
|
28
|
+
,_ 381
|
29
|
+
, 381
|
30
|
+
_i 369
|
31
|
+
_n 358
|
32
|
+
b 349
|
33
|
+
_d 349
|
34
|
+
. 346
|
35
|
+
na 341
|
36
|
+
je_ 337
|
37
|
+
._ 321
|
38
|
+
_j 307
|
39
|
+
g 299
|
40
|
+
ra 292
|
41
|
+
st 283
|
42
|
+
ko 278
|
43
|
+
_je 274
|
44
|
+
ij 254
|
45
|
+
_o 245
|
46
|
+
ni 240
|
47
|
+
_k 236
|
48
|
+
an 228
|
49
|
+
oj 227
|
50
|
+
da 226
|
51
|
+
_u 222
|
52
|
+
pr 221
|
53
|
+
no 219
|
54
|
+
ma 219
|
55
|
+
la 211
|
56
|
+
ri 206
|
57
|
+
_je_ 203
|
58
|
+
po 203
|
59
|
+
ci 196
|
60
|
+
_pr 191
|
61
|
+
os 190
|
62
|
+
od 187
|
63
|
+
ka 186
|
64
|
+
im 185
|
65
|
+
ti 184
|
66
|
+
li 182
|
67
|
+
vo 178
|
68
|
+
_po 174
|
69
|
+
ja 171
|
70
|
+
_i_ 169
|
71
|
+
ov 169
|
72
|
+
al 168
|
73
|
+
re 167
|
74
|
+
ne 167
|
75
|
+
m_ 164
|
76
|
+
ta 160
|
77
|
+
na_ 158
|
78
|
+
ed 157
|
79
|
+
_m 157
|
80
|
+
_na 157
|
81
|
+
se 156
|
82
|
+
_u_ 154
|
83
|
+
en 153
|
84
|
+
ic 151
|
85
|
+
sa 151
|
86
|
+
_b 149
|
87
|
+
ak 141
|
88
|
+
va 140
|
89
|
+
ad 137
|
90
|
+
h 136
|
91
|
+
ju 135
|
92
|
+
su 134
|
93
|
+
dj 133
|
94
|
+
ije 133
|
95
|
+
ar 127
|
96
|
+
ca 127
|
97
|
+
_z 126
|
98
|
+
nj 125
|
99
|
+
ji 124
|
100
|
+
da_ 124
|
101
|
+
_ko 123
|
102
|
+
_da 122
|
103
|
+
il 121
|
104
|
+
" 120
|
105
|
+
av 120
|
106
|
+
_t 118
|
107
|
+
aj 116
|
108
|
+
ob 115
|
109
|
+
ro 114
|
110
|
+
am 114
|
111
|
+
vi 114
|
112
|
+
_su 113
|
113
|
+
om 112
|
114
|
+
dje 112
|
115
|
+
za 112
|
116
|
+
at 111
|
117
|
+
le 111
|
118
|
+
di 110
|
119
|
+
su_ 107
|
120
|
+
iz 107
|
121
|
+
ve 107
|
122
|
+
lj 106
|
123
|
+
_se 105
|
124
|
+
ev 105
|
125
|
+
is 105
|
126
|
+
es 103
|
127
|
+
se_ 103
|
128
|
+
do 101
|
129
|
+
ih 100
|
130
|
+
a, 99
|
131
|
+
_su_ 99
|
132
|
+
a,_ 99
|
133
|
+
on 98
|
134
|
+
bi 98
|
135
|
+
in 97
|
136
|
+
voj 97
|
137
|
+
az 96
|
138
|
+
ac 95
|
139
|
+
_da_ 95
|
140
|
+
la_ 95
|
141
|
+
_r 93
|
142
|
+
_g 93
|
143
|
+
jev 92
|
144
|
+
ma_ 92
|
145
|
+
er 92
|
146
|
+
or 91
|
147
|
+
h_ 90
|
148
|
+
_sa 90
|
149
|
+
e, 90
|
150
|
+
e,_ 90
|
151
|
+
ba 90
|
152
|
+
ima 89
|
153
|
+
a. 88
|
154
|
+
ol 88
|
155
|
+
_do 87
|
156
|
+
dn 87
|
157
|
+
it 87
|
158
|
+
ko_ 86
|
159
|
+
ne_ 86
|
160
|
+
ost 85
|
161
|
+
ek 85
|
162
|
+
to 85
|
163
|
+
d_ 84
|
164
|
+
as 84
|
165
|
+
ju_ 84
|
166
|
+
ao 84
|
167
|
+
ih_ 84
|
168
|
+
a._ 84
|
169
|
+
te 83
|
170
|
+
evo 83
|
171
|
+
koj 83
|
172
|
+
pri 82
|
173
|
+
jevo 82
|
174
|
+
ce 81
|
175
|
+
_se_ 81
|
176
|
+
og 80
|
177
|
+
go 80
|
178
|
+
jevoj 79
|
179
|
+
de 79
|
180
|
+
uc 79
|
181
|
+
evoj 79
|
182
|
+
_od 78
|
183
|
+
_za 78
|
184
|
+
tr 78
|
185
|
+
S 77
|
186
|
+
_koj 76
|
187
|
+
ke 75
|
188
|
+
_v 75
|
189
|
+
ao_ 75
|
190
|
+
_dje 74
|
191
|
+
_bi 74
|
192
|
+
sta 74
|
193
|
+
_dj 74
|
194
|
+
cij 74
|
195
|
+
ik 74
|
196
|
+
djev 73
|
197
|
+
sl 73
|
198
|
+
_djev 72
|
199
|
+
ga 72
|
200
|
+
djevo 72
|
201
|
+
_ka 71
|
202
|
+
rij 71
|
203
|
+
_iz 71
|
204
|
+
P 71
|
205
|
+
_pri 70
|
206
|
+
_a 69
|
207
|
+
us 68
|
208
|
+
_S 68
|
209
|
+
mo 67
|
210
|
+
el 67
|
211
|
+
sk 66
|
212
|
+
me 66
|
213
|
+
zi 66
|
214
|
+
ija 65
|
215
|
+
n_ 65
|
216
|
+
ku 64
|
217
|
+
im_ 63
|
218
|
+
_st 63
|
219
|
+
ica 63
|
220
|
+
_na_ 62
|
221
|
+
_ne 62
|
222
|
+
em 61
|
223
|
+
edn 61
|
224
|
+
jk 61
|
225
|
+
io 61
|
226
|
+
li_ 60
|
227
|
+
ojk 60
|
228
|
+
evojk 60
|
229
|
+
_" 60
|
230
|
+
zn 60
|
231
|
+
vojk 60
|
232
|
+
pro 59
|
233
|
+
lo 59
|
234
|
+
ije_ 59
|
235
|
+
jed 58
|
236
|
+
ke_ 58
|
237
|
+
om_ 58
|
238
|
+
jen 58
|
239
|
+
sti 57
|
240
|
+
_im 57
|
241
|
+
le_ 57
|
242
|
+
_ra 56
|
243
|
+
e. 56
|
244
|
+
ze 55
|
245
|
+
_pro 55
|
246
|
+
nu 55
|
247
|
+
nje 55
|
248
|
+
ti_ 55
|
249
|
+
ec 55
|
250
|
+
pre 55
|
251
|
+
oc 54
|
252
|
+
aci 54
|
253
|
+
no_ 54
|
254
|
+
et 54
|
255
|
+
oji 53
|
256
|
+
si 53
|
257
|
+
ara 53
|
258
|
+
ama 53
|
259
|
+
z_ 53
|
260
|
+
pos 52
|
261
|
+
rad 52
|
262
|
+
ran 52
|
263
|
+
ima_ 52
|
264
|
+
ru 52
|
265
|
+
_P 52
|
266
|
+
tu 52
|
267
|
+
mu 51
|
268
|
+
e._ 51
|
269
|
+
ja_ 50
|
270
|
+
_pre 50
|
271
|
+
sa_ 49
|
272
|
+
io_ 49
|
273
|
+
od_ 48
|
274
|
+
ni_ 48
|
275
|
+
_nj 48
|
276
|
+
j_ 48
|
277
|
+
_pos 47
|
278
|
+
_c 47
|
279
|
+
ila 47
|
280
|
+
K 46
|
281
|
+
_sa_ 46
|
282
|
+
uz 46
|
283
|
+
N 46
|
284
|
+
_ni 45
|
285
|
+
zna 45
|
286
|
+
U 45
|
287
|
+
za_ 45
|
288
|
+
_no 45
|
289
|
+
ako 45
|
290
|
+
u, 44
|
291
|
+
lu 44
|
292
|
+
ali 44
|
293
|
+
u,_ 44
|
294
|
+
sto 44
|
295
|
+
ste 44
|
296
|
+
ve_ 44
|
297
|
+
ani 44
|
298
|
+
oli 44
|
299
|
+
aka 44
|
300
|
+
_jed 43
|
301
|
+
i,_ 43
|
302
|
+
ji_ 43
|
303
|
+
uci 43
|
304
|
+
i, 43
|
305
|
+
ci_ 43
|
306
|
+
osti 43
|
307
|
+
_N 42
|
308
|
+
dr 42
|
309
|
+
so 42
|
310
|
+
ust 41
|
311
|
+
ila_ 41
|
312
|
+
B 41
|
313
|
+
- 41
|
314
|
+
red 41
|
315
|
+
jke 41
|
316
|
+
sv 41
|
317
|
+
_go 41
|
318
|
+
bar 41
|
319
|
+
g_ 41
|
320
|
+
est 40
|
321
|
+
D 40
|
322
|
+
iv 40
|
323
|
+
vojke 40
|
324
|
+
aju 40
|
325
|
+
ta_ 40
|
326
|
+
A 40
|
327
|
+
lje 40
|
328
|
+
jedn 40
|
329
|
+
bil 40
|
330
|
+
ojke 40
|
331
|
+
ova 40
|
332
|
+
ati 39
|
333
|
+
_mu 39
|
334
|
+
pa 39
|
335
|
+
M 39
|
336
|
+
_ba 39
|
337
|
+
ca_ 39
|
338
|
+
O 39
|
339
|
+
ka_ 39
|
340
|
+
_a_ 38
|
341
|
+
_B 38
|
342
|
+
_ima 38
|
343
|
+
sn 38
|
344
|
+
nu_ 38
|
345
|
+
T 38
|
346
|
+
to_ 38
|
347
|
+
eg 38
|
348
|
+
ava 38
|
349
|
+
ros 37
|
350
|
+
ir 37
|
351
|
+
ala 37
|
352
|
+
og_ 37
|
353
|
+
osl 37
|
354
|
+
ovi 37
|
355
|
+
koji 37
|
356
|
+
_sv 37
|
357
|
+
dv 36
|
358
|
+
ric 36
|
359
|
+
_za_ 36
|
360
|
+
br 36
|
361
|
+
_on 36
|
362
|
+
odi 36
|
363
|
+
_koji 36
|
364
|
+
_jedn 35
|
365
|
+
nik 35
|
366
|
+
dno 35
|
367
|
+
_D 35
|
368
|
+
jo 35
|
369
|
+
tra 35
|
370
|
+
_M 35
|
371
|
+
sp 35
|
372
|
+
iz_ 35
|
373
|
+
oz 35
|
374
|
+
vr 35
|
375
|
+
u. 35
|
376
|
+
eri 35
|
377
|
+
I 35
|
378
|
+
eko 35
|
379
|
+
ale 35
|
380
|
+
_ma 34
|
381
|
+
lik 34
|
382
|
+
_bil 34
|
383
|
+
c_ 34
|
384
|
+
ut 34
|
385
|
+
je,_ 34
|
386
|
+
u._ 34
|
387
|
+
str 34
|
388
|
+
je, 34
|
389
|
+
adi 34
|
390
|
+
tit 34
|
391
|
+
_iz_ 34
|
392
|
+
iti 34
|
393
|
+
i. 33
|
394
|
+
_rad 33
|
395
|
+
ici 33
|
396
|
+
rost 33
|
397
|
+
aju_ 33
|
398
|
+
va_ 33
|
399
|
+
_ob 33
|
400
|
+
nog 33
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 21447
|
2
|
+
e 6375
|
3
|
+
a 5414
|
4
|
+
n 3228
|
5
|
+
r 3039
|
6
|
+
o 2968
|
7
|
+
t 2392
|
8
|
+
i 1812
|
9
|
+
h 1751
|
10
|
+
u 1650
|
11
|
+
l 1630
|
12
|
+
d 1506
|
13
|
+
a_ 1352
|
14
|
+
z 1319
|
15
|
+
t_ 1310
|
16
|
+
_e 1168
|
17
|
+
_a 1168
|
18
|
+
e_ 1133
|
19
|
+
m 1105
|
20
|
+
s 1100
|
21
|
+
g 1090
|
22
|
+
r_ 998
|
23
|
+
k 997
|
24
|
+
n_ 958
|
25
|
+
et 941
|
26
|
+
v 888
|
27
|
+
_d 868
|
28
|
+
an 859
|
29
|
+
. 846
|
30
|
+
' 841
|
31
|
+
en 836
|
32
|
+
b 757
|
33
|
+
, 749
|
34
|
+
,_ 743
|
35
|
+
._ 716
|
36
|
+
ar 703
|
37
|
+
ou 700
|
38
|
+
et_ 689
|
39
|
+
c 686
|
40
|
+
ez 572
|
41
|
+
'h 572
|
42
|
+
_g 565
|
43
|
+
er 555
|
44
|
+
p 553
|
45
|
+
_k 535
|
46
|
+
c'h 530
|
47
|
+
c' 530
|
48
|
+
nt 513
|
49
|
+
_h 505
|
50
|
+
re 505
|
51
|
+
ra 478
|
52
|
+
ha 466
|
53
|
+
� 458
|
54
|
+
ne 456
|
55
|
+
oa 454
|
56
|
+
_o 442
|
57
|
+
_b 434
|
58
|
+
- 432
|
59
|
+
zh 422
|
60
|
+
ar_ 415
|
61
|
+
_m 414
|
62
|
+
_e_ 414
|
63
|
+
nn 384
|
64
|
+
el 376
|
65
|
+
_a_ 356
|
66
|
+
ur 350
|
67
|
+
o_ 346
|
68
|
+
h_ 345
|
69
|
+
ve 340
|
70
|
+
nt_ 340
|
71
|
+
w 339
|
72
|
+
ke 338
|
73
|
+
de 333
|
74
|
+
a� 332
|
75
|
+
_p 332
|
76
|
+
s_ 327
|
77
|
+
he 325
|
78
|
+
on 318
|
79
|
+
le 318
|
80
|
+
ga 316
|
81
|
+
ma 315
|
82
|
+
_ar 312
|
83
|
+
eu 312
|
84
|
+
_n 310
|
85
|
+
an_ 298
|
86
|
+
ant 296
|
87
|
+
enn 285
|
88
|
+
z_ 282
|
89
|
+
_ar_ 281
|
90
|
+
be 280
|
91
|
+
_v 276
|
92
|
+
_r 272
|
93
|
+
al 270
|
94
|
+
en_ 268
|
95
|
+
_ke 267
|
96
|
+
l_ 264
|
97
|
+
em 264
|
98
|
+
_c 263
|
99
|
+
�_ 262
|
100
|
+
da 262
|
101
|
+
_s 261
|
102
|
+
ho 260
|
103
|
+
di 259
|
104
|
+
_ha 252
|
105
|
+
ll 250
|
106
|
+
tr 248
|
107
|
+
oa_ 247
|
108
|
+
me 246
|
109
|
+
us 242
|
110
|
+
_ga 234
|
111
|
+
la 231
|
112
|
+
ket 227
|
113
|
+
ant_ 219
|
114
|
+
_da 219
|
115
|
+
_l 216
|
116
|
+
ur_ 216
|
117
|
+
_oa 215
|
118
|
+
in 214
|
119
|
+
ket_ 211
|
120
|
+
gan 211
|
121
|
+
_c' 207
|
122
|
+
_u 207
|
123
|
+
_c'h 207
|
124
|
+
ad 207
|
125
|
+
a�_ 207
|
126
|
+
ao 204
|
127
|
+
_ma 204
|
128
|
+
_t 204
|
129
|
+
_ket 201
|
130
|
+
_an 199
|
131
|
+
_di 197
|
132
|
+
ezh 196
|
133
|
+
� 196
|
134
|
+
o� 196
|
135
|
+
_de 195
|
136
|
+
ev 193
|
137
|
+
? 192
|
138
|
+
st 192
|
139
|
+
ro 192
|
140
|
+
P 192
|
141
|
+
_ket_ 188
|
142
|
+
er_ 188
|
143
|
+
f 186
|
144
|
+
na 186
|
145
|
+
ue 185
|
146
|
+
da_ 184
|
147
|
+
?_ 184
|
148
|
+
_gan 184
|
149
|
+
_da_ 184
|
150
|
+
_ne 183
|
151
|
+
ed 182
|
152
|
+
_P 180
|
153
|
+
g_ 180
|
154
|
+
pe 179
|
155
|
+
m_ 178
|
156
|
+
A 177
|
157
|
+
ri 176
|
158
|
+
us_ 175
|
159
|
+
ta 174
|
160
|
+
ze 174
|
161
|
+
gant 174
|
162
|
+
ka 174
|
163
|
+
i_ 172
|
164
|
+
d_ 171
|
165
|
+
G 167
|
166
|
+
te 167
|
167
|
+
ae 166
|
168
|
+
zh_ 164
|
169
|
+
ha_ 163
|
170
|
+
_ha_ 163
|
171
|
+
_he 161
|
172
|
+
_gant 159
|
173
|
+
do 159
|
174
|
+
oue 159
|
175
|
+
_G 158
|
176
|
+
eus 158
|
177
|
+
eo 158
|
178
|
+
'h_ 157
|
179
|
+
_en 157
|
180
|
+
go 157
|
181
|
+
am 157
|
182
|
+
c'h_ 157
|
183
|
+
_be 156
|
184
|
+
we 156
|
185
|
+
iz 154
|
186
|
+
_an_ 151
|
187
|
+
_A 150
|
188
|
+
eus_ 147
|
189
|
+
sk 147
|
190
|
+
li 146
|
191
|
+
as 146
|
192
|
+
_pe 146
|
193
|
+
j 146
|
194
|
+
_oa_ 146
|
195
|
+
av 144
|
196
|
+
gant_ 143
|
197
|
+
ut 142
|
198
|
+
no 141
|
199
|
+
vez 140
|
200
|
+
va 140
|
201
|
+
_ra 140
|
202
|
+
ge 138
|
203
|
+
ez_ 138
|
204
|
+
bo 137
|
205
|
+
� 137
|
206
|
+
_ur 136
|
207
|
+
lo 134
|
208
|
+
he_ 134
|
209
|
+
o�_ 133
|
210
|
+
�_ 133
|
211
|
+
_ur_ 132
|
212
|
+
es 130
|
213
|
+
'ho 129
|
214
|
+
ni 129
|
215
|
+
uz 129
|
216
|
+
tra 127
|
217
|
+
se 126
|
218
|
+
it 125
|
219
|
+
ra_ 125
|
220
|
+
out 125
|
221
|
+
is 125
|
222
|
+
at 125
|
223
|
+
hi 125
|
224
|
+
eg 125
|
225
|
+
ig 124
|
226
|
+
ko 124
|
227
|
+
io 123
|
228
|
+
k_ 123
|
229
|
+
ch 123
|
230
|
+
_w 121
|
231
|
+
or 121
|
232
|
+
Pe 121
|
233
|
+
_ma_ 119
|
234
|
+
ma_ 119
|
235
|
+
gw 118
|
236
|
+
_em 118
|
237
|
+
_Pe 118
|
238
|
+
un 118
|
239
|
+
eme 117
|
240
|
+
ne_ 117
|
241
|
+
nn_ 117
|
242
|
+
c'ho 117
|
243
|
+
ol 116
|
244
|
+
ag 116
|
245
|
+
M 115
|
246
|
+
'ha 115
|
247
|
+
_en_ 115
|
248
|
+
iv 115
|
249
|
+
vi 113
|
250
|
+
_ka 113
|
251
|
+
K 113
|
252
|
+
ud 112
|
253
|
+
_he_ 111
|
254
|
+
ont 110
|
255
|
+
oc 110
|
256
|
+
vo 110
|
257
|
+
ec 109
|
258
|
+
wa 109
|
259
|
+
.. 107
|
260
|
+
_M 107
|
261
|
+
_z 107
|
262
|
+
br 107
|
263
|
+
om 106
|
264
|
+
to 105
|
265
|
+
_f 105
|
266
|
+
N 105
|
267
|
+
_c'ho 104
|
268
|
+
ti 104
|
269
|
+
ut_ 104
|
270
|
+
D 104
|
271
|
+
_o_ 103
|
272
|
+
_la 103
|
273
|
+
_go 101
|
274
|
+
az 101
|
275
|
+
out_ 101
|
276
|
+
ba 101
|
277
|
+
enn_ 101
|
278
|
+
c'ha 101
|
279
|
+
our 100
|
280
|
+
oc'h 100
|
281
|
+
ell 100
|
282
|
+
oc' 100
|
283
|
+
etr 99
|
284
|
+
el_ 99
|
285
|
+
_K 99
|
286
|
+
_D 99
|
287
|
+
: 99
|
288
|
+
:_ 99
|
289
|
+
eve 98
|
290
|
+
_d' 97
|
291
|
+
all 97
|
292
|
+
d' 97
|
293
|
+
E 97
|
294
|
+
_ne_ 97
|
295
|
+
_me 95
|
296
|
+
eo_ 95
|
297
|
+
ak 95
|
298
|
+
bet 95
|
299
|
+
_eu 95
|
300
|
+
rc 94
|
301
|
+
_do 94
|
302
|
+
_gw 94
|
303
|
+
zi 93
|
304
|
+
oz 93
|
305
|
+
aou 93
|
306
|
+
etra 92
|
307
|
+
pa 91
|
308
|
+
ab 90
|
309
|
+
on_ 90
|
310
|
+
ei 90
|
311
|
+
tra_ 90
|
312
|
+
n, 89
|
313
|
+
zo 89
|
314
|
+
ag_ 89
|
315
|
+
_ev 88
|
316
|
+
ul 88
|
317
|
+
'e 88
|
318
|
+
n' 88
|
319
|
+
n,_ 88
|
320
|
+
ouz 87
|
321
|
+
v_ 86
|
322
|
+
_n' 86
|
323
|
+
_eus 84
|
324
|
+
H 83
|
325
|
+
za 83
|
326
|
+
S 83
|
327
|
+
etra_ 83
|
328
|
+
_eo 82
|
329
|
+
t,_ 82
|
330
|
+
t, 82
|
331
|
+
il 81
|
332
|
+
ent 81
|
333
|
+
fe 81
|
334
|
+
rc'h 81
|
335
|
+
rc' 81
|
336
|
+
_eus_ 80
|
337
|
+
ie 80
|
338
|
+
_bo 79
|
339
|
+
ele 79
|
340
|
+
_ve 79
|
341
|
+
mp 79
|
342
|
+
_bet 78
|
343
|
+
B 78
|
344
|
+
it_ 77
|
345
|
+
_vo 77
|
346
|
+
'a 77
|
347
|
+
n. 76
|
348
|
+
_S 76
|
349
|
+
hag 76
|
350
|
+
hoa 75
|
351
|
+
_hag 75
|
352
|
+
len 75
|
353
|
+
_N 75
|
354
|
+
'hoa 74
|
355
|
+
_E 74
|
356
|
+
ir 74
|
357
|
+
hag_ 74
|
358
|
+
_hag_ 74
|
359
|
+
ma� 74
|
360
|
+
as_ 73
|
361
|
+
eze 73
|
362
|
+
ont_ 73
|
363
|
+
_. 73
|
364
|
+
i� 73
|
365
|
+
r, 72
|
366
|
+
pr 72
|
367
|
+
ed_ 72
|
368
|
+
n._ 72
|
369
|
+
re_ 72
|
370
|
+
in_ 72
|
371
|
+
r,_ 72
|
372
|
+
_H 71
|
373
|
+
'he 70
|
374
|
+
t. 70
|
375
|
+
gou 70
|
376
|
+
em_ 70
|
377
|
+
_br 70
|
378
|
+
rae 70
|
379
|
+
rez 69
|
380
|
+
t._ 69
|
381
|
+
bet_ 69
|
382
|
+
net 69
|
383
|
+
dr 68
|
384
|
+
_eo_ 68
|
385
|
+
ll_ 68
|
386
|
+
mo 67
|
387
|
+
po 67
|
388
|
+
oul 67
|
389
|
+
rou 67
|
390
|
+
c'hoa 67
|
391
|
+
a- 67
|
392
|
+
vel 67
|
393
|
+
oc'h_ 67
|
394
|
+
nna 66
|
395
|
+
_B 66
|
396
|
+
met 65
|
397
|
+
ec' 65
|
398
|
+
ec'h 65
|
399
|
+
R 64
|
400
|
+
den 64
|