language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20800
|
2
|
+
e 7258
|
3
|
+
i 4051
|
4
|
+
s 4003
|
5
|
+
a 3972
|
6
|
+
n 3903
|
7
|
+
r 3650
|
8
|
+
t 3590
|
9
|
+
u 2968
|
10
|
+
o 2823
|
11
|
+
l 2723
|
12
|
+
e_ 2632
|
13
|
+
d 2241
|
14
|
+
s_ 1721
|
15
|
+
_d 1693
|
16
|
+
c 1663
|
17
|
+
p 1528
|
18
|
+
� 1320
|
19
|
+
m 1297
|
20
|
+
es 1164
|
21
|
+
t_ 1106
|
22
|
+
_l 1079
|
23
|
+
de 1048
|
24
|
+
on 959
|
25
|
+
_de 940
|
26
|
+
en 939
|
27
|
+
_p 852
|
28
|
+
nt 825
|
29
|
+
le 808
|
30
|
+
es_ 791
|
31
|
+
re 777
|
32
|
+
, 721
|
33
|
+
,_ 720
|
34
|
+
n_ 703
|
35
|
+
de_ 685
|
36
|
+
' 670
|
37
|
+
an 667
|
38
|
+
_de_ 645
|
39
|
+
v 641
|
40
|
+
_s 610
|
41
|
+
r_ 596
|
42
|
+
_c 594
|
43
|
+
er 585
|
44
|
+
ai 575
|
45
|
+
_a 558
|
46
|
+
_e 554
|
47
|
+
ou 554
|
48
|
+
q 549
|
49
|
+
qu 538
|
50
|
+
is 530
|
51
|
+
te 528
|
52
|
+
ti 525
|
53
|
+
ur 519
|
54
|
+
it 514
|
55
|
+
g 498
|
56
|
+
a_ 490
|
57
|
+
f 480
|
58
|
+
la 476
|
59
|
+
in 475
|
60
|
+
_le 441
|
61
|
+
me 436
|
62
|
+
nt_ 432
|
63
|
+
. 427
|
64
|
+
b 427
|
65
|
+
ra 423
|
66
|
+
io 416
|
67
|
+
ent 415
|
68
|
+
._ 404
|
69
|
+
ne 395
|
70
|
+
ns 392
|
71
|
+
ion 383
|
72
|
+
h 381
|
73
|
+
ue 376
|
74
|
+
se 371
|
75
|
+
le_ 370
|
76
|
+
ar 370
|
77
|
+
ie 362
|
78
|
+
co 361
|
79
|
+
at 359
|
80
|
+
tr 359
|
81
|
+
et 349
|
82
|
+
pr 342
|
83
|
+
ce 336
|
84
|
+
au 328
|
85
|
+
u_ 321
|
86
|
+
il 314
|
87
|
+
_r 313
|
88
|
+
_la 304
|
89
|
+
un 303
|
90
|
+
eu 303
|
91
|
+
st 300
|
92
|
+
re_ 296
|
93
|
+
ro 290
|
94
|
+
la_ 288
|
95
|
+
on_ 287
|
96
|
+
_m 286
|
97
|
+
_la_ 283
|
98
|
+
que 281
|
99
|
+
_qu 280
|
100
|
+
_q 280
|
101
|
+
po 275
|
102
|
+
tio 273
|
103
|
+
tion 273
|
104
|
+
pa 273
|
105
|
+
li 271
|
106
|
+
_t 269
|
107
|
+
nc 268
|
108
|
+
si 266
|
109
|
+
_pr 265
|
110
|
+
ri 264
|
111
|
+
al 263
|
112
|
+
ui 262
|
113
|
+
_co 259
|
114
|
+
i_ 255
|
115
|
+
ta 255
|
116
|
+
�_ 251
|
117
|
+
x 247
|
118
|
+
em 244
|
119
|
+
l_ 243
|
120
|
+
et_ 238
|
121
|
+
_l' 236
|
122
|
+
l' 236
|
123
|
+
les 233
|
124
|
+
ns_ 233
|
125
|
+
ir 232
|
126
|
+
_le_ 228
|
127
|
+
ent_ 227
|
128
|
+
or 226
|
129
|
+
r� 224
|
130
|
+
_f 224
|
131
|
+
ne_ 222
|
132
|
+
� 221
|
133
|
+
ve 220
|
134
|
+
ch 220
|
135
|
+
it_ 219
|
136
|
+
di 219
|
137
|
+
oi 217
|
138
|
+
- 216
|
139
|
+
ni 215
|
140
|
+
�_ 215
|
141
|
+
les_ 215
|
142
|
+
d' 214
|
143
|
+
el 212
|
144
|
+
ss 212
|
145
|
+
_n 212
|
146
|
+
ut 211
|
147
|
+
our 210
|
148
|
+
des 210
|
149
|
+
" 208
|
150
|
+
ur_ 207
|
151
|
+
nd 207
|
152
|
+
er_ 206
|
153
|
+
ait 206
|
154
|
+
ion_ 204
|
155
|
+
rs 202
|
156
|
+
_en 201
|
157
|
+
_et 200
|
158
|
+
j 200
|
159
|
+
_d' 200
|
160
|
+
ll 199
|
161
|
+
_des 198
|
162
|
+
des_ 197
|
163
|
+
_pa 197
|
164
|
+
t� 196
|
165
|
+
_et_ 195
|
166
|
+
_� 195
|
167
|
+
_�_ 195
|
168
|
+
om 193
|
169
|
+
ma 192
|
170
|
+
ati 190
|
171
|
+
_des_ 189
|
172
|
+
L 188
|
173
|
+
so 187
|
174
|
+
_u 185
|
175
|
+
� 184
|
176
|
+
_" 183
|
177
|
+
sa 182
|
178
|
+
_po 181
|
179
|
+
tre 181
|
180
|
+
d� 181
|
181
|
+
ue_ 180
|
182
|
+
pe 179
|
183
|
+
en_ 179
|
184
|
+
ont 178
|
185
|
+
_un 178
|
186
|
+
_L 178
|
187
|
+
us 176
|
188
|
+
_les 176
|
189
|
+
_les_ 176
|
190
|
+
rt 176
|
191
|
+
is_ 173
|
192
|
+
_i 173
|
193
|
+
du 172
|
194
|
+
e,_ 171
|
195
|
+
e, 171
|
196
|
+
na 171
|
197
|
+
s, 170
|
198
|
+
s,_ 170
|
199
|
+
as 169
|
200
|
+
men 169
|
201
|
+
M 167
|
202
|
+
ait_ 167
|
203
|
+
'a 166
|
204
|
+
vi 162
|
205
|
+
ci 159
|
206
|
+
ant 158
|
207
|
+
_au 158
|
208
|
+
da 157
|
209
|
+
_M 157
|
210
|
+
ation 155
|
211
|
+
atio 155
|
212
|
+
con 154
|
213
|
+
que_ 153
|
214
|
+
ons 153
|
215
|
+
eur 151
|
216
|
+
est 149
|
217
|
+
me_ 149
|
218
|
+
mi 149
|
219
|
+
par 148
|
220
|
+
tion_ 148
|
221
|
+
_so 147
|
222
|
+
te_ 147
|
223
|
+
res 144
|
224
|
+
lo 144
|
225
|
+
ment 144
|
226
|
+
�s 144
|
227
|
+
ans 143
|
228
|
+
_du 142
|
229
|
+
du_ 141
|
230
|
+
ux 141
|
231
|
+
un_ 140
|
232
|
+
y 138
|
233
|
+
pro 138
|
234
|
+
_du_ 136
|
235
|
+
_d� 136
|
236
|
+
ce_ 135
|
237
|
+
_se 134
|
238
|
+
_re 134
|
239
|
+
pl 133
|
240
|
+
A 132
|
241
|
+
ge 131
|
242
|
+
ic 131
|
243
|
+
su 130
|
244
|
+
x_ 129
|
245
|
+
ien 129
|
246
|
+
nce 129
|
247
|
+
"_ 129
|
248
|
+
ac 128
|
249
|
+
il_ 128
|
250
|
+
qui 128
|
251
|
+
_pro 127
|
252
|
+
no 127
|
253
|
+
av 126
|
254
|
+
_v 125
|
255
|
+
_o 125
|
256
|
+
rs_ 125
|
257
|
+
ans_ 124
|
258
|
+
eme 124
|
259
|
+
bl 123
|
260
|
+
emen 122
|
261
|
+
_en_ 122
|
262
|
+
iqu 122
|
263
|
+
ct 122
|
264
|
+
iq 122
|
265
|
+
lle 122
|
266
|
+
nn 121
|
267
|
+
ts 121
|
268
|
+
ement 121
|
269
|
+
�t 120
|
270
|
+
_"_ 120
|
271
|
+
�r 119
|
272
|
+
t�_ 119
|
273
|
+
_ce 119
|
274
|
+
mp 119
|
275
|
+
ire 119
|
276
|
+
ui_ 119
|
277
|
+
to 118
|
278
|
+
he 117
|
279
|
+
_� 117
|
280
|
+
ca 117
|
281
|
+
_j 116
|
282
|
+
ec 116
|
283
|
+
va 116
|
284
|
+
_par 116
|
285
|
+
�e 115
|
286
|
+
_con 115
|
287
|
+
se_ 114
|
288
|
+
tre_ 113
|
289
|
+
ique 112
|
290
|
+
dan 111
|
291
|
+
�c 111
|
292
|
+
ha 110
|
293
|
+
une 110
|
294
|
+
P 110
|
295
|
+
lu 110
|
296
|
+
ux_ 109
|
297
|
+
_b 108
|
298
|
+
s. 108
|
299
|
+
pou 108
|
300
|
+
_pou 108
|
301
|
+
ier 107
|
302
|
+
C 107
|
303
|
+
ais 106
|
304
|
+
s._ 105
|
305
|
+
ain 104
|
306
|
+
_un_ 104
|
307
|
+
nte 103
|
308
|
+
'e 103
|
309
|
+
mo 103
|
310
|
+
mm 103
|
311
|
+
ment_ 102
|
312
|
+
une_ 102
|
313
|
+
com 101
|
314
|
+
_P 101
|
315
|
+
'i 101
|
316
|
+
_ma 100
|
317
|
+
do 99
|
318
|
+
ant_ 98
|
319
|
+
anc 98
|
320
|
+
che 97
|
321
|
+
ap 97
|
322
|
+
ont_ 97
|
323
|
+
_que 97
|
324
|
+
os 97
|
325
|
+
urs 96
|
326
|
+
_di 96
|
327
|
+
fi 96
|
328
|
+
im 96
|
329
|
+
pour 96
|
330
|
+
_pour 96
|
331
|
+
� 95
|
332
|
+
ts_ 95
|
333
|
+
_g 95
|
334
|
+
our_ 94
|
335
|
+
_sa 94
|
336
|
+
ntr 94
|
337
|
+
_da 94
|
338
|
+
_r� 93
|
339
|
+
rai 93
|
340
|
+
rm 93
|
341
|
+
_qui 93
|
342
|
+
e. 92
|
343
|
+
am 92
|
344
|
+
_com 91
|
345
|
+
uv 91
|
346
|
+
_C 91
|
347
|
+
D 91
|
348
|
+
qui_ 90
|
349
|
+
e._ 90
|
350
|
+
pu 89
|
351
|
+
_qui_ 88
|
352
|
+
ia 87
|
353
|
+
_dan 87
|
354
|
+
_dans 87
|
355
|
+
dans 87
|
356
|
+
ter 87
|
357
|
+
fo 87
|
358
|
+
son 87
|
359
|
+
dans_ 87
|
360
|
+
id 86
|
361
|
+
ag 86
|
362
|
+
ine 86
|
363
|
+
tu 85
|
364
|
+
ran 85
|
365
|
+
au_ 85
|
366
|
+
ol 85
|
367
|
+
oc 84
|
368
|
+
est_ 84
|
369
|
+
st_ 84
|
370
|
+
enc 84
|
371
|
+
F 82
|
372
|
+
_tr 81
|
373
|
+
'u 81
|
374
|
+
tai 81
|
375
|
+
ell 80
|
376
|
+
R 79
|
377
|
+
_su 79
|
378
|
+
S 79
|
379
|
+
ions 79
|
380
|
+
pr� 79
|
381
|
+
s� 78
|
382
|
+
ab 78
|
383
|
+
n� 77
|
384
|
+
_que_ 77
|
385
|
+
_in 77
|
386
|
+
_av 76
|
387
|
+
pour_ 76
|
388
|
+
fa 76
|
389
|
+
rr 76
|
390
|
+
air 75
|
391
|
+
_ch 75
|
392
|
+
_a_ 75
|
393
|
+
ba 74
|
394
|
+
_pl 74
|
395
|
+
gr 74
|
396
|
+
tt 74
|
397
|
+
ssi 74
|
398
|
+
rd 73
|
399
|
+
pas 73
|
400
|
+
bi 73
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 46446
|
2
|
+
e 15767
|
3
|
+
n 9616
|
4
|
+
i 7837
|
5
|
+
a 7830
|
6
|
+
t 7562
|
7
|
+
r 7297
|
8
|
+
s 6307
|
9
|
+
o 4784
|
10
|
+
n_ 4595
|
11
|
+
d 4564
|
12
|
+
e_ 4213
|
13
|
+
l 4032
|
14
|
+
k 3951
|
15
|
+
t_ 3079
|
16
|
+
en 2863
|
17
|
+
m 2605
|
18
|
+
er 2492
|
19
|
+
y 2462
|
20
|
+
f 2371
|
21
|
+
_d 2253
|
22
|
+
de 2124
|
23
|
+
h 1943
|
24
|
+
in 1899
|
25
|
+
w 1898
|
26
|
+
en_ 1857
|
27
|
+
_i 1815
|
28
|
+
u 1769
|
29
|
+
g 1738
|
30
|
+
an 1690
|
31
|
+
j 1678
|
32
|
+
p 1651
|
33
|
+
r_ 1619
|
34
|
+
_f 1602
|
35
|
+
. 1601
|
36
|
+
ar 1561
|
37
|
+
te 1545
|
38
|
+
b 1488
|
39
|
+
s_ 1476
|
40
|
+
._ 1435
|
41
|
+
_s 1370
|
42
|
+
de_ 1235
|
43
|
+
_de 1224
|
44
|
+
_w 1181
|
45
|
+
it 1172
|
46
|
+
ie 1140
|
47
|
+
, 1078
|
48
|
+
_o 1056
|
49
|
+
,_ 1056
|
50
|
+
oa 1045
|
51
|
+
_e 1040
|
52
|
+
st 1038
|
53
|
+
an_ 1010
|
54
|
+
_b 972
|
55
|
+
_de_ 965
|
56
|
+
ke 949
|
57
|
+
_m 947
|
58
|
+
_t 933
|
59
|
+
ne 920
|
60
|
+
er_ 895
|
61
|
+
at 863
|
62
|
+
sk 856
|
63
|
+
c 850
|
64
|
+
it_ 848
|
65
|
+
_h 838
|
66
|
+
ei 832
|
67
|
+
k_ 817
|
68
|
+
yn 790
|
69
|
+
ch 784
|
70
|
+
le 772
|
71
|
+
is 769
|
72
|
+
je 765
|
73
|
+
el 761
|
74
|
+
me 758
|
75
|
+
ea 754
|
76
|
+
_k 740
|
77
|
+
fa 736
|
78
|
+
in_ 735
|
79
|
+
' 733
|
80
|
+
_it 733
|
81
|
+
_it_ 727
|
82
|
+
ge 727
|
83
|
+
re 725
|
84
|
+
al 724
|
85
|
+
_fa 684
|
86
|
+
yn_ 640
|
87
|
+
aa 628
|
88
|
+
fan 626
|
89
|
+
_y 618
|
90
|
+
ar_ 617
|
91
|
+
et 616
|
92
|
+
ri 615
|
93
|
+
_fan 613
|
94
|
+
_n 612
|
95
|
+
li 611
|
96
|
+
_yn 610
|
97
|
+
_en 598
|
98
|
+
fan_ 594
|
99
|
+
oe 589
|
100
|
+
_fan_ 584
|
101
|
+
_en_ 583
|
102
|
+
at_ 581
|
103
|
+
_in 570
|
104
|
+
oar 565
|
105
|
+
_in_ 560
|
106
|
+
y_ 555
|
107
|
+
F 549
|
108
|
+
es 540
|
109
|
+
_a 536
|
110
|
+
ng 523
|
111
|
+
be 514
|
112
|
+
sj 512
|
113
|
+
nt 510
|
114
|
+
l_ 510
|
115
|
+
_F 506
|
116
|
+
ns 506
|
117
|
+
D 499
|
118
|
+
te_ 499
|
119
|
+
der 497
|
120
|
+
_yn_ 497
|
121
|
+
ti 493
|
122
|
+
ek 490
|
123
|
+
ro 476
|
124
|
+
rs 474
|
125
|
+
rd 473
|
126
|
+
se 470
|
127
|
+
fo 470
|
128
|
+
ys 462
|
129
|
+
op 461
|
130
|
+
� 461
|
131
|
+
we 456
|
132
|
+
ry 451
|
133
|
+
da 445
|
134
|
+
d_ 443
|
135
|
+
is_ 442
|
136
|
+
_D 440
|
137
|
+
ur 433
|
138
|
+
i_ 432
|
139
|
+
ha 426
|
140
|
+
_me 425
|
141
|
+
ear 422
|
142
|
+
_l 417
|
143
|
+
ed 410
|
144
|
+
as 409
|
145
|
+
om 407
|
146
|
+
ei_ 406
|
147
|
+
Fr 405
|
148
|
+
nd 404
|
149
|
+
_fo 394
|
150
|
+
_p 393
|
151
|
+
oc 390
|
152
|
+
rys 389
|
153
|
+
ol 386
|
154
|
+
_Fr 386
|
155
|
+
och 383
|
156
|
+
fe 381
|
157
|
+
ik 379
|
158
|
+
ma 379
|
159
|
+
ra 377
|
160
|
+
nn 374
|
161
|
+
_g 368
|
162
|
+
_da 367
|
163
|
+
di 363
|
164
|
+
ts 362
|
165
|
+
ta 361
|
166
|
+
a_ 360
|
167
|
+
ko 359
|
168
|
+
et_ 358
|
169
|
+
ysk 356
|
170
|
+
Fry 354
|
171
|
+
Frys 354
|
172
|
+
z 354
|
173
|
+
ll 350
|
174
|
+
_be 349
|
175
|
+
ke_ 348
|
176
|
+
I 347
|
177
|
+
ing 346
|
178
|
+
_' 345
|
179
|
+
m_ 343
|
180
|
+
h_ 340
|
181
|
+
ske 339
|
182
|
+
_ha 338
|
183
|
+
sje 336
|
184
|
+
_Fry 336
|
185
|
+
_Frys 336
|
186
|
+
wi 335
|
187
|
+
_op 334
|
188
|
+
p_ 334
|
189
|
+
_is 333
|
190
|
+
ch_ 333
|
191
|
+
tr 330
|
192
|
+
ten 328
|
193
|
+
ers 327
|
194
|
+
wa 325
|
195
|
+
ter 322
|
196
|
+
ji 322
|
197
|
+
rysk 319
|
198
|
+
_ne 319
|
199
|
+
je_ 312
|
200
|
+
foa 311
|
201
|
+
� 309
|
202
|
+
jo 307
|
203
|
+
_is_ 307
|
204
|
+
ste 307
|
205
|
+
_te 306
|
206
|
+
� 301
|
207
|
+
n. 301
|
208
|
+
nne 300
|
209
|
+
rt 300
|
210
|
+
foar 299
|
211
|
+
S 299
|
212
|
+
mei 299
|
213
|
+
_r 298
|
214
|
+
_oa 297
|
215
|
+
wu 293
|
216
|
+
ak 291
|
217
|
+
si 290
|
218
|
+
wur 290
|
219
|
+
ni 290
|
220
|
+
pe 288
|
221
|
+
Frysk 287
|
222
|
+
oan 286
|
223
|
+
n._ 285
|
224
|
+
_mei 284
|
225
|
+
der_ 281
|
226
|
+
_foar 281
|
227
|
+
_foa 281
|
228
|
+
ht 278
|
229
|
+
cht 277
|
230
|
+
- 275
|
231
|
+
� 275
|
232
|
+
_j 274
|
233
|
+
ne_ 273
|
234
|
+
ken 273
|
235
|
+
fer 271
|
236
|
+
�n 270
|
237
|
+
am 270
|
238
|
+
on 268
|
239
|
+
nde 264
|
240
|
+
_S 264
|
241
|
+
B 262
|
242
|
+
ki 261
|
243
|
+
id 261
|
244
|
+
le_ 261
|
245
|
+
dat 260
|
246
|
+
v 259
|
247
|
+
_I 256
|
248
|
+
oar_ 255
|
249
|
+
op_ 254
|
250
|
+
lle 252
|
251
|
+
la 244
|
252
|
+
tt 243
|
253
|
+
dat_ 242
|
254
|
+
_fe 242
|
255
|
+
mm 240
|
256
|
+
inn 239
|
257
|
+
_wi 239
|
258
|
+
_dat 237
|
259
|
+
g_ 236
|
260
|
+
mei_ 236
|
261
|
+
al_ 235
|
262
|
+
_B 234
|
263
|
+
e. 234
|
264
|
+
ld 231
|
265
|
+
_al 230
|
266
|
+
_wur 230
|
267
|
+
urd 230
|
268
|
+
_wu 230
|
269
|
+
_op_ 230
|
270
|
+
De 229
|
271
|
+
inne 228
|
272
|
+
ng_ 227
|
273
|
+
_mei_ 227
|
274
|
+
't 226
|
275
|
+
ks 226
|
276
|
+
't_ 224
|
277
|
+
_dat_ 224
|
278
|
+
_ko 223
|
279
|
+
_st 220
|
280
|
+
�n 219
|
281
|
+
rk 219
|
282
|
+
sa 219
|
283
|
+
e._ 219
|
284
|
+
tte 218
|
285
|
+
en. 218
|
286
|
+
n, 217
|
287
|
+
ier 216
|
288
|
+
ten_ 216
|
289
|
+
_fer 215
|
290
|
+
mi 215
|
291
|
+
hi 215
|
292
|
+
ien 214
|
293
|
+
_wa 213
|
294
|
+
n,_ 213
|
295
|
+
no 213
|
296
|
+
_te_ 212
|
297
|
+
ig 212
|
298
|
+
_De 212
|
299
|
+
ske_ 212
|
300
|
+
_der 212
|
301
|
+
W 211
|
302
|
+
H 211
|
303
|
+
_oan 210
|
304
|
+
ee 209
|
305
|
+
dy 208
|
306
|
+
ek_ 207
|
307
|
+
en._ 207
|
308
|
+
ic 207
|
309
|
+
mme 206
|
310
|
+
yk 204
|
311
|
+
pr 204
|
312
|
+
net 203
|
313
|
+
foar_ 203
|
314
|
+
he 203
|
315
|
+
wurd 201
|
316
|
+
_der_ 199
|
317
|
+
jen 199
|
318
|
+
_dy 199
|
319
|
+
kr 198
|
320
|
+
ka 197
|
321
|
+
im 196
|
322
|
+
_H 196
|
323
|
+
il 196
|
324
|
+
ze 196
|
325
|
+
_ma 195
|
326
|
+
by 194
|
327
|
+
oer 194
|
328
|
+
kt 193
|
329
|
+
us 193
|
330
|
+
M 193
|
331
|
+
sk_ 192
|
332
|
+
wo 192
|
333
|
+
_hi 191
|
334
|
+
or 190
|
335
|
+
ing_ 190
|
336
|
+
_W 190
|
337
|
+
ich 189
|
338
|
+
De_ 187
|
339
|
+
rr 186
|
340
|
+
int 185
|
341
|
+
_� 184
|
342
|
+
: 184
|
343
|
+
ij 184
|
344
|
+
_ek 183
|
345
|
+
eg 182
|
346
|
+
:_ 180
|
347
|
+
gen 180
|
348
|
+
as_ 180
|
349
|
+
_se 179
|
350
|
+
e, 178
|
351
|
+
_net 177
|
352
|
+
e,_ 177
|
353
|
+
ins 177
|
354
|
+
N 177
|
355
|
+
ls 176
|
356
|
+
st_ 176
|
357
|
+
_wurd 176
|
358
|
+
ie_ 175
|
359
|
+
E 175
|
360
|
+
nne_ 175
|
361
|
+
_De_ 175
|
362
|
+
sy 175
|
363
|
+
wer 174
|
364
|
+
gr 174
|
365
|
+
f_ 173
|
366
|
+
nk 172
|
367
|
+
och_ 172
|
368
|
+
net_ 169
|
369
|
+
ad 169
|
370
|
+
_we 169
|
371
|
+
rde 168
|
372
|
+
sl 168
|
373
|
+
bi 168
|
374
|
+
of 168
|
375
|
+
so 168
|
376
|
+
_no 167
|
377
|
+
_ta 167
|
378
|
+
re_ 167
|
379
|
+
to 167
|
380
|
+
den 167
|
381
|
+
J 167
|
382
|
+
t. 166
|
383
|
+
It 165
|
384
|
+
�t 165
|
385
|
+
inne_ 165
|
386
|
+
ysk_ 165
|
387
|
+
lik 164
|
388
|
+
sp 163
|
389
|
+
_ek_ 162
|
390
|
+
ou 162
|
391
|
+
tsj 162
|
392
|
+
It_ 161
|
393
|
+
_sa 160
|
394
|
+
A 160
|
395
|
+
wol 160
|
396
|
+
l� 159
|
397
|
+
_wo 159
|
398
|
+
ge_ 159
|
399
|
+
l�n 159
|
400
|
+
ige 158
|