language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 21274
|
2
|
+
e 9291
|
3
|
+
r 5307
|
4
|
+
n 4733
|
5
|
+
i 3976
|
6
|
+
t 3948
|
7
|
+
s 3751
|
8
|
+
a 3296
|
9
|
+
l 3063
|
10
|
+
d 3025
|
11
|
+
o 2868
|
12
|
+
g 2471
|
13
|
+
er 2164
|
14
|
+
k 2002
|
15
|
+
m 1680
|
16
|
+
e_ 1655
|
17
|
+
en 1613
|
18
|
+
f 1507
|
19
|
+
de 1484
|
20
|
+
r_ 1379
|
21
|
+
v 1245
|
22
|
+
u 1176
|
23
|
+
t_ 1081
|
24
|
+
n_ 1032
|
25
|
+
er_ 992
|
26
|
+
b 942
|
27
|
+
. 870
|
28
|
+
ge 868
|
29
|
+
._ 831
|
30
|
+
re 816
|
31
|
+
h 816
|
32
|
+
et 813
|
33
|
+
te 813
|
34
|
+
p 806
|
35
|
+
in 788
|
36
|
+
or 775
|
37
|
+
_s 753
|
38
|
+
_a 749
|
39
|
+
en_ 712
|
40
|
+
_e 691
|
41
|
+
ti 689
|
42
|
+
an 687
|
43
|
+
, 681
|
44
|
+
,_ 677
|
45
|
+
_f 655
|
46
|
+
_d 645
|
47
|
+
el 642
|
48
|
+
ng 635
|
49
|
+
nd 634
|
50
|
+
g_ 634
|
51
|
+
se 615
|
52
|
+
le 615
|
53
|
+
st 607
|
54
|
+
s_ 601
|
55
|
+
_o 572
|
56
|
+
ne 560
|
57
|
+
li 537
|
58
|
+
et_ 524
|
59
|
+
es 521
|
60
|
+
_i 512
|
61
|
+
ri 511
|
62
|
+
sk 510
|
63
|
+
_de 498
|
64
|
+
� 497
|
65
|
+
ar 475
|
66
|
+
ed 473
|
67
|
+
ig 463
|
68
|
+
at 452
|
69
|
+
_m 446
|
70
|
+
is 443
|
71
|
+
fo 441
|
72
|
+
� 441
|
73
|
+
ve 438
|
74
|
+
_k 434
|
75
|
+
� 432
|
76
|
+
der 429
|
77
|
+
ke 428
|
78
|
+
ing 427
|
79
|
+
og 426
|
80
|
+
_b 412
|
81
|
+
me 408
|
82
|
+
il 407
|
83
|
+
for 405
|
84
|
+
ns 394
|
85
|
+
y 389
|
86
|
+
_h 380
|
87
|
+
_t 374
|
88
|
+
on 371
|
89
|
+
d_ 370
|
90
|
+
al 362
|
91
|
+
be 359
|
92
|
+
_fo 351
|
93
|
+
af 336
|
94
|
+
de_ 335
|
95
|
+
_og 333
|
96
|
+
_p 332
|
97
|
+
og_ 325
|
98
|
+
om 325
|
99
|
+
_for 324
|
100
|
+
_og_ 313
|
101
|
+
l_ 308
|
102
|
+
nge 302
|
103
|
+
i_ 295
|
104
|
+
_v 294
|
105
|
+
c 289
|
106
|
+
ter 283
|
107
|
+
ll 280
|
108
|
+
ni 278
|
109
|
+
nde 278
|
110
|
+
rs 277
|
111
|
+
_af 277
|
112
|
+
un 275
|
113
|
+
ra 271
|
114
|
+
ko 271
|
115
|
+
den 270
|
116
|
+
_i_ 268
|
117
|
+
id 265
|
118
|
+
til 265
|
119
|
+
j 265
|
120
|
+
vi 264
|
121
|
+
D 260
|
122
|
+
ere 256
|
123
|
+
ma 255
|
124
|
+
si 253
|
125
|
+
f_ 252
|
126
|
+
af_ 238
|
127
|
+
_af_ 235
|
128
|
+
ik 235
|
129
|
+
m_ 234
|
130
|
+
�_ 232
|
131
|
+
_ti 227
|
132
|
+
_D 226
|
133
|
+
_u 226
|
134
|
+
_er 225
|
135
|
+
nt 224
|
136
|
+
_en 224
|
137
|
+
ls 221
|
138
|
+
es_ 216
|
139
|
+
lig 216
|
140
|
+
ger 216
|
141
|
+
re_ 210
|
142
|
+
ag 210
|
143
|
+
_me 207
|
144
|
+
at_ 204
|
145
|
+
lle 200
|
146
|
+
ge_ 200
|
147
|
+
_til 200
|
148
|
+
ige 199
|
149
|
+
_er_ 199
|
150
|
+
der_ 199
|
151
|
+
em 199
|
152
|
+
ds 197
|
153
|
+
r. 195
|
154
|
+
io 195
|
155
|
+
r._ 195
|
156
|
+
ud 193
|
157
|
+
_at 192
|
158
|
+
_at_ 191
|
159
|
+
ta 190
|
160
|
+
els 190
|
161
|
+
_l 190
|
162
|
+
ha 190
|
163
|
+
il_ 189
|
164
|
+
or_ 189
|
165
|
+
ke_ 186
|
166
|
+
rt 185
|
167
|
+
gen 184
|
168
|
+
ka 183
|
169
|
+
- 180
|
170
|
+
rk 180
|
171
|
+
ning 178
|
172
|
+
ol 178
|
173
|
+
nin 178
|
174
|
+
la 177
|
175
|
+
ld 175
|
176
|
+
De 175
|
177
|
+
it 173
|
178
|
+
ede 172
|
179
|
+
ed_ 171
|
180
|
+
_ko 171
|
181
|
+
lse 171
|
182
|
+
ek 168
|
183
|
+
else 167
|
184
|
+
inge 167
|
185
|
+
p� 167
|
186
|
+
ng_ 167
|
187
|
+
_p� 167
|
188
|
+
iv 166
|
189
|
+
�r 166
|
190
|
+
so 165
|
191
|
+
he 165
|
192
|
+
ens 165
|
193
|
+
ske 165
|
194
|
+
ind 164
|
195
|
+
til_ 163
|
196
|
+
rn 163
|
197
|
+
ide 162
|
198
|
+
ev 162
|
199
|
+
den_ 162
|
200
|
+
to 162
|
201
|
+
sen 160
|
202
|
+
_be 160
|
203
|
+
sa 160
|
204
|
+
bl 158
|
205
|
+
_g 158
|
206
|
+
an_ 157
|
207
|
+
det 156
|
208
|
+
om_ 156
|
209
|
+
ru 156
|
210
|
+
va 155
|
211
|
+
_til_ 155
|
212
|
+
ste 154
|
213
|
+
rd 153
|
214
|
+
_p�_ 152
|
215
|
+
k_ 152
|
216
|
+
p�_ 152
|
217
|
+
di 152
|
218
|
+
kr 152
|
219
|
+
K 151
|
220
|
+
_De 149
|
221
|
+
for_ 148
|
222
|
+
te_ 148
|
223
|
+
kon 148
|
224
|
+
ver 147
|
225
|
+
mm 146
|
226
|
+
am 146
|
227
|
+
_en_ 145
|
228
|
+
_r 145
|
229
|
+
ne_ 144
|
230
|
+
ing_ 144
|
231
|
+
tr 143
|
232
|
+
le_ 142
|
233
|
+
del 142
|
234
|
+
_in 142
|
235
|
+
gt 140
|
236
|
+
_st 138
|
237
|
+
S 138
|
238
|
+
eg 138
|
239
|
+
gs 138
|
240
|
+
tt 138
|
241
|
+
r, 137
|
242
|
+
ser 137
|
243
|
+
r,_ 137
|
244
|
+
er. 137
|
245
|
+
ro 137
|
246
|
+
er._ 137
|
247
|
+
_for_ 136
|
248
|
+
ent 136
|
249
|
+
kt 136
|
250
|
+
eri 135
|
251
|
+
ur 134
|
252
|
+
lin 134
|
253
|
+
B 133
|
254
|
+
A 133
|
255
|
+
sti 133
|
256
|
+
ner 133
|
257
|
+
da 133
|
258
|
+
ris 132
|
259
|
+
ion 132
|
260
|
+
_K 131
|
261
|
+
ern 131
|
262
|
+
ers 130
|
263
|
+
ist 130
|
264
|
+
�r 130
|
265
|
+
ige_ 130
|
266
|
+
_si 130
|
267
|
+
tte 129
|
268
|
+
E 128
|
269
|
+
_n 128
|
270
|
+
nn 127
|
271
|
+
_B 126
|
272
|
+
_ha 126
|
273
|
+
_. 126
|
274
|
+
rne 125
|
275
|
+
H 125
|
276
|
+
_ud 125
|
277
|
+
rin 124
|
278
|
+
na 124
|
279
|
+
und 124
|
280
|
+
ft 124
|
281
|
+
_der 124
|
282
|
+
ku 123
|
283
|
+
_A 122
|
284
|
+
ler 120
|
285
|
+
and 120
|
286
|
+
end 120
|
287
|
+
ns_ 120
|
288
|
+
rg 119
|
289
|
+
op 119
|
290
|
+
er,_ 119
|
291
|
+
er, 119
|
292
|
+
ar_ 118
|
293
|
+
P 118
|
294
|
+
_S 117
|
295
|
+
_H 117
|
296
|
+
_._ 116
|
297
|
+
ov 116
|
298
|
+
erne 115
|
299
|
+
tio 115
|
300
|
+
med 115
|
301
|
+
tion 115
|
302
|
+
_E 115
|
303
|
+
_P 115
|
304
|
+
det_ 114
|
305
|
+
pr 114
|
306
|
+
e. 113
|
307
|
+
ter_ 113
|
308
|
+
: 113
|
309
|
+
kk 113
|
310
|
+
e._ 113
|
311
|
+
e,_ 113
|
312
|
+
e, 113
|
313
|
+
od 113
|
314
|
+
kke 113
|
315
|
+
ten 113
|
316
|
+
ling 113
|
317
|
+
:_ 112
|
318
|
+
mi 112
|
319
|
+
eli 112
|
320
|
+
lo 111
|
321
|
+
som 111
|
322
|
+
_den 111
|
323
|
+
rb 110
|
324
|
+
se_ 110
|
325
|
+
ell 110
|
326
|
+
sid 110
|
327
|
+
nne 109
|
328
|
+
fi 108
|
329
|
+
lt 107
|
330
|
+
v_ 107
|
331
|
+
_de_ 107
|
332
|
+
ark 106
|
333
|
+
lige 106
|
334
|
+
ngen 106
|
335
|
+
ie 105
|
336
|
+
_med 105
|
337
|
+
_der_ 105
|
338
|
+
ring 105
|
339
|
+
a_ 105
|
340
|
+
_vi 104
|
341
|
+
-_ 104
|
342
|
+
ys 103
|
343
|
+
gel 103
|
344
|
+
_so 103
|
345
|
+
ia 103
|
346
|
+
ive 102
|
347
|
+
ej 101
|
348
|
+
ati 101
|
349
|
+
ren 101
|
350
|
+
_det 101
|
351
|
+
side 101
|
352
|
+
ske_ 101
|
353
|
+
br 100
|
354
|
+
gi 100
|
355
|
+
F 100
|
356
|
+
M 100
|
357
|
+
ul 99
|
358
|
+
isk 99
|
359
|
+
men 99
|
360
|
+
n,_ 99
|
361
|
+
age 99
|
362
|
+
fr 99
|
363
|
+
n, 99
|
364
|
+
tu 98
|
365
|
+
ts 98
|
366
|
+
_ma 98
|
367
|
+
nder 98
|
368
|
+
ot 97
|
369
|
+
dt 97
|
370
|
+
R 97
|
371
|
+
med_ 96
|
372
|
+
ho 96
|
373
|
+
ans 95
|
374
|
+
_kon 95
|
375
|
+
pe 95
|
376
|
+
ce 94
|
377
|
+
gr 93
|
378
|
+
mme 92
|
379
|
+
ret 92
|
380
|
+
lige_ 92
|
381
|
+
mu 91
|
382
|
+
_med_ 91
|
383
|
+
hv 91
|
384
|
+
v� 91
|
385
|
+
Det 91
|
386
|
+
ens_ 91
|
387
|
+
kl 91
|
388
|
+
_M 90
|
389
|
+
T 90
|
390
|
+
ingen 90
|
391
|
+
rm 90
|
392
|
+
ill 89
|
393
|
+
elle 89
|
394
|
+
ef 89
|
395
|
+
ene 89
|
396
|
+
nds 89
|
397
|
+
ove 89
|
398
|
+
som_ 89
|
399
|
+
C 88
|
400
|
+
_den_ 88
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20104
|
2
|
+
e 9848
|
3
|
+
n 5323
|
4
|
+
a 3733
|
5
|
+
t 3683
|
6
|
+
i 3490
|
7
|
+
r 3195
|
8
|
+
d 2876
|
9
|
+
o 2845
|
10
|
+
n_ 2443
|
11
|
+
en 2439
|
12
|
+
s 2195
|
13
|
+
e_ 1842
|
14
|
+
l 1837
|
15
|
+
g 1522
|
16
|
+
en_ 1500
|
17
|
+
de 1489
|
18
|
+
er 1388
|
19
|
+
t_ 1377
|
20
|
+
v 1253
|
21
|
+
u 1217
|
22
|
+
k 1204
|
23
|
+
_d 1136
|
24
|
+
h 1102
|
25
|
+
m 1084
|
26
|
+
an 939
|
27
|
+
te 875
|
28
|
+
j 857
|
29
|
+
in 810
|
30
|
+
_v 793
|
31
|
+
r_ 751
|
32
|
+
de_ 742
|
33
|
+
ee 737
|
34
|
+
p 732
|
35
|
+
et 718
|
36
|
+
ge 716
|
37
|
+
aa 708
|
38
|
+
b 703
|
39
|
+
_e 686
|
40
|
+
st 669
|
41
|
+
z 668
|
42
|
+
ie 662
|
43
|
+
_de 655
|
44
|
+
w 631
|
45
|
+
c 611
|
46
|
+
. 604
|
47
|
+
s_ 582
|
48
|
+
_de_ 576
|
49
|
+
_h 572
|
50
|
+
el 570
|
51
|
+
ij 564
|
52
|
+
._ 554
|
53
|
+
et_ 531
|
54
|
+
an_ 522
|
55
|
+
he 505
|
56
|
+
_o 497
|
57
|
+
nd 478
|
58
|
+
_i 475
|
59
|
+
ar 459
|
60
|
+
_m 451
|
61
|
+
re 442
|
62
|
+
ve 441
|
63
|
+
' 428
|
64
|
+
or 424
|
65
|
+
ng 421
|
66
|
+
at 418
|
67
|
+
_s 415
|
68
|
+
oo 403
|
69
|
+
_z 401
|
70
|
+
le 395
|
71
|
+
_b 394
|
72
|
+
_a 391
|
73
|
+
_he 386
|
74
|
+
va 385
|
75
|
+
er_ 381
|
76
|
+
me 372
|
77
|
+
_w 368
|
78
|
+
f 361
|
79
|
+
on 351
|
80
|
+
_t 351
|
81
|
+
_va 345
|
82
|
+
_g 342
|
83
|
+
di 342
|
84
|
+
nt 340
|
85
|
+
, 335
|
86
|
+
g_ 335
|
87
|
+
,_ 334
|
88
|
+
van 327
|
89
|
+
ch 326
|
90
|
+
is 326
|
91
|
+
ing 325
|
92
|
+
be 325
|
93
|
+
ni 320
|
94
|
+
it 317
|
95
|
+
een 316
|
96
|
+
_van 315
|
97
|
+
al 310
|
98
|
+
den 309
|
99
|
+
ti 309
|
100
|
+
van_ 307
|
101
|
+
oe 302
|
102
|
+
ke 302
|
103
|
+
_van_ 299
|
104
|
+
aar 299
|
105
|
+
d_ 295
|
106
|
+
we 293
|
107
|
+
da 292
|
108
|
+
tu 290
|
109
|
+
_ee 290
|
110
|
+
ud 287
|
111
|
+
een_ 286
|
112
|
+
li 284
|
113
|
+
es 282
|
114
|
+
_st 281
|
115
|
+
ver 281
|
116
|
+
ten 281
|
117
|
+
ri 275
|
118
|
+
nde 275
|
119
|
+
der 274
|
120
|
+
_in 270
|
121
|
+
k_ 268
|
122
|
+
vo 267
|
123
|
+
het 266
|
124
|
+
oor 264
|
125
|
+
_het 262
|
126
|
+
het_ 262
|
127
|
+
_het_ 259
|
128
|
+
_een 258
|
129
|
+
l_ 258
|
130
|
+
ze 257
|
131
|
+
_n 254
|
132
|
+
ro 248
|
133
|
+
gen 243
|
134
|
+
_een_ 241
|
135
|
+
at_ 240
|
136
|
+
op 238
|
137
|
+
n. 238
|
138
|
+
_en 237
|
139
|
+
rs 237
|
140
|
+
_da 235
|
141
|
+
stu 232
|
142
|
+
in_ 230
|
143
|
+
_be 229
|
144
|
+
_ge 228
|
145
|
+
_k 226
|
146
|
+
rd 226
|
147
|
+
tud 220
|
148
|
+
_en_ 220
|
149
|
+
n._ 217
|
150
|
+
te_ 209
|
151
|
+
ei 208
|
152
|
+
ent 206
|
153
|
+
_me 203
|
154
|
+
la 202
|
155
|
+
ek 202
|
156
|
+
ed 201
|
157
|
+
ra 200
|
158
|
+
stud 200
|
159
|
+
en. 200
|
160
|
+
ie_ 197
|
161
|
+
ste 196
|
162
|
+
_vo 195
|
163
|
+
_in_ 193
|
164
|
+
_stu 191
|
165
|
+
zi 191
|
166
|
+
om 189
|
167
|
+
ui 189
|
168
|
+
en._ 186
|
169
|
+
ten_ 185
|
170
|
+
_stud 185
|
171
|
+
ude 184
|
172
|
+
die 183
|
173
|
+
ns 183
|
174
|
+
_j 181
|
175
|
+
D 179
|
176
|
+
aan 179
|
177
|
+
se 179
|
178
|
+
ma 178
|
179
|
+
_ve 176
|
180
|
+
ne 174
|
181
|
+
_p 174
|
182
|
+
eg 173
|
183
|
+
p_ 172
|
184
|
+
ar_ 172
|
185
|
+
aar_ 171
|
186
|
+
_te 170
|
187
|
+
ng_ 169
|
188
|
+
_we 169
|
189
|
+
'' 167
|
190
|
+
_D 165
|
191
|
+
ers 164
|
192
|
+
_op 163
|
193
|
+
dat 161
|
194
|
+
dat_ 160
|
195
|
+
ig 160
|
196
|
+
ere 159
|
197
|
+
eer 158
|
198
|
+
_zi 158
|
199
|
+
voor 156
|
200
|
+
voo 156
|
201
|
+
nge 155
|
202
|
+
nder 151
|
203
|
+
nte 151
|
204
|
+
or_ 150
|
205
|
+
ta 150
|
206
|
+
je 149
|
207
|
+
ing_ 148
|
208
|
+
ll 148
|
209
|
+
_ver 147
|
210
|
+
jk 146
|
211
|
+
oor_ 146
|
212
|
+
_dat 145
|
213
|
+
ijk 145
|
214
|
+
ren 145
|
215
|
+
is_ 145
|
216
|
+
_dat_ 144
|
217
|
+
_l 144
|
218
|
+
and 144
|
219
|
+
lij 143
|
220
|
+
ter 143
|
221
|
+
na 142
|
222
|
+
uden 139
|
223
|
+
tude 138
|
224
|
+
_voor 136
|
225
|
+
_voo 136
|
226
|
+
ond 136
|
227
|
+
ken 135
|
228
|
+
cht 135
|
229
|
+
_al 135
|
230
|
+
ht 135
|
231
|
+
wa 134
|
232
|
+
ho 133
|
233
|
+
em 133
|
234
|
+
den_ 133
|
235
|
+
pe 132
|
236
|
+
sc 132
|
237
|
+
un 131
|
238
|
+
ur 131
|
239
|
+
_di 130
|
240
|
+
gen_ 130
|
241
|
+
zo 129
|
242
|
+
rt 129
|
243
|
+
ev 128
|
244
|
+
mo 128
|
245
|
+
lijk 127
|
246
|
+
_is 126
|
247
|
+
stude 124
|
248
|
+
ha 123
|
249
|
+
to 122
|
250
|
+
el_ 121
|
251
|
+
og 121
|
252
|
+
op_ 121
|
253
|
+
sch 120
|
254
|
+
ol 120
|
255
|
+
ente 119
|
256
|
+
_u 118
|
257
|
+
pr 118
|
258
|
+
end 118
|
259
|
+
mi 117
|
260
|
+
iet 116
|
261
|
+
_aa 116
|
262
|
+
eli 115
|
263
|
+
dent 115
|
264
|
+
ijn 115
|
265
|
+
jn 115
|
266
|
+
ou 115
|
267
|
+
men 114
|
268
|
+
_' 114
|
269
|
+
tie 113
|
270
|
+
_is_ 113
|
271
|
+
nie 113
|
272
|
+
tr 112
|
273
|
+
ak 112
|
274
|
+
id 112
|
275
|
+
udent 111
|
276
|
+
tuden 111
|
277
|
+
uit 110
|
278
|
+
_te_ 109
|
279
|
+
aan_ 109
|
280
|
+
ld 109
|
281
|
+
S 108
|
282
|
+
_aan 108
|
283
|
+
ede 108
|
284
|
+
ja 107
|
285
|
+
nten 107
|
286
|
+
it_ 107
|
287
|
+
je_ 107
|
288
|
+
ts 107
|
289
|
+
erd 106
|
290
|
+
est 106
|
291
|
+
E 105
|
292
|
+
_op_ 105
|
293
|
+
ad 104
|
294
|
+
al_ 104
|
295
|
+
_ze 104
|
296
|
+
_on 104
|
297
|
+
rk 104
|
298
|
+
lle 103
|
299
|
+
ens 103
|
300
|
+
gel 103
|
301
|
+
m_ 103
|
302
|
+
len 103
|
303
|
+
_r 102
|
304
|
+
ec 102
|
305
|
+
inge 102
|
306
|
+
met 102
|
307
|
+
_met 101
|
308
|
+
si 100
|
309
|
+
die_ 100
|
310
|
+
us 100
|
311
|
+
onde 99
|
312
|
+
_ni 99
|
313
|
+
De 99
|
314
|
+
eu 99
|
315
|
+
dente 99
|
316
|
+
enten 99
|
317
|
+
ic 99
|
318
|
+
_met_ 98
|
319
|
+
f_ 98
|
320
|
+
met_ 98
|
321
|
+
no 97
|
322
|
+
ko 96
|
323
|
+
voor_ 96
|
324
|
+
rde 96
|
325
|
+
H 96
|
326
|
+
ngen 95
|
327
|
+
lo 95
|
328
|
+
ot 95
|
329
|
+
as 94
|
330
|
+
zij 93
|
331
|
+
_nie 92
|
332
|
+
vi 92
|
333
|
+
eb 92
|
334
|
+
_De 92
|
335
|
+
_zij 91
|
336
|
+
ep 91
|
337
|
+
wi 91
|
338
|
+
_zo 91
|
339
|
+
kt 91
|
340
|
+
ege 91
|
341
|
+
G 91
|
342
|
+
bi 90
|
343
|
+
j_ 90
|
344
|
+
ij_ 90
|
345
|
+
ze_ 90
|
346
|
+
do 90
|
347
|
+
lan 89
|
348
|
+
ov 89
|
349
|
+
udi 89
|
350
|
+
ord 89
|
351
|
+
onder 89
|
352
|
+
V 88
|
353
|
+
elij 88
|
354
|
+
_wa 88
|
355
|
+
elijk 88
|
356
|
+
ef 88
|
357
|
+
_die 87
|
358
|
+
ag 86
|
359
|
+
erk 86
|
360
|
+
eren 86
|
361
|
+
R 85
|
362
|
+
ik 85
|
363
|
+
_ma 85
|
364
|
+
gr 85
|
365
|
+
am 85
|
366
|
+
_mo 84
|
367
|
+
ul 84
|
368
|
+
nn 83
|
369
|
+
eve 83
|
370
|
+
De_ 83
|
371
|
+
maa 83
|
372
|
+
ingen 83
|
373
|
+
wo 83
|
374
|
+
_'' 83
|
375
|
+
O 83
|
376
|
+
tudi 82
|
377
|
+
I 82
|
378
|
+
nt_ 82
|
379
|
+
tudie 81
|
380
|
+
ven 81
|
381
|
+
udie 81
|
382
|
+
nten_ 81
|
383
|
+
_die_ 81
|
384
|
+
jaa 80
|
385
|
+
ka 80
|
386
|
+
eke 80
|
387
|
+
ite 80
|
388
|
+
a_ 80
|
389
|
+
_je 80
|
390
|
+
ac 80
|
391
|
+
jaar 80
|
392
|
+
_je_ 79
|
393
|
+
_H 79
|
394
|
+
_zijn 79
|
395
|
+
zijn 79
|
396
|
+
n, 78
|
397
|
+
nen 78
|
398
|
+
N 78
|
399
|
+
n,_ 78
|
400
|
+
ijn_ 77
|