language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 21274
|
2
|
+
e 9291
|
3
|
+
r 5307
|
4
|
+
n 4733
|
5
|
+
i 3976
|
6
|
+
t 3948
|
7
|
+
s 3751
|
8
|
+
a 3296
|
9
|
+
l 3063
|
10
|
+
d 3025
|
11
|
+
o 2868
|
12
|
+
g 2471
|
13
|
+
er 2164
|
14
|
+
k 2002
|
15
|
+
m 1680
|
16
|
+
e_ 1655
|
17
|
+
en 1613
|
18
|
+
f 1507
|
19
|
+
de 1484
|
20
|
+
r_ 1379
|
21
|
+
v 1245
|
22
|
+
u 1176
|
23
|
+
t_ 1081
|
24
|
+
n_ 1032
|
25
|
+
er_ 992
|
26
|
+
b 942
|
27
|
+
. 870
|
28
|
+
ge 868
|
29
|
+
._ 831
|
30
|
+
re 816
|
31
|
+
h 816
|
32
|
+
et 813
|
33
|
+
te 813
|
34
|
+
p 806
|
35
|
+
in 788
|
36
|
+
or 775
|
37
|
+
_s 753
|
38
|
+
_a 749
|
39
|
+
en_ 712
|
40
|
+
_e 691
|
41
|
+
ti 689
|
42
|
+
an 687
|
43
|
+
, 681
|
44
|
+
,_ 677
|
45
|
+
_f 655
|
46
|
+
_d 645
|
47
|
+
el 642
|
48
|
+
ng 635
|
49
|
+
nd 634
|
50
|
+
g_ 634
|
51
|
+
se 615
|
52
|
+
le 615
|
53
|
+
st 607
|
54
|
+
s_ 601
|
55
|
+
_o 572
|
56
|
+
ne 560
|
57
|
+
li 537
|
58
|
+
et_ 524
|
59
|
+
es 521
|
60
|
+
_i 512
|
61
|
+
ri 511
|
62
|
+
sk 510
|
63
|
+
_de 498
|
64
|
+
� 497
|
65
|
+
ar 475
|
66
|
+
ed 473
|
67
|
+
ig 463
|
68
|
+
at 452
|
69
|
+
_m 446
|
70
|
+
is 443
|
71
|
+
fo 441
|
72
|
+
� 441
|
73
|
+
ve 438
|
74
|
+
_k 434
|
75
|
+
� 432
|
76
|
+
der 429
|
77
|
+
ke 428
|
78
|
+
ing 427
|
79
|
+
og 426
|
80
|
+
_b 412
|
81
|
+
me 408
|
82
|
+
il 407
|
83
|
+
for 405
|
84
|
+
ns 394
|
85
|
+
y 389
|
86
|
+
_h 380
|
87
|
+
_t 374
|
88
|
+
on 371
|
89
|
+
d_ 370
|
90
|
+
al 362
|
91
|
+
be 359
|
92
|
+
_fo 351
|
93
|
+
af 336
|
94
|
+
de_ 335
|
95
|
+
_og 333
|
96
|
+
_p 332
|
97
|
+
og_ 325
|
98
|
+
om 325
|
99
|
+
_for 324
|
100
|
+
_og_ 313
|
101
|
+
l_ 308
|
102
|
+
nge 302
|
103
|
+
i_ 295
|
104
|
+
_v 294
|
105
|
+
c 289
|
106
|
+
ter 283
|
107
|
+
ll 280
|
108
|
+
ni 278
|
109
|
+
nde 278
|
110
|
+
rs 277
|
111
|
+
_af 277
|
112
|
+
un 275
|
113
|
+
ra 271
|
114
|
+
ko 271
|
115
|
+
den 270
|
116
|
+
_i_ 268
|
117
|
+
id 265
|
118
|
+
til 265
|
119
|
+
j 265
|
120
|
+
vi 264
|
121
|
+
D 260
|
122
|
+
ere 256
|
123
|
+
ma 255
|
124
|
+
si 253
|
125
|
+
f_ 252
|
126
|
+
af_ 238
|
127
|
+
_af_ 235
|
128
|
+
ik 235
|
129
|
+
m_ 234
|
130
|
+
�_ 232
|
131
|
+
_ti 227
|
132
|
+
_D 226
|
133
|
+
_u 226
|
134
|
+
_er 225
|
135
|
+
nt 224
|
136
|
+
_en 224
|
137
|
+
ls 221
|
138
|
+
es_ 216
|
139
|
+
lig 216
|
140
|
+
ger 216
|
141
|
+
re_ 210
|
142
|
+
ag 210
|
143
|
+
_me 207
|
144
|
+
at_ 204
|
145
|
+
lle 200
|
146
|
+
ge_ 200
|
147
|
+
_til 200
|
148
|
+
ige 199
|
149
|
+
_er_ 199
|
150
|
+
der_ 199
|
151
|
+
em 199
|
152
|
+
ds 197
|
153
|
+
r. 195
|
154
|
+
io 195
|
155
|
+
r._ 195
|
156
|
+
ud 193
|
157
|
+
_at 192
|
158
|
+
_at_ 191
|
159
|
+
ta 190
|
160
|
+
els 190
|
161
|
+
_l 190
|
162
|
+
ha 190
|
163
|
+
il_ 189
|
164
|
+
or_ 189
|
165
|
+
ke_ 186
|
166
|
+
rt 185
|
167
|
+
gen 184
|
168
|
+
ka 183
|
169
|
+
- 180
|
170
|
+
rk 180
|
171
|
+
ning 178
|
172
|
+
ol 178
|
173
|
+
nin 178
|
174
|
+
la 177
|
175
|
+
ld 175
|
176
|
+
De 175
|
177
|
+
it 173
|
178
|
+
ede 172
|
179
|
+
ed_ 171
|
180
|
+
_ko 171
|
181
|
+
lse 171
|
182
|
+
ek 168
|
183
|
+
else 167
|
184
|
+
inge 167
|
185
|
+
p� 167
|
186
|
+
ng_ 167
|
187
|
+
_p� 167
|
188
|
+
iv 166
|
189
|
+
�r 166
|
190
|
+
so 165
|
191
|
+
he 165
|
192
|
+
ens 165
|
193
|
+
ske 165
|
194
|
+
ind 164
|
195
|
+
til_ 163
|
196
|
+
rn 163
|
197
|
+
ide 162
|
198
|
+
ev 162
|
199
|
+
den_ 162
|
200
|
+
to 162
|
201
|
+
sen 160
|
202
|
+
_be 160
|
203
|
+
sa 160
|
204
|
+
bl 158
|
205
|
+
_g 158
|
206
|
+
an_ 157
|
207
|
+
det 156
|
208
|
+
om_ 156
|
209
|
+
ru 156
|
210
|
+
va 155
|
211
|
+
_til_ 155
|
212
|
+
ste 154
|
213
|
+
rd 153
|
214
|
+
_p�_ 152
|
215
|
+
k_ 152
|
216
|
+
p�_ 152
|
217
|
+
di 152
|
218
|
+
kr 152
|
219
|
+
K 151
|
220
|
+
_De 149
|
221
|
+
for_ 148
|
222
|
+
te_ 148
|
223
|
+
kon 148
|
224
|
+
ver 147
|
225
|
+
mm 146
|
226
|
+
am 146
|
227
|
+
_en_ 145
|
228
|
+
_r 145
|
229
|
+
ne_ 144
|
230
|
+
ing_ 144
|
231
|
+
tr 143
|
232
|
+
le_ 142
|
233
|
+
del 142
|
234
|
+
_in 142
|
235
|
+
gt 140
|
236
|
+
_st 138
|
237
|
+
S 138
|
238
|
+
eg 138
|
239
|
+
gs 138
|
240
|
+
tt 138
|
241
|
+
r, 137
|
242
|
+
ser 137
|
243
|
+
r,_ 137
|
244
|
+
er. 137
|
245
|
+
ro 137
|
246
|
+
er._ 137
|
247
|
+
_for_ 136
|
248
|
+
ent 136
|
249
|
+
kt 136
|
250
|
+
eri 135
|
251
|
+
ur 134
|
252
|
+
lin 134
|
253
|
+
B 133
|
254
|
+
A 133
|
255
|
+
sti 133
|
256
|
+
ner 133
|
257
|
+
da 133
|
258
|
+
ris 132
|
259
|
+
ion 132
|
260
|
+
_K 131
|
261
|
+
ern 131
|
262
|
+
ers 130
|
263
|
+
ist 130
|
264
|
+
�r 130
|
265
|
+
ige_ 130
|
266
|
+
_si 130
|
267
|
+
tte 129
|
268
|
+
E 128
|
269
|
+
_n 128
|
270
|
+
nn 127
|
271
|
+
_B 126
|
272
|
+
_ha 126
|
273
|
+
_. 126
|
274
|
+
rne 125
|
275
|
+
H 125
|
276
|
+
_ud 125
|
277
|
+
rin 124
|
278
|
+
na 124
|
279
|
+
und 124
|
280
|
+
ft 124
|
281
|
+
_der 124
|
282
|
+
ku 123
|
283
|
+
_A 122
|
284
|
+
ler 120
|
285
|
+
and 120
|
286
|
+
end 120
|
287
|
+
ns_ 120
|
288
|
+
rg 119
|
289
|
+
op 119
|
290
|
+
er,_ 119
|
291
|
+
er, 119
|
292
|
+
ar_ 118
|
293
|
+
P 118
|
294
|
+
_S 117
|
295
|
+
_H 117
|
296
|
+
_._ 116
|
297
|
+
ov 116
|
298
|
+
erne 115
|
299
|
+
tio 115
|
300
|
+
med 115
|
301
|
+
tion 115
|
302
|
+
_E 115
|
303
|
+
_P 115
|
304
|
+
det_ 114
|
305
|
+
pr 114
|
306
|
+
e. 113
|
307
|
+
ter_ 113
|
308
|
+
: 113
|
309
|
+
kk 113
|
310
|
+
e._ 113
|
311
|
+
e,_ 113
|
312
|
+
e, 113
|
313
|
+
od 113
|
314
|
+
kke 113
|
315
|
+
ten 113
|
316
|
+
ling 113
|
317
|
+
:_ 112
|
318
|
+
mi 112
|
319
|
+
eli 112
|
320
|
+
lo 111
|
321
|
+
som 111
|
322
|
+
_den 111
|
323
|
+
rb 110
|
324
|
+
se_ 110
|
325
|
+
ell 110
|
326
|
+
sid 110
|
327
|
+
nne 109
|
328
|
+
fi 108
|
329
|
+
lt 107
|
330
|
+
v_ 107
|
331
|
+
_de_ 107
|
332
|
+
ark 106
|
333
|
+
lige 106
|
334
|
+
ngen 106
|
335
|
+
ie 105
|
336
|
+
_med 105
|
337
|
+
_der_ 105
|
338
|
+
ring 105
|
339
|
+
a_ 105
|
340
|
+
_vi 104
|
341
|
+
-_ 104
|
342
|
+
ys 103
|
343
|
+
gel 103
|
344
|
+
_so 103
|
345
|
+
ia 103
|
346
|
+
ive 102
|
347
|
+
ej 101
|
348
|
+
ati 101
|
349
|
+
ren 101
|
350
|
+
_det 101
|
351
|
+
side 101
|
352
|
+
ske_ 101
|
353
|
+
br 100
|
354
|
+
gi 100
|
355
|
+
F 100
|
356
|
+
M 100
|
357
|
+
ul 99
|
358
|
+
isk 99
|
359
|
+
men 99
|
360
|
+
n,_ 99
|
361
|
+
age 99
|
362
|
+
fr 99
|
363
|
+
n, 99
|
364
|
+
tu 98
|
365
|
+
ts 98
|
366
|
+
_ma 98
|
367
|
+
nder 98
|
368
|
+
ot 97
|
369
|
+
dt 97
|
370
|
+
R 97
|
371
|
+
med_ 96
|
372
|
+
ho 96
|
373
|
+
ans 95
|
374
|
+
_kon 95
|
375
|
+
pe 95
|
376
|
+
ce 94
|
377
|
+
gr 93
|
378
|
+
mme 92
|
379
|
+
ret 92
|
380
|
+
lige_ 92
|
381
|
+
mu 91
|
382
|
+
_med_ 91
|
383
|
+
hv 91
|
384
|
+
v� 91
|
385
|
+
Det 91
|
386
|
+
ens_ 91
|
387
|
+
kl 91
|
388
|
+
_M 90
|
389
|
+
T 90
|
390
|
+
ingen 90
|
391
|
+
rm 90
|
392
|
+
ill 89
|
393
|
+
elle 89
|
394
|
+
ef 89
|
395
|
+
ene 89
|
396
|
+
nds 89
|
397
|
+
ove 89
|
398
|
+
som_ 89
|
399
|
+
C 88
|
400
|
+
_den_ 88
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 20104
|
2
|
+
e 9848
|
3
|
+
n 5323
|
4
|
+
a 3733
|
5
|
+
t 3683
|
6
|
+
i 3490
|
7
|
+
r 3195
|
8
|
+
d 2876
|
9
|
+
o 2845
|
10
|
+
n_ 2443
|
11
|
+
en 2439
|
12
|
+
s 2195
|
13
|
+
e_ 1842
|
14
|
+
l 1837
|
15
|
+
g 1522
|
16
|
+
en_ 1500
|
17
|
+
de 1489
|
18
|
+
er 1388
|
19
|
+
t_ 1377
|
20
|
+
v 1253
|
21
|
+
u 1217
|
22
|
+
k 1204
|
23
|
+
_d 1136
|
24
|
+
h 1102
|
25
|
+
m 1084
|
26
|
+
an 939
|
27
|
+
te 875
|
28
|
+
j 857
|
29
|
+
in 810
|
30
|
+
_v 793
|
31
|
+
r_ 751
|
32
|
+
de_ 742
|
33
|
+
ee 737
|
34
|
+
p 732
|
35
|
+
et 718
|
36
|
+
ge 716
|
37
|
+
aa 708
|
38
|
+
b 703
|
39
|
+
_e 686
|
40
|
+
st 669
|
41
|
+
z 668
|
42
|
+
ie 662
|
43
|
+
_de 655
|
44
|
+
w 631
|
45
|
+
c 611
|
46
|
+
. 604
|
47
|
+
s_ 582
|
48
|
+
_de_ 576
|
49
|
+
_h 572
|
50
|
+
el 570
|
51
|
+
ij 564
|
52
|
+
._ 554
|
53
|
+
et_ 531
|
54
|
+
an_ 522
|
55
|
+
he 505
|
56
|
+
_o 497
|
57
|
+
nd 478
|
58
|
+
_i 475
|
59
|
+
ar 459
|
60
|
+
_m 451
|
61
|
+
re 442
|
62
|
+
ve 441
|
63
|
+
' 428
|
64
|
+
or 424
|
65
|
+
ng 421
|
66
|
+
at 418
|
67
|
+
_s 415
|
68
|
+
oo 403
|
69
|
+
_z 401
|
70
|
+
le 395
|
71
|
+
_b 394
|
72
|
+
_a 391
|
73
|
+
_he 386
|
74
|
+
va 385
|
75
|
+
er_ 381
|
76
|
+
me 372
|
77
|
+
_w 368
|
78
|
+
f 361
|
79
|
+
on 351
|
80
|
+
_t 351
|
81
|
+
_va 345
|
82
|
+
_g 342
|
83
|
+
di 342
|
84
|
+
nt 340
|
85
|
+
, 335
|
86
|
+
g_ 335
|
87
|
+
,_ 334
|
88
|
+
van 327
|
89
|
+
ch 326
|
90
|
+
is 326
|
91
|
+
ing 325
|
92
|
+
be 325
|
93
|
+
ni 320
|
94
|
+
it 317
|
95
|
+
een 316
|
96
|
+
_van 315
|
97
|
+
al 310
|
98
|
+
den 309
|
99
|
+
ti 309
|
100
|
+
van_ 307
|
101
|
+
oe 302
|
102
|
+
ke 302
|
103
|
+
_van_ 299
|
104
|
+
aar 299
|
105
|
+
d_ 295
|
106
|
+
we 293
|
107
|
+
da 292
|
108
|
+
tu 290
|
109
|
+
_ee 290
|
110
|
+
ud 287
|
111
|
+
een_ 286
|
112
|
+
li 284
|
113
|
+
es 282
|
114
|
+
_st 281
|
115
|
+
ver 281
|
116
|
+
ten 281
|
117
|
+
ri 275
|
118
|
+
nde 275
|
119
|
+
der 274
|
120
|
+
_in 270
|
121
|
+
k_ 268
|
122
|
+
vo 267
|
123
|
+
het 266
|
124
|
+
oor 264
|
125
|
+
_het 262
|
126
|
+
het_ 262
|
127
|
+
_het_ 259
|
128
|
+
_een 258
|
129
|
+
l_ 258
|
130
|
+
ze 257
|
131
|
+
_n 254
|
132
|
+
ro 248
|
133
|
+
gen 243
|
134
|
+
_een_ 241
|
135
|
+
at_ 240
|
136
|
+
op 238
|
137
|
+
n. 238
|
138
|
+
_en 237
|
139
|
+
rs 237
|
140
|
+
_da 235
|
141
|
+
stu 232
|
142
|
+
in_ 230
|
143
|
+
_be 229
|
144
|
+
_ge 228
|
145
|
+
_k 226
|
146
|
+
rd 226
|
147
|
+
tud 220
|
148
|
+
_en_ 220
|
149
|
+
n._ 217
|
150
|
+
te_ 209
|
151
|
+
ei 208
|
152
|
+
ent 206
|
153
|
+
_me 203
|
154
|
+
la 202
|
155
|
+
ek 202
|
156
|
+
ed 201
|
157
|
+
ra 200
|
158
|
+
stud 200
|
159
|
+
en. 200
|
160
|
+
ie_ 197
|
161
|
+
ste 196
|
162
|
+
_vo 195
|
163
|
+
_in_ 193
|
164
|
+
_stu 191
|
165
|
+
zi 191
|
166
|
+
om 189
|
167
|
+
ui 189
|
168
|
+
en._ 186
|
169
|
+
ten_ 185
|
170
|
+
_stud 185
|
171
|
+
ude 184
|
172
|
+
die 183
|
173
|
+
ns 183
|
174
|
+
_j 181
|
175
|
+
D 179
|
176
|
+
aan 179
|
177
|
+
se 179
|
178
|
+
ma 178
|
179
|
+
_ve 176
|
180
|
+
ne 174
|
181
|
+
_p 174
|
182
|
+
eg 173
|
183
|
+
p_ 172
|
184
|
+
ar_ 172
|
185
|
+
aar_ 171
|
186
|
+
_te 170
|
187
|
+
ng_ 169
|
188
|
+
_we 169
|
189
|
+
'' 167
|
190
|
+
_D 165
|
191
|
+
ers 164
|
192
|
+
_op 163
|
193
|
+
dat 161
|
194
|
+
dat_ 160
|
195
|
+
ig 160
|
196
|
+
ere 159
|
197
|
+
eer 158
|
198
|
+
_zi 158
|
199
|
+
voor 156
|
200
|
+
voo 156
|
201
|
+
nge 155
|
202
|
+
nder 151
|
203
|
+
nte 151
|
204
|
+
or_ 150
|
205
|
+
ta 150
|
206
|
+
je 149
|
207
|
+
ing_ 148
|
208
|
+
ll 148
|
209
|
+
_ver 147
|
210
|
+
jk 146
|
211
|
+
oor_ 146
|
212
|
+
_dat 145
|
213
|
+
ijk 145
|
214
|
+
ren 145
|
215
|
+
is_ 145
|
216
|
+
_dat_ 144
|
217
|
+
_l 144
|
218
|
+
and 144
|
219
|
+
lij 143
|
220
|
+
ter 143
|
221
|
+
na 142
|
222
|
+
uden 139
|
223
|
+
tude 138
|
224
|
+
_voor 136
|
225
|
+
_voo 136
|
226
|
+
ond 136
|
227
|
+
ken 135
|
228
|
+
cht 135
|
229
|
+
_al 135
|
230
|
+
ht 135
|
231
|
+
wa 134
|
232
|
+
ho 133
|
233
|
+
em 133
|
234
|
+
den_ 133
|
235
|
+
pe 132
|
236
|
+
sc 132
|
237
|
+
un 131
|
238
|
+
ur 131
|
239
|
+
_di 130
|
240
|
+
gen_ 130
|
241
|
+
zo 129
|
242
|
+
rt 129
|
243
|
+
ev 128
|
244
|
+
mo 128
|
245
|
+
lijk 127
|
246
|
+
_is 126
|
247
|
+
stude 124
|
248
|
+
ha 123
|
249
|
+
to 122
|
250
|
+
el_ 121
|
251
|
+
og 121
|
252
|
+
op_ 121
|
253
|
+
sch 120
|
254
|
+
ol 120
|
255
|
+
ente 119
|
256
|
+
_u 118
|
257
|
+
pr 118
|
258
|
+
end 118
|
259
|
+
mi 117
|
260
|
+
iet 116
|
261
|
+
_aa 116
|
262
|
+
eli 115
|
263
|
+
dent 115
|
264
|
+
ijn 115
|
265
|
+
jn 115
|
266
|
+
ou 115
|
267
|
+
men 114
|
268
|
+
_' 114
|
269
|
+
tie 113
|
270
|
+
_is_ 113
|
271
|
+
nie 113
|
272
|
+
tr 112
|
273
|
+
ak 112
|
274
|
+
id 112
|
275
|
+
udent 111
|
276
|
+
tuden 111
|
277
|
+
uit 110
|
278
|
+
_te_ 109
|
279
|
+
aan_ 109
|
280
|
+
ld 109
|
281
|
+
S 108
|
282
|
+
_aan 108
|
283
|
+
ede 108
|
284
|
+
ja 107
|
285
|
+
nten 107
|
286
|
+
it_ 107
|
287
|
+
je_ 107
|
288
|
+
ts 107
|
289
|
+
erd 106
|
290
|
+
est 106
|
291
|
+
E 105
|
292
|
+
_op_ 105
|
293
|
+
ad 104
|
294
|
+
al_ 104
|
295
|
+
_ze 104
|
296
|
+
_on 104
|
297
|
+
rk 104
|
298
|
+
lle 103
|
299
|
+
ens 103
|
300
|
+
gel 103
|
301
|
+
m_ 103
|
302
|
+
len 103
|
303
|
+
_r 102
|
304
|
+
ec 102
|
305
|
+
inge 102
|
306
|
+
met 102
|
307
|
+
_met 101
|
308
|
+
si 100
|
309
|
+
die_ 100
|
310
|
+
us 100
|
311
|
+
onde 99
|
312
|
+
_ni 99
|
313
|
+
De 99
|
314
|
+
eu 99
|
315
|
+
dente 99
|
316
|
+
enten 99
|
317
|
+
ic 99
|
318
|
+
_met_ 98
|
319
|
+
f_ 98
|
320
|
+
met_ 98
|
321
|
+
no 97
|
322
|
+
ko 96
|
323
|
+
voor_ 96
|
324
|
+
rde 96
|
325
|
+
H 96
|
326
|
+
ngen 95
|
327
|
+
lo 95
|
328
|
+
ot 95
|
329
|
+
as 94
|
330
|
+
zij 93
|
331
|
+
_nie 92
|
332
|
+
vi 92
|
333
|
+
eb 92
|
334
|
+
_De 92
|
335
|
+
_zij 91
|
336
|
+
ep 91
|
337
|
+
wi 91
|
338
|
+
_zo 91
|
339
|
+
kt 91
|
340
|
+
ege 91
|
341
|
+
G 91
|
342
|
+
bi 90
|
343
|
+
j_ 90
|
344
|
+
ij_ 90
|
345
|
+
ze_ 90
|
346
|
+
do 90
|
347
|
+
lan 89
|
348
|
+
ov 89
|
349
|
+
udi 89
|
350
|
+
ord 89
|
351
|
+
onder 89
|
352
|
+
V 88
|
353
|
+
elij 88
|
354
|
+
_wa 88
|
355
|
+
elijk 88
|
356
|
+
ef 88
|
357
|
+
_die 87
|
358
|
+
ag 86
|
359
|
+
erk 86
|
360
|
+
eren 86
|
361
|
+
R 85
|
362
|
+
ik 85
|
363
|
+
_ma 85
|
364
|
+
gr 85
|
365
|
+
am 85
|
366
|
+
_mo 84
|
367
|
+
ul 84
|
368
|
+
nn 83
|
369
|
+
eve 83
|
370
|
+
De_ 83
|
371
|
+
maa 83
|
372
|
+
ingen 83
|
373
|
+
wo 83
|
374
|
+
_'' 83
|
375
|
+
O 83
|
376
|
+
tudi 82
|
377
|
+
I 82
|
378
|
+
nt_ 82
|
379
|
+
tudie 81
|
380
|
+
ven 81
|
381
|
+
udie 81
|
382
|
+
nten_ 81
|
383
|
+
_die_ 81
|
384
|
+
jaa 80
|
385
|
+
ka 80
|
386
|
+
eke 80
|
387
|
+
ite 80
|
388
|
+
a_ 80
|
389
|
+
_je 80
|
390
|
+
ac 80
|
391
|
+
jaar 80
|
392
|
+
_je_ 79
|
393
|
+
_H 79
|
394
|
+
_zijn 79
|
395
|
+
zijn 79
|
396
|
+
n, 78
|
397
|
+
nen 78
|
398
|
+
N 78
|
399
|
+
n,_ 78
|
400
|
+
ijn_ 77
|