language_detector 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 87128
|
2
|
+
a 50232
|
3
|
+
n 25424
|
4
|
+
e 18746
|
5
|
+
i 18605
|
6
|
+
an 14419
|
7
|
+
u 12470
|
8
|
+
k 11955
|
9
|
+
t 11875
|
10
|
+
r 11007
|
11
|
+
d 9856
|
12
|
+
g 9545
|
13
|
+
m 9390
|
14
|
+
s 8926
|
15
|
+
l 8631
|
16
|
+
n_ 8004
|
17
|
+
an_ 7095
|
18
|
+
p 6890
|
19
|
+
a_ 6739
|
20
|
+
b 6645
|
21
|
+
ng 6630
|
22
|
+
h 5964
|
23
|
+
da 5153
|
24
|
+
_d 5107
|
25
|
+
er 4625
|
26
|
+
ka 4448
|
27
|
+
la 4339
|
28
|
+
y 4323
|
29
|
+
i_ 4295
|
30
|
+
en 4159
|
31
|
+
ya 3800
|
32
|
+
ang 3778
|
33
|
+
_m 3750
|
34
|
+
o 3593
|
35
|
+
. 3539
|
36
|
+
._ 3310
|
37
|
+
at 3288
|
38
|
+
ah 3216
|
39
|
+
_b 3199
|
40
|
+
_s 3177
|
41
|
+
ta 3076
|
42
|
+
ra 3036
|
43
|
+
_k 2949
|
44
|
+
g_ 2939
|
45
|
+
ng_ 2933
|
46
|
+
ar 2915
|
47
|
+
_p 2906
|
48
|
+
me 2884
|
49
|
+
ga 2781
|
50
|
+
di 2778
|
51
|
+
ak 2727
|
52
|
+
al 2705
|
53
|
+
_me 2671
|
54
|
+
ang_ 2524
|
55
|
+
h_ 2509
|
56
|
+
ba 2508
|
57
|
+
pa 2454
|
58
|
+
kan 2449
|
59
|
+
in 2437
|
60
|
+
tu 2411
|
61
|
+
_t 2376
|
62
|
+
sa 2338
|
63
|
+
_da 2276
|
64
|
+
j 2276
|
65
|
+
pe 2225
|
66
|
+
, 2150
|
67
|
+
k_ 2147
|
68
|
+
ma 2143
|
69
|
+
se 2138
|
70
|
+
am 2131
|
71
|
+
kan_ 2117
|
72
|
+
_di 2082
|
73
|
+
,_ 2079
|
74
|
+
ke 2048
|
75
|
+
un 2004
|
76
|
+
be 1947
|
77
|
+
_a 1925
|
78
|
+
na 1871
|
79
|
+
ti 1868
|
80
|
+
ri 1861
|
81
|
+
u_ 1857
|
82
|
+
as 1854
|
83
|
+
ny 1827
|
84
|
+
ha 1789
|
85
|
+
te 1788
|
86
|
+
_pe 1768
|
87
|
+
em 1750
|
88
|
+
it 1737
|
89
|
+
_i 1732
|
90
|
+
_ke 1711
|
91
|
+
yan 1706
|
92
|
+
ad 1698
|
93
|
+
ia 1673
|
94
|
+
yang 1673
|
95
|
+
_y 1668
|
96
|
+
_ya 1655
|
97
|
+
yang_ 1653
|
98
|
+
_se 1648
|
99
|
+
ah_ 1646
|
100
|
+
_yan 1639
|
101
|
+
_yang 1639
|
102
|
+
ala 1612
|
103
|
+
nya 1587
|
104
|
+
el 1576
|
105
|
+
ik 1571
|
106
|
+
t_ 1568
|
107
|
+
ai 1549
|
108
|
+
men 1531
|
109
|
+
eng 1522
|
110
|
+
_men 1464
|
111
|
+
nga 1441
|
112
|
+
dan 1366
|
113
|
+
_be 1365
|
114
|
+
si 1343
|
115
|
+
uk 1328
|
116
|
+
ada 1299
|
117
|
+
nt 1291
|
118
|
+
__ 1287
|
119
|
+
ap 1276
|
120
|
+
ua 1265
|
121
|
+
___ 1238
|
122
|
+
- 1213
|
123
|
+
ja 1211
|
124
|
+
ber 1204
|
125
|
+
gan 1203
|
126
|
+
_ba 1193
|
127
|
+
____ 1189
|
128
|
+
ni 1181
|
129
|
+
_te 1169
|
130
|
+
c 1143
|
131
|
+
ran 1141
|
132
|
+
_____ 1140
|
133
|
+
m_ 1127
|
134
|
+
ara 1118
|
135
|
+
per 1099
|
136
|
+
le 1084
|
137
|
+
_dan 1083
|
138
|
+
dan_ 1079
|
139
|
+
ngan 1060
|
140
|
+
_dan_ 1050
|
141
|
+
ya_ 1046
|
142
|
+
at_ 1044
|
143
|
+
da_ 1021
|
144
|
+
li 1016
|
145
|
+
aka 1013
|
146
|
+
A 999
|
147
|
+
r_ 999
|
148
|
+
w 997
|
149
|
+
eb 995
|
150
|
+
lah 980
|
151
|
+
ata 980
|
152
|
+
ak_ 978
|
153
|
+
nd 974
|
154
|
+
_ber 955
|
155
|
+
gi 936
|
156
|
+
is 933
|
157
|
+
il 931
|
158
|
+
tu_ 923
|
159
|
+
s_ 920
|
160
|
+
gan_ 915
|
161
|
+
mb 913
|
162
|
+
wa 904
|
163
|
+
ag 903
|
164
|
+
ngan_ 898
|
165
|
+
ter 887
|
166
|
+
nya_ 877
|
167
|
+
S 873
|
168
|
+
ek 853
|
169
|
+
ru 852
|
170
|
+
_l 838
|
171
|
+
ela 828
|
172
|
+
itu 824
|
173
|
+
ol 822
|
174
|
+
aha 822
|
175
|
+
ada_ 820
|
176
|
+
pu 812
|
177
|
+
di_ 807
|
178
|
+
bu 807
|
179
|
+
am_ 804
|
180
|
+
ur 801
|
181
|
+
tan 790
|
182
|
+
mp 790
|
183
|
+
_per 786
|
184
|
+
_sa 784
|
185
|
+
M 782
|
186
|
+
ut 781
|
187
|
+
us 779
|
188
|
+
era 779
|
189
|
+
lam 778
|
190
|
+
lah_ 775
|
191
|
+
asa 767
|
192
|
+
ki 761
|
193
|
+
ir 759
|
194
|
+
de 756
|
195
|
+
enga 750
|
196
|
+
su 748
|
197
|
+
du 741
|
198
|
+
id 739
|
199
|
+
" 733
|
200
|
+
akan 732
|
201
|
+
apa 728
|
202
|
+
_S 724
|
203
|
+
ul 721
|
204
|
+
lu 717
|
205
|
+
ari 717
|
206
|
+
dal 704
|
207
|
+
et 698
|
208
|
+
es 698
|
209
|
+
pad 688
|
210
|
+
_ma 688
|
211
|
+
_M 685
|
212
|
+
ana 684
|
213
|
+
bi 679
|
214
|
+
pada 673
|
215
|
+
dala 673
|
216
|
+
l_ 671
|
217
|
+
ep 664
|
218
|
+
f 662
|
219
|
+
_di_ 658
|
220
|
+
B 655
|
221
|
+
ing 655
|
222
|
+
_j 654
|
223
|
+
ika 653
|
224
|
+
ku 650
|
225
|
+
_. 644
|
226
|
+
akan_ 642
|
227
|
+
ama 637
|
228
|
+
pen 636
|
229
|
+
alam 634
|
230
|
+
eh 634
|
231
|
+
pada_ 633
|
232
|
+
ai_ 632
|
233
|
+
_ter 632
|
234
|
+
K 631
|
235
|
+
mu 628
|
236
|
+
ju 628
|
237
|
+
P 626
|
238
|
+
mem 625
|
239
|
+
au 622
|
240
|
+
_mem 614
|
241
|
+
lan 612
|
242
|
+
_._ 611
|
243
|
+
ntu 608
|
244
|
+
lam_ 605
|
245
|
+
um 601
|
246
|
+
on 600
|
247
|
+
gk 597
|
248
|
+
_in 597
|
249
|
+
ngk 597
|
250
|
+
a. 584
|
251
|
+
meng 582
|
252
|
+
_meng 578
|
253
|
+
alam_ 577
|
254
|
+
_A 576
|
255
|
+
aa 575
|
256
|
+
uk_ 572
|
257
|
+
_pen 569
|
258
|
+
ban 569
|
259
|
+
or 569
|
260
|
+
st 566
|
261
|
+
ay 566
|
262
|
+
dar 565
|
263
|
+
_pa 564
|
264
|
+
a._ 564
|
265
|
+
_h 562
|
266
|
+
bah 562
|
267
|
+
_P 560
|
268
|
+
D 559
|
269
|
+
ri_ 558
|
270
|
+
ini 552
|
271
|
+
_de 551
|
272
|
+
rt 550
|
273
|
+
aan 545
|
274
|
+
_it 542
|
275
|
+
_itu 542
|
276
|
+
nda 540
|
277
|
+
eri 540
|
278
|
+
dalam 537
|
279
|
+
_B 533
|
280
|
+
_dal 532
|
281
|
+
ip 532
|
282
|
+
_dala 532
|
283
|
+
ta_ 528
|
284
|
+
_u 527
|
285
|
+
ung 525
|
286
|
+
ih 524
|
287
|
+
aw 520
|
288
|
+
_n 519
|
289
|
+
atu 517
|
290
|
+
ila 513
|
291
|
+
mi 513
|
292
|
+
leh 513
|
293
|
+
ian 512
|
294
|
+
tuk 509
|
295
|
+
awa 508
|
296
|
+
gu 506
|
297
|
+
ert 506
|
298
|
+
engan 505
|
299
|
+
ole 504
|
300
|
+
_K 501
|
301
|
+
seb 497
|
302
|
+
ca 496
|
303
|
+
gg 493
|
304
|
+
_ta 489
|
305
|
+
ra_ 488
|
306
|
+
ngg 488
|
307
|
+
itu_ 487
|
308
|
+
emb 482
|
309
|
+
ni_ 482
|
310
|
+
ida 482
|
311
|
+
nj 482
|
312
|
+
_ti 479
|
313
|
+
man 478
|
314
|
+
den 477
|
315
|
+
_D 474
|
316
|
+
_ka 473
|
317
|
+
aj 470
|
318
|
+
oleh 468
|
319
|
+
n. 468
|
320
|
+
n._ 464
|
321
|
+
ngka 464
|
322
|
+
gka 464
|
323
|
+
dak 464
|
324
|
+
anga 461
|
325
|
+
ena 459
|
326
|
+
san 458
|
327
|
+
pat 458
|
328
|
+
rk 458
|
329
|
+
( 455
|
330
|
+
ent 454
|
331
|
+
agi 453
|
332
|
+
) 451
|
333
|
+
ia_ 450
|
334
|
+
ge 450
|
335
|
+
ab 449
|
336
|
+
im 447
|
337
|
+
_ini 446
|
338
|
+
ntuk 445
|
339
|
+
I 445
|
340
|
+
ar_ 440
|
341
|
+
N 439
|
342
|
+
aan_ 436
|
343
|
+
_la 433
|
344
|
+
pi 432
|
345
|
+
baha 431
|
346
|
+
deng 430
|
347
|
+
han 430
|
348
|
+
bag 429
|
349
|
+
eh_ 429
|
350
|
+
hu 429
|
351
|
+
denga 428
|
352
|
+
_o 427
|
353
|
+
na_ 427
|
354
|
+
T 425
|
355
|
+
leh_ 422
|
356
|
+
_den 422
|
357
|
+
ka_ 419
|
358
|
+
any 415
|
359
|
+
ud 415
|
360
|
+
rang 414
|
361
|
+
anya 413
|
362
|
+
gi_ 412
|
363
|
+
angan 412
|
364
|
+
a, 411
|
365
|
+
_deng 410
|
366
|
+
ita 409
|
367
|
+
kat 408
|
368
|
+
re 408
|
369
|
+
_( 406
|
370
|
+
tuk_ 403
|
371
|
+
aga 401
|
372
|
+
ne 400
|
373
|
+
and 399
|
374
|
+
aya 398
|
375
|
+
_dar 397
|
376
|
+
a,_ 396
|
377
|
+
ro 396
|
378
|
+
ntuk_ 393
|
379
|
+
eba 392
|
380
|
+
aran 390
|
381
|
+
_" 387
|
382
|
+
ed 385
|
383
|
+
end 384
|
384
|
+
ko 383
|
385
|
+
sa_ 381
|
386
|
+
p_ 381
|
387
|
+
ara_ 380
|
388
|
+
_seb 379
|
389
|
+
alah 379
|
390
|
+
oleh_ 379
|
391
|
+
an. 378
|
392
|
+
dak_ 378
|
393
|
+
eg 378
|
394
|
+
hi 376
|
395
|
+
dari 375
|
396
|
+
an._ 375
|
397
|
+
au_ 373
|
398
|
+
bo 373
|
399
|
+
ti_ 371
|
400
|
+
ula 371
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 36004
|
2
|
+
e 9455
|
3
|
+
a 8302
|
4
|
+
y 6395
|
5
|
+
n 6395
|
6
|
+
h 5736
|
7
|
+
r 4939
|
8
|
+
s 4799
|
9
|
+
o 4429
|
10
|
+
i 4129
|
11
|
+
l 3462
|
12
|
+
y_ 2896
|
13
|
+
g 2731
|
14
|
+
n_ 2549
|
15
|
+
d 2232
|
16
|
+
t 2160
|
17
|
+
_a 1836
|
18
|
+
m 1823
|
19
|
+
sh 1774
|
20
|
+
e_ 1746
|
21
|
+
h_ 1645
|
22
|
+
yn 1550
|
23
|
+
c 1531
|
24
|
+
ee 1515
|
25
|
+
gh 1461
|
26
|
+
s_ 1460
|
27
|
+
_s 1388
|
28
|
+
ey 1345
|
29
|
+
_e 1316
|
30
|
+
ag 1312
|
31
|
+
a_ 1201
|
32
|
+
r_ 1173
|
33
|
+
agh 1165
|
34
|
+
in 1148
|
35
|
+
as 1136
|
36
|
+
_d 1136
|
37
|
+
u 1124
|
38
|
+
he 1060
|
39
|
+
yn_ 1025
|
40
|
+
oo 1012
|
41
|
+
ey_ 1005
|
42
|
+
_m 1002
|
43
|
+
er 965
|
44
|
+
v 949
|
45
|
+
_y 941
|
46
|
+
_v 895
|
47
|
+
_c 891
|
48
|
+
_as 880
|
49
|
+
' 869
|
50
|
+
, 866
|
51
|
+
_n 832
|
52
|
+
ll 828
|
53
|
+
,_ 827
|
54
|
+
_sh 818
|
55
|
+
. 802
|
56
|
+
hi 797
|
57
|
+
as_ 770
|
58
|
+
ee_ 768
|
59
|
+
ne 767
|
60
|
+
_as_ 764
|
61
|
+
b 762
|
62
|
+
re 735
|
63
|
+
ay 730
|
64
|
+
._ 730
|
65
|
+
an 726
|
66
|
+
ar 725
|
67
|
+
gh_ 722
|
68
|
+
ny 709
|
69
|
+
en 703
|
70
|
+
_r 697
|
71
|
+
ch 688
|
72
|
+
agh_ 687
|
73
|
+
dy 686
|
74
|
+
t_ 677
|
75
|
+
le 667
|
76
|
+
k 658
|
77
|
+
er_ 616
|
78
|
+
oi 612
|
79
|
+
ea 607
|
80
|
+
_t 601
|
81
|
+
yr 596
|
82
|
+
_er 585
|
83
|
+
ra 574
|
84
|
+
_dy 572
|
85
|
+
in_ 570
|
86
|
+
l_ 564
|
87
|
+
f 557
|
88
|
+
_l 556
|
89
|
+
ha 551
|
90
|
+
_g 548
|
91
|
+
_ny 534
|
92
|
+
nn 530
|
93
|
+
" 528
|
94
|
+
_ch 527
|
95
|
+
_y_ 524
|
96
|
+
ie 514
|
97
|
+
dy_ 514
|
98
|
+
_dy_ 513
|
99
|
+
aa 510
|
100
|
+
_f 509
|
101
|
+
j 504
|
102
|
+
sh_ 487
|
103
|
+
oa 480
|
104
|
+
is 478
|
105
|
+
_h 470
|
106
|
+
rr 468
|
107
|
+
ny_ 467
|
108
|
+
_ny_ 463
|
109
|
+
_er_ 454
|
110
|
+
ish 445
|
111
|
+
ho 442
|
112
|
+
ai 441
|
113
|
+
d_ 435
|
114
|
+
ro 423
|
115
|
+
ht 418
|
116
|
+
ei 417
|
117
|
+
shi 416
|
118
|
+
il 409
|
119
|
+
me 408
|
120
|
+
_ay 403
|
121
|
+
_b 403
|
122
|
+
la 400
|
123
|
+
_j 400
|
124
|
+
my 394
|
125
|
+
va 391
|
126
|
+
ns 386
|
127
|
+
on 385
|
128
|
+
_o 381
|
129
|
+
ys 380
|
130
|
+
_shi 379
|
131
|
+
ia 377
|
132
|
+
ayn 373
|
133
|
+
_va 371
|
134
|
+
hen 362
|
135
|
+
she 356
|
136
|
+
ri 345
|
137
|
+
lle 342
|
138
|
+
ooi 342
|
139
|
+
mee 340
|
140
|
+
ley 335
|
141
|
+
_me 331
|
142
|
+
el 330
|
143
|
+
rt 328
|
144
|
+
ie_ 327
|
145
|
+
eh 324
|
146
|
+
w 316
|
147
|
+
_ayn 313
|
148
|
+
al 311
|
149
|
+
g_ 309
|
150
|
+
ish_ 308
|
151
|
+
lley 307
|
152
|
+
mee_ 305
|
153
|
+
_mee 304
|
154
|
+
ill 301
|
155
|
+
es 299
|
156
|
+
na 299
|
157
|
+
je 298
|
158
|
+
yns 296
|
159
|
+
C 294
|
160
|
+
_my 291
|
161
|
+
_she 290
|
162
|
+
ley_ 282
|
163
|
+
V 280
|
164
|
+
_yn 278
|
165
|
+
_" 278
|
166
|
+
_mee_ 277
|
167
|
+
ta 272
|
168
|
+
_V 271
|
169
|
+
ys_ 268
|
170
|
+
- 268
|
171
|
+
lley_ 265
|
172
|
+
hin 264
|
173
|
+
_ro 259
|
174
|
+
shin 256
|
175
|
+
_yn_ 255
|
176
|
+
_je 255
|
177
|
+
do 253
|
178
|
+
va_ 253
|
179
|
+
ne_ 253
|
180
|
+
_va_ 252
|
181
|
+
ns_ 252
|
182
|
+
_shin 251
|
183
|
+
yns_ 250
|
184
|
+
ayns 248
|
185
|
+
en_ 247
|
186
|
+
che 246
|
187
|
+
_ayns 246
|
188
|
+
eh_ 246
|
189
|
+
_do 242
|
190
|
+
ad 241
|
191
|
+
ney 240
|
192
|
+
o_ 240
|
193
|
+
ym 240
|
194
|
+
ed 239
|
195
|
+
yr_ 239
|
196
|
+
ayns_ 237
|
197
|
+
ur 237
|
198
|
+
st 234
|
199
|
+
_C 234
|
200
|
+
rt_ 234
|
201
|
+
'n 232
|
202
|
+
m_ 232
|
203
|
+
p 231
|
204
|
+
li 231
|
205
|
+
or 230
|
206
|
+
ow 228
|
207
|
+
hin_ 225
|
208
|
+
da 225
|
209
|
+
shen 223
|
210
|
+
"_ 223
|
211
|
+
'n_ 223
|
212
|
+
Va 221
|
213
|
+
ght 220
|
214
|
+
shin_ 219
|
215
|
+
tr 217
|
216
|
+
_Va 217
|
217
|
+
ry 216
|
218
|
+
ve 216
|
219
|
+
_shen 215
|
220
|
+
ty 214
|
221
|
+
mo 206
|
222
|
+
_' 205
|
223
|
+
_ve 205
|
224
|
+
ma 203
|
225
|
+
be 203
|
226
|
+
te 203
|
227
|
+
hie 203
|
228
|
+
hey 203
|
229
|
+
nag 202
|
230
|
+
ll_ 201
|
231
|
+
yl 200
|
232
|
+
w_ 200
|
233
|
+
ss 200
|
234
|
+
aa_ 198
|
235
|
+
nagh 198
|
236
|
+
an_ 197
|
237
|
+
io 195
|
238
|
+
ow_ 194
|
239
|
+
it 194
|
240
|
+
sy 193
|
241
|
+
ayr 193
|
242
|
+
ney_ 192
|
243
|
+
E 192
|
244
|
+
sht 192
|
245
|
+
ni 191
|
246
|
+
_k 190
|
247
|
+
ha_ 190
|
248
|
+
ain 189
|
249
|
+
u_ 189
|
250
|
+
hy 189
|
251
|
+
aght 188
|
252
|
+
oo_ 188
|
253
|
+
ree 188
|
254
|
+
lh 187
|
255
|
+
_tr 186
|
256
|
+
esh 186
|
257
|
+
_che 183
|
258
|
+
yrt 182
|
259
|
+
_da 182
|
260
|
+
oar 182
|
261
|
+
doo 181
|
262
|
+
k_ 181
|
263
|
+
se 180
|
264
|
+
au 180
|
265
|
+
ille 179
|
266
|
+
ar_ 179
|
267
|
+
_lh 179
|
268
|
+
ki 177
|
269
|
+
arr 176
|
270
|
+
ec 176
|
271
|
+
ol 175
|
272
|
+
_doo 175
|
273
|
+
T 175
|
274
|
+
row 175
|
275
|
+
_row 174
|
276
|
+
ge 173
|
277
|
+
so 172
|
278
|
+
oy 171
|
279
|
+
oil 170
|
280
|
+
_re 170
|
281
|
+
_ag 170
|
282
|
+
'e 169
|
283
|
+
rey 169
|
284
|
+
illey 169
|
285
|
+
ck 168
|
286
|
+
ad_ 168
|
287
|
+
ann 168
|
288
|
+
n, 166
|
289
|
+
eea 166
|
290
|
+
_ta 166
|
291
|
+
ht_ 165
|
292
|
+
ae 162
|
293
|
+
_row_ 162
|
294
|
+
! 162
|
295
|
+
row_ 162
|
296
|
+
ane 161
|
297
|
+
fe 161
|
298
|
+
dd 160
|
299
|
+
go 159
|
300
|
+
tyn 159
|
301
|
+
oin 158
|
302
|
+
ooa 158
|
303
|
+
n. 158
|
304
|
+
eg 156
|
305
|
+
_ec 156
|
306
|
+
_ma 156
|
307
|
+
_agh 155
|
308
|
+
n,_ 154
|
309
|
+
_fe 154
|
310
|
+
Ch 154
|
311
|
+
nyn 153
|
312
|
+
fo 152
|
313
|
+
eay 152
|
314
|
+
nagh_ 152
|
315
|
+
n._ 152
|
316
|
+
_go 151
|
317
|
+
S 150
|
318
|
+
ke 150
|
319
|
+
hey_ 150
|
320
|
+
enn 150
|
321
|
+
cha 149
|
322
|
+
rre 149
|
323
|
+
_fo 149
|
324
|
+
ghe 149
|
325
|
+
raa 149
|
326
|
+
G 148
|
327
|
+
lan 148
|
328
|
+
mm 147
|
329
|
+
ym_ 147
|
330
|
+
A 146
|
331
|
+
c_ 146
|
332
|
+
oill 145
|
333
|
+
hee 144
|
334
|
+
ooar 144
|
335
|
+
_E 144
|
336
|
+
nne 143
|
337
|
+
tey 142
|
338
|
+
ir 141
|
339
|
+
de 141
|
340
|
+
hyn 140
|
341
|
+
_he 140
|
342
|
+
nyn_ 140
|
343
|
+
's 139
|
344
|
+
_gh 139
|
345
|
+
_cha 138
|
346
|
+
L 138
|
347
|
+
yrt_ 138
|
348
|
+
_Ch 137
|
349
|
+
e, 137
|
350
|
+
lla 136
|
351
|
+
Va_ 136
|
352
|
+
ooin 136
|
353
|
+
ell 135
|
354
|
+
a' 135
|
355
|
+
os 135
|
356
|
+
_oo 134
|
357
|
+
am 134
|
358
|
+
rish 133
|
359
|
+
th 133
|
360
|
+
_mo 133
|
361
|
+
ris 133
|
362
|
+
iag 133
|
363
|
+
gg 133
|
364
|
+
_Va_ 132
|
365
|
+
iagh 132
|
366
|
+
_G 132
|
367
|
+
e,_ 132
|
368
|
+
ass 132
|
369
|
+
!_ 132
|
370
|
+
my_ 132
|
371
|
+
hoo 131
|
372
|
+
_T 131
|
373
|
+
nee 130
|
374
|
+
a'n_ 129
|
375
|
+
rag 129
|
376
|
+
a'n 129
|
377
|
+
_'s 129
|
378
|
+
_so 128
|
379
|
+
co 128
|
380
|
+
rey_ 128
|
381
|
+
_eh 126
|
382
|
+
_my_ 126
|
383
|
+
nni 126
|
384
|
+
ou 126
|
385
|
+
_'sy 125
|
386
|
+
'sy 125
|
387
|
+
_p 125
|
388
|
+
vo 125
|
389
|
+
_S 125
|
390
|
+
H 125
|
391
|
+
_agh_ 125
|
392
|
+
tra 124
|
393
|
+
moo 124
|
394
|
+
hu 123
|
395
|
+
ooy 123
|
396
|
+
ragh 123
|
397
|
+
al_ 123
|
398
|
+
it_ 123
|
399
|
+
hia 122
|
400
|
+
id 122
|