language_detector 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/language_detector.rb +232 -0
- data/lib/model-fm.yml +52504 -0
- data/lib/model-tc.yml +53985 -0
- data/lib/textcat_ngrams/afrikaans.lm +400 -0
- data/lib/textcat_ngrams/albanian.lm +400 -0
- data/lib/textcat_ngrams/amharic-utf.lm +400 -0
- data/lib/textcat_ngrams/arabic-iso8859_6.lm +400 -0
- data/lib/textcat_ngrams/arabic-windows1256.lm +400 -0
- data/lib/textcat_ngrams/armenian.lm +400 -0
- data/lib/textcat_ngrams/basque.lm +400 -0
- data/lib/textcat_ngrams/belarus-windows1251.lm +400 -0
- data/lib/textcat_ngrams/bosnian.lm +400 -0
- data/lib/textcat_ngrams/breton.lm +400 -0
- data/lib/textcat_ngrams/bulgarian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/catalan.lm +400 -0
- data/lib/textcat_ngrams/chinese-big5.lm +400 -0
- data/lib/textcat_ngrams/chinese-gb2312.lm +400 -0
- data/lib/textcat_ngrams/croatian-ascii.lm +400 -0
- data/lib/textcat_ngrams/czech-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/danish.lm +400 -0
- data/lib/textcat_ngrams/dutch.lm +400 -0
- data/lib/textcat_ngrams/english.lm +400 -0
- data/lib/textcat_ngrams/esperanto.lm +400 -0
- data/lib/textcat_ngrams/estonian.lm +400 -0
- data/lib/textcat_ngrams/finnish.lm +400 -0
- data/lib/textcat_ngrams/french.lm +400 -0
- data/lib/textcat_ngrams/frisian.lm +400 -0
- data/lib/textcat_ngrams/georgian.lm +400 -0
- data/lib/textcat_ngrams/german.lm +400 -0
- data/lib/textcat_ngrams/greek-iso8859-7.lm +400 -0
- data/lib/textcat_ngrams/hebrew-iso8859_8.lm +400 -0
- data/lib/textcat_ngrams/hindi.lm +400 -0
- data/lib/textcat_ngrams/hungarian.lm +400 -0
- data/lib/textcat_ngrams/icelandic.lm +400 -0
- data/lib/textcat_ngrams/indonesian.lm +400 -0
- data/lib/textcat_ngrams/irish.lm +400 -0
- data/lib/textcat_ngrams/italian.lm +400 -0
- data/lib/textcat_ngrams/japanese-euc_jp.lm +400 -0
- data/lib/textcat_ngrams/japanese-shift_jis.lm +400 -0
- data/lib/textcat_ngrams/korean.lm +400 -0
- data/lib/textcat_ngrams/latin.lm +400 -0
- data/lib/textcat_ngrams/latvian.lm +400 -0
- data/lib/textcat_ngrams/lithuanian.lm +400 -0
- data/lib/textcat_ngrams/malay.lm +400 -0
- data/lib/textcat_ngrams/manx.lm +400 -0
- data/lib/textcat_ngrams/marathi.lm +400 -0
- data/lib/textcat_ngrams/mingo.lm +400 -0
- data/lib/textcat_ngrams/nepali.lm +400 -0
- data/lib/textcat_ngrams/norwegian.lm +400 -0
- data/lib/textcat_ngrams/persian.lm +400 -0
- data/lib/textcat_ngrams/polish.lm +400 -0
- data/lib/textcat_ngrams/portuguese.lm +400 -0
- data/lib/textcat_ngrams/quechua.lm +400 -0
- data/lib/textcat_ngrams/romanian.lm +400 -0
- data/lib/textcat_ngrams/rumantsch.lm +400 -0
- data/lib/textcat_ngrams/russian-iso8859_5.lm +400 -0
- data/lib/textcat_ngrams/russian-koi8_r.lm +400 -0
- data/lib/textcat_ngrams/russian-windows1251.lm +400 -0
- data/lib/textcat_ngrams/sanskrit.lm +400 -0
- data/lib/textcat_ngrams/scots.lm +400 -0
- data/lib/textcat_ngrams/scots_gaelic.lm +400 -0
- data/lib/textcat_ngrams/serbian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovak-windows1250.lm +400 -0
- data/lib/textcat_ngrams/slovenian-ascii.lm +400 -0
- data/lib/textcat_ngrams/slovenian-iso8859_2.lm +400 -0
- data/lib/textcat_ngrams/spanish.lm +400 -0
- data/lib/textcat_ngrams/swahili.lm +400 -0
- data/lib/textcat_ngrams/swedish.lm +400 -0
- data/lib/textcat_ngrams/tagalog.lm +400 -0
- data/lib/textcat_ngrams/tamil.lm +400 -0
- data/lib/textcat_ngrams/thai.lm +400 -0
- data/lib/textcat_ngrams/turkish.lm +400 -0
- data/lib/textcat_ngrams/ukrainian-koi8_u.lm +400 -0
- data/lib/textcat_ngrams/vietnamese.lm +400 -0
- data/lib/textcat_ngrams/welsh.lm +400 -0
- data/lib/textcat_ngrams/yiddish-utf.lm +400 -0
- data/lib/training_data/ar-utf8.txt +54 -0
- data/lib/training_data/bg-utf8.txt +26 -0
- data/lib/training_data/cs-utf8.txt +48 -0
- data/lib/training_data/da-utf8.txt +159 -0
- data/lib/training_data/de-utf8.txt +569 -0
- data/lib/training_data/el-utf8.txt +48 -0
- data/lib/training_data/en-utf8.txt +81 -0
- data/lib/training_data/es-utf8.txt +185 -0
- data/lib/training_data/et-utf8.txt +50 -0
- data/lib/training_data/fa-utf8.txt +42 -0
- data/lib/training_data/fi-utf8.txt +83 -0
- data/lib/training_data/fr-utf8.txt +191 -0
- data/lib/training_data/fy-utf8.txt +22 -0
- data/lib/training_data/ga-utf8.txt +109 -0
- data/lib/training_data/he-utf8.txt +116 -0
- data/lib/training_data/hi-utf8.txt +49 -0
- data/lib/training_data/hr-utf8.txt +80 -0
- data/lib/training_data/hu-utf8.txt +87 -0
- data/lib/training_data/io-utf8.txt +41 -0
- data/lib/training_data/is-utf8.txt +94 -0
- data/lib/training_data/it-utf8.txt +228 -0
- data/lib/training_data/ja-utf8.txt +200 -0
- data/lib/training_data/ko-utf8.txt +147 -0
- data/lib/training_data/nl-utf8.txt +215 -0
- data/lib/training_data/no-utf8.txt +281 -0
- data/lib/training_data/pl-utf8.txt +120 -0
- data/lib/training_data/pt-utf8.txt +214 -0
- data/lib/training_data/ro-utf8.txt +66 -0
- data/lib/training_data/ru-utf8.txt +310 -0
- data/lib/training_data/sl-utf8.txt +263 -0
- data/lib/training_data/sv-utf8.txt +174 -0
- data/lib/training_data/th-utf8.txt +49 -0
- data/lib/training_data/tk-utf8.txt +101 -0
- data/lib/training_data/todo/af.txt +114 -0
- data/lib/training_data/todo/amharic-utf.txt +95 -0
- data/lib/training_data/todo/arabic-windows1256.txt +157 -0
- data/lib/training_data/todo/armenian.txt +86 -0
- data/lib/training_data/todo/basque.txt +136 -0
- data/lib/training_data/todo/belarus-windows1251.txt +97 -0
- data/lib/training_data/todo/bosnian.txt +97 -0
- data/lib/training_data/todo/breton.txt +159 -0
- data/lib/training_data/todo/bulgarian-iso8859_5.txt +115 -0
- data/lib/training_data/todo/catalan.txt +93 -0
- data/lib/training_data/todo/croatian-ascii.txt +104 -0
- data/lib/training_data/todo/esperanto.txt +95 -0
- data/lib/training_data/todo/estonian.txt +218 -0
- data/lib/training_data/todo/frisian.txt +99 -0
- data/lib/training_data/todo/georgian.txt +86 -0
- data/lib/training_data/todo/greek-iso8859-7.txt +139 -0
- data/lib/training_data/todo/hawaian.txt +108 -0
- data/lib/training_data/todo/hebrew-iso8859_8.txt +79 -0
- data/lib/training_data/todo/hindi.txt +77 -0
- data/lib/training_data/todo/hungarian.txt +102 -0
- data/lib/training_data/todo/icelandic.txt +131 -0
- data/lib/training_data/todo/indonesian.txt +93 -0
- data/lib/training_data/todo/irish.txt +209 -0
- data/lib/training_data/todo/latin.txt +120 -0
- data/lib/training_data/todo/latvian.txt +126 -0
- data/lib/training_data/todo/lithuanian.txt +99 -0
- data/lib/training_data/todo/malay.txt +108 -0
- data/lib/training_data/todo/manx.txt +78 -0
- data/lib/training_data/todo/marathi.txt +100 -0
- data/lib/training_data/todo/mf.txt +100 -0
- data/lib/training_data/todo/middle_frisian.txt +102 -0
- data/lib/training_data/todo/mingo.txt +146 -0
- data/lib/training_data/todo/nepali.txt +131 -0
- data/lib/training_data/todo/persian.txt +73 -0
- data/lib/training_data/todo/quechua.txt +108 -0
- data/lib/training_data/todo/romanian.txt +103 -0
- data/lib/training_data/todo/rumantsch.txt +110 -0
- data/lib/training_data/todo/sanskrit.txt +135 -0
- data/lib/training_data/todo/scots.txt +490 -0
- data/lib/training_data/todo/scots_gaelic.txt +93 -0
- data/lib/training_data/todo/serbian-ascii.txt +121 -0
- data/lib/training_data/todo/slovak-ascii.txt +102 -0
- data/lib/training_data/todo/slovak-windows1250.txt +115 -0
- data/lib/training_data/todo/slovenian-ascii.txt +100 -0
- data/lib/training_data/todo/slovenian-iso8859_2.txt +96 -0
- data/lib/training_data/todo/sq.txt +110 -0
- data/lib/training_data/todo/swahili.txt +120 -0
- data/lib/training_data/todo/tagalog.txt +135 -0
- data/lib/training_data/todo/tamil.txt +123 -0
- data/lib/training_data/todo/turkish.txt +117 -0
- data/lib/training_data/todo/ukrainian-koi8_r.txt +214 -0
- data/lib/training_data/todo/vietnamese.txt +92 -0
- data/lib/training_data/todo/welsh.txt +148 -0
- data/lib/training_data/todo/yiddish-utf.txt +83 -0
- data/lib/training_data/uk-utf8.txt +75 -0
- data/lib/training_data/vi-utf8.txt +47 -0
- data/lib/training_data/zh-utf8.txt +228 -0
- data/test/language_detector_test.rb +78 -0
- metadata +232 -0
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 87128
|
2
|
+
a 50232
|
3
|
+
n 25424
|
4
|
+
e 18746
|
5
|
+
i 18605
|
6
|
+
an 14419
|
7
|
+
u 12470
|
8
|
+
k 11955
|
9
|
+
t 11875
|
10
|
+
r 11007
|
11
|
+
d 9856
|
12
|
+
g 9545
|
13
|
+
m 9390
|
14
|
+
s 8926
|
15
|
+
l 8631
|
16
|
+
n_ 8004
|
17
|
+
an_ 7095
|
18
|
+
p 6890
|
19
|
+
a_ 6739
|
20
|
+
b 6645
|
21
|
+
ng 6630
|
22
|
+
h 5964
|
23
|
+
da 5153
|
24
|
+
_d 5107
|
25
|
+
er 4625
|
26
|
+
ka 4448
|
27
|
+
la 4339
|
28
|
+
y 4323
|
29
|
+
i_ 4295
|
30
|
+
en 4159
|
31
|
+
ya 3800
|
32
|
+
ang 3778
|
33
|
+
_m 3750
|
34
|
+
o 3593
|
35
|
+
. 3539
|
36
|
+
._ 3310
|
37
|
+
at 3288
|
38
|
+
ah 3216
|
39
|
+
_b 3199
|
40
|
+
_s 3177
|
41
|
+
ta 3076
|
42
|
+
ra 3036
|
43
|
+
_k 2949
|
44
|
+
g_ 2939
|
45
|
+
ng_ 2933
|
46
|
+
ar 2915
|
47
|
+
_p 2906
|
48
|
+
me 2884
|
49
|
+
ga 2781
|
50
|
+
di 2778
|
51
|
+
ak 2727
|
52
|
+
al 2705
|
53
|
+
_me 2671
|
54
|
+
ang_ 2524
|
55
|
+
h_ 2509
|
56
|
+
ba 2508
|
57
|
+
pa 2454
|
58
|
+
kan 2449
|
59
|
+
in 2437
|
60
|
+
tu 2411
|
61
|
+
_t 2376
|
62
|
+
sa 2338
|
63
|
+
_da 2276
|
64
|
+
j 2276
|
65
|
+
pe 2225
|
66
|
+
, 2150
|
67
|
+
k_ 2147
|
68
|
+
ma 2143
|
69
|
+
se 2138
|
70
|
+
am 2131
|
71
|
+
kan_ 2117
|
72
|
+
_di 2082
|
73
|
+
,_ 2079
|
74
|
+
ke 2048
|
75
|
+
un 2004
|
76
|
+
be 1947
|
77
|
+
_a 1925
|
78
|
+
na 1871
|
79
|
+
ti 1868
|
80
|
+
ri 1861
|
81
|
+
u_ 1857
|
82
|
+
as 1854
|
83
|
+
ny 1827
|
84
|
+
ha 1789
|
85
|
+
te 1788
|
86
|
+
_pe 1768
|
87
|
+
em 1750
|
88
|
+
it 1737
|
89
|
+
_i 1732
|
90
|
+
_ke 1711
|
91
|
+
yan 1706
|
92
|
+
ad 1698
|
93
|
+
ia 1673
|
94
|
+
yang 1673
|
95
|
+
_y 1668
|
96
|
+
_ya 1655
|
97
|
+
yang_ 1653
|
98
|
+
_se 1648
|
99
|
+
ah_ 1646
|
100
|
+
_yan 1639
|
101
|
+
_yang 1639
|
102
|
+
ala 1612
|
103
|
+
nya 1587
|
104
|
+
el 1576
|
105
|
+
ik 1571
|
106
|
+
t_ 1568
|
107
|
+
ai 1549
|
108
|
+
men 1531
|
109
|
+
eng 1522
|
110
|
+
_men 1464
|
111
|
+
nga 1441
|
112
|
+
dan 1366
|
113
|
+
_be 1365
|
114
|
+
si 1343
|
115
|
+
uk 1328
|
116
|
+
ada 1299
|
117
|
+
nt 1291
|
118
|
+
__ 1287
|
119
|
+
ap 1276
|
120
|
+
ua 1265
|
121
|
+
___ 1238
|
122
|
+
- 1213
|
123
|
+
ja 1211
|
124
|
+
ber 1204
|
125
|
+
gan 1203
|
126
|
+
_ba 1193
|
127
|
+
____ 1189
|
128
|
+
ni 1181
|
129
|
+
_te 1169
|
130
|
+
c 1143
|
131
|
+
ran 1141
|
132
|
+
_____ 1140
|
133
|
+
m_ 1127
|
134
|
+
ara 1118
|
135
|
+
per 1099
|
136
|
+
le 1084
|
137
|
+
_dan 1083
|
138
|
+
dan_ 1079
|
139
|
+
ngan 1060
|
140
|
+
_dan_ 1050
|
141
|
+
ya_ 1046
|
142
|
+
at_ 1044
|
143
|
+
da_ 1021
|
144
|
+
li 1016
|
145
|
+
aka 1013
|
146
|
+
A 999
|
147
|
+
r_ 999
|
148
|
+
w 997
|
149
|
+
eb 995
|
150
|
+
lah 980
|
151
|
+
ata 980
|
152
|
+
ak_ 978
|
153
|
+
nd 974
|
154
|
+
_ber 955
|
155
|
+
gi 936
|
156
|
+
is 933
|
157
|
+
il 931
|
158
|
+
tu_ 923
|
159
|
+
s_ 920
|
160
|
+
gan_ 915
|
161
|
+
mb 913
|
162
|
+
wa 904
|
163
|
+
ag 903
|
164
|
+
ngan_ 898
|
165
|
+
ter 887
|
166
|
+
nya_ 877
|
167
|
+
S 873
|
168
|
+
ek 853
|
169
|
+
ru 852
|
170
|
+
_l 838
|
171
|
+
ela 828
|
172
|
+
itu 824
|
173
|
+
ol 822
|
174
|
+
aha 822
|
175
|
+
ada_ 820
|
176
|
+
pu 812
|
177
|
+
di_ 807
|
178
|
+
bu 807
|
179
|
+
am_ 804
|
180
|
+
ur 801
|
181
|
+
tan 790
|
182
|
+
mp 790
|
183
|
+
_per 786
|
184
|
+
_sa 784
|
185
|
+
M 782
|
186
|
+
ut 781
|
187
|
+
us 779
|
188
|
+
era 779
|
189
|
+
lam 778
|
190
|
+
lah_ 775
|
191
|
+
asa 767
|
192
|
+
ki 761
|
193
|
+
ir 759
|
194
|
+
de 756
|
195
|
+
enga 750
|
196
|
+
su 748
|
197
|
+
du 741
|
198
|
+
id 739
|
199
|
+
" 733
|
200
|
+
akan 732
|
201
|
+
apa 728
|
202
|
+
_S 724
|
203
|
+
ul 721
|
204
|
+
lu 717
|
205
|
+
ari 717
|
206
|
+
dal 704
|
207
|
+
et 698
|
208
|
+
es 698
|
209
|
+
pad 688
|
210
|
+
_ma 688
|
211
|
+
_M 685
|
212
|
+
ana 684
|
213
|
+
bi 679
|
214
|
+
pada 673
|
215
|
+
dala 673
|
216
|
+
l_ 671
|
217
|
+
ep 664
|
218
|
+
f 662
|
219
|
+
_di_ 658
|
220
|
+
B 655
|
221
|
+
ing 655
|
222
|
+
_j 654
|
223
|
+
ika 653
|
224
|
+
ku 650
|
225
|
+
_. 644
|
226
|
+
akan_ 642
|
227
|
+
ama 637
|
228
|
+
pen 636
|
229
|
+
alam 634
|
230
|
+
eh 634
|
231
|
+
pada_ 633
|
232
|
+
ai_ 632
|
233
|
+
_ter 632
|
234
|
+
K 631
|
235
|
+
mu 628
|
236
|
+
ju 628
|
237
|
+
P 626
|
238
|
+
mem 625
|
239
|
+
au 622
|
240
|
+
_mem 614
|
241
|
+
lan 612
|
242
|
+
_._ 611
|
243
|
+
ntu 608
|
244
|
+
lam_ 605
|
245
|
+
um 601
|
246
|
+
on 600
|
247
|
+
gk 597
|
248
|
+
_in 597
|
249
|
+
ngk 597
|
250
|
+
a. 584
|
251
|
+
meng 582
|
252
|
+
_meng 578
|
253
|
+
alam_ 577
|
254
|
+
_A 576
|
255
|
+
aa 575
|
256
|
+
uk_ 572
|
257
|
+
_pen 569
|
258
|
+
ban 569
|
259
|
+
or 569
|
260
|
+
st 566
|
261
|
+
ay 566
|
262
|
+
dar 565
|
263
|
+
_pa 564
|
264
|
+
a._ 564
|
265
|
+
_h 562
|
266
|
+
bah 562
|
267
|
+
_P 560
|
268
|
+
D 559
|
269
|
+
ri_ 558
|
270
|
+
ini 552
|
271
|
+
_de 551
|
272
|
+
rt 550
|
273
|
+
aan 545
|
274
|
+
_it 542
|
275
|
+
_itu 542
|
276
|
+
nda 540
|
277
|
+
eri 540
|
278
|
+
dalam 537
|
279
|
+
_B 533
|
280
|
+
_dal 532
|
281
|
+
ip 532
|
282
|
+
_dala 532
|
283
|
+
ta_ 528
|
284
|
+
_u 527
|
285
|
+
ung 525
|
286
|
+
ih 524
|
287
|
+
aw 520
|
288
|
+
_n 519
|
289
|
+
atu 517
|
290
|
+
ila 513
|
291
|
+
mi 513
|
292
|
+
leh 513
|
293
|
+
ian 512
|
294
|
+
tuk 509
|
295
|
+
awa 508
|
296
|
+
gu 506
|
297
|
+
ert 506
|
298
|
+
engan 505
|
299
|
+
ole 504
|
300
|
+
_K 501
|
301
|
+
seb 497
|
302
|
+
ca 496
|
303
|
+
gg 493
|
304
|
+
_ta 489
|
305
|
+
ra_ 488
|
306
|
+
ngg 488
|
307
|
+
itu_ 487
|
308
|
+
emb 482
|
309
|
+
ni_ 482
|
310
|
+
ida 482
|
311
|
+
nj 482
|
312
|
+
_ti 479
|
313
|
+
man 478
|
314
|
+
den 477
|
315
|
+
_D 474
|
316
|
+
_ka 473
|
317
|
+
aj 470
|
318
|
+
oleh 468
|
319
|
+
n. 468
|
320
|
+
n._ 464
|
321
|
+
ngka 464
|
322
|
+
gka 464
|
323
|
+
dak 464
|
324
|
+
anga 461
|
325
|
+
ena 459
|
326
|
+
san 458
|
327
|
+
pat 458
|
328
|
+
rk 458
|
329
|
+
( 455
|
330
|
+
ent 454
|
331
|
+
agi 453
|
332
|
+
) 451
|
333
|
+
ia_ 450
|
334
|
+
ge 450
|
335
|
+
ab 449
|
336
|
+
im 447
|
337
|
+
_ini 446
|
338
|
+
ntuk 445
|
339
|
+
I 445
|
340
|
+
ar_ 440
|
341
|
+
N 439
|
342
|
+
aan_ 436
|
343
|
+
_la 433
|
344
|
+
pi 432
|
345
|
+
baha 431
|
346
|
+
deng 430
|
347
|
+
han 430
|
348
|
+
bag 429
|
349
|
+
eh_ 429
|
350
|
+
hu 429
|
351
|
+
denga 428
|
352
|
+
_o 427
|
353
|
+
na_ 427
|
354
|
+
T 425
|
355
|
+
leh_ 422
|
356
|
+
_den 422
|
357
|
+
ka_ 419
|
358
|
+
any 415
|
359
|
+
ud 415
|
360
|
+
rang 414
|
361
|
+
anya 413
|
362
|
+
gi_ 412
|
363
|
+
angan 412
|
364
|
+
a, 411
|
365
|
+
_deng 410
|
366
|
+
ita 409
|
367
|
+
kat 408
|
368
|
+
re 408
|
369
|
+
_( 406
|
370
|
+
tuk_ 403
|
371
|
+
aga 401
|
372
|
+
ne 400
|
373
|
+
and 399
|
374
|
+
aya 398
|
375
|
+
_dar 397
|
376
|
+
a,_ 396
|
377
|
+
ro 396
|
378
|
+
ntuk_ 393
|
379
|
+
eba 392
|
380
|
+
aran 390
|
381
|
+
_" 387
|
382
|
+
ed 385
|
383
|
+
end 384
|
384
|
+
ko 383
|
385
|
+
sa_ 381
|
386
|
+
p_ 381
|
387
|
+
ara_ 380
|
388
|
+
_seb 379
|
389
|
+
alah 379
|
390
|
+
oleh_ 379
|
391
|
+
an. 378
|
392
|
+
dak_ 378
|
393
|
+
eg 378
|
394
|
+
hi 376
|
395
|
+
dari 375
|
396
|
+
an._ 375
|
397
|
+
au_ 373
|
398
|
+
bo 373
|
399
|
+
ti_ 371
|
400
|
+
ula 371
|
@@ -0,0 +1,400 @@
|
|
1
|
+
_ 36004
|
2
|
+
e 9455
|
3
|
+
a 8302
|
4
|
+
y 6395
|
5
|
+
n 6395
|
6
|
+
h 5736
|
7
|
+
r 4939
|
8
|
+
s 4799
|
9
|
+
o 4429
|
10
|
+
i 4129
|
11
|
+
l 3462
|
12
|
+
y_ 2896
|
13
|
+
g 2731
|
14
|
+
n_ 2549
|
15
|
+
d 2232
|
16
|
+
t 2160
|
17
|
+
_a 1836
|
18
|
+
m 1823
|
19
|
+
sh 1774
|
20
|
+
e_ 1746
|
21
|
+
h_ 1645
|
22
|
+
yn 1550
|
23
|
+
c 1531
|
24
|
+
ee 1515
|
25
|
+
gh 1461
|
26
|
+
s_ 1460
|
27
|
+
_s 1388
|
28
|
+
ey 1345
|
29
|
+
_e 1316
|
30
|
+
ag 1312
|
31
|
+
a_ 1201
|
32
|
+
r_ 1173
|
33
|
+
agh 1165
|
34
|
+
in 1148
|
35
|
+
as 1136
|
36
|
+
_d 1136
|
37
|
+
u 1124
|
38
|
+
he 1060
|
39
|
+
yn_ 1025
|
40
|
+
oo 1012
|
41
|
+
ey_ 1005
|
42
|
+
_m 1002
|
43
|
+
er 965
|
44
|
+
v 949
|
45
|
+
_y 941
|
46
|
+
_v 895
|
47
|
+
_c 891
|
48
|
+
_as 880
|
49
|
+
' 869
|
50
|
+
, 866
|
51
|
+
_n 832
|
52
|
+
ll 828
|
53
|
+
,_ 827
|
54
|
+
_sh 818
|
55
|
+
. 802
|
56
|
+
hi 797
|
57
|
+
as_ 770
|
58
|
+
ee_ 768
|
59
|
+
ne 767
|
60
|
+
_as_ 764
|
61
|
+
b 762
|
62
|
+
re 735
|
63
|
+
ay 730
|
64
|
+
._ 730
|
65
|
+
an 726
|
66
|
+
ar 725
|
67
|
+
gh_ 722
|
68
|
+
ny 709
|
69
|
+
en 703
|
70
|
+
_r 697
|
71
|
+
ch 688
|
72
|
+
agh_ 687
|
73
|
+
dy 686
|
74
|
+
t_ 677
|
75
|
+
le 667
|
76
|
+
k 658
|
77
|
+
er_ 616
|
78
|
+
oi 612
|
79
|
+
ea 607
|
80
|
+
_t 601
|
81
|
+
yr 596
|
82
|
+
_er 585
|
83
|
+
ra 574
|
84
|
+
_dy 572
|
85
|
+
in_ 570
|
86
|
+
l_ 564
|
87
|
+
f 557
|
88
|
+
_l 556
|
89
|
+
ha 551
|
90
|
+
_g 548
|
91
|
+
_ny 534
|
92
|
+
nn 530
|
93
|
+
" 528
|
94
|
+
_ch 527
|
95
|
+
_y_ 524
|
96
|
+
ie 514
|
97
|
+
dy_ 514
|
98
|
+
_dy_ 513
|
99
|
+
aa 510
|
100
|
+
_f 509
|
101
|
+
j 504
|
102
|
+
sh_ 487
|
103
|
+
oa 480
|
104
|
+
is 478
|
105
|
+
_h 470
|
106
|
+
rr 468
|
107
|
+
ny_ 467
|
108
|
+
_ny_ 463
|
109
|
+
_er_ 454
|
110
|
+
ish 445
|
111
|
+
ho 442
|
112
|
+
ai 441
|
113
|
+
d_ 435
|
114
|
+
ro 423
|
115
|
+
ht 418
|
116
|
+
ei 417
|
117
|
+
shi 416
|
118
|
+
il 409
|
119
|
+
me 408
|
120
|
+
_ay 403
|
121
|
+
_b 403
|
122
|
+
la 400
|
123
|
+
_j 400
|
124
|
+
my 394
|
125
|
+
va 391
|
126
|
+
ns 386
|
127
|
+
on 385
|
128
|
+
_o 381
|
129
|
+
ys 380
|
130
|
+
_shi 379
|
131
|
+
ia 377
|
132
|
+
ayn 373
|
133
|
+
_va 371
|
134
|
+
hen 362
|
135
|
+
she 356
|
136
|
+
ri 345
|
137
|
+
lle 342
|
138
|
+
ooi 342
|
139
|
+
mee 340
|
140
|
+
ley 335
|
141
|
+
_me 331
|
142
|
+
el 330
|
143
|
+
rt 328
|
144
|
+
ie_ 327
|
145
|
+
eh 324
|
146
|
+
w 316
|
147
|
+
_ayn 313
|
148
|
+
al 311
|
149
|
+
g_ 309
|
150
|
+
ish_ 308
|
151
|
+
lley 307
|
152
|
+
mee_ 305
|
153
|
+
_mee 304
|
154
|
+
ill 301
|
155
|
+
es 299
|
156
|
+
na 299
|
157
|
+
je 298
|
158
|
+
yns 296
|
159
|
+
C 294
|
160
|
+
_my 291
|
161
|
+
_she 290
|
162
|
+
ley_ 282
|
163
|
+
V 280
|
164
|
+
_yn 278
|
165
|
+
_" 278
|
166
|
+
_mee_ 277
|
167
|
+
ta 272
|
168
|
+
_V 271
|
169
|
+
ys_ 268
|
170
|
+
- 268
|
171
|
+
lley_ 265
|
172
|
+
hin 264
|
173
|
+
_ro 259
|
174
|
+
shin 256
|
175
|
+
_yn_ 255
|
176
|
+
_je 255
|
177
|
+
do 253
|
178
|
+
va_ 253
|
179
|
+
ne_ 253
|
180
|
+
_va_ 252
|
181
|
+
ns_ 252
|
182
|
+
_shin 251
|
183
|
+
yns_ 250
|
184
|
+
ayns 248
|
185
|
+
en_ 247
|
186
|
+
che 246
|
187
|
+
_ayns 246
|
188
|
+
eh_ 246
|
189
|
+
_do 242
|
190
|
+
ad 241
|
191
|
+
ney 240
|
192
|
+
o_ 240
|
193
|
+
ym 240
|
194
|
+
ed 239
|
195
|
+
yr_ 239
|
196
|
+
ayns_ 237
|
197
|
+
ur 237
|
198
|
+
st 234
|
199
|
+
_C 234
|
200
|
+
rt_ 234
|
201
|
+
'n 232
|
202
|
+
m_ 232
|
203
|
+
p 231
|
204
|
+
li 231
|
205
|
+
or 230
|
206
|
+
ow 228
|
207
|
+
hin_ 225
|
208
|
+
da 225
|
209
|
+
shen 223
|
210
|
+
"_ 223
|
211
|
+
'n_ 223
|
212
|
+
Va 221
|
213
|
+
ght 220
|
214
|
+
shin_ 219
|
215
|
+
tr 217
|
216
|
+
_Va 217
|
217
|
+
ry 216
|
218
|
+
ve 216
|
219
|
+
_shen 215
|
220
|
+
ty 214
|
221
|
+
mo 206
|
222
|
+
_' 205
|
223
|
+
_ve 205
|
224
|
+
ma 203
|
225
|
+
be 203
|
226
|
+
te 203
|
227
|
+
hie 203
|
228
|
+
hey 203
|
229
|
+
nag 202
|
230
|
+
ll_ 201
|
231
|
+
yl 200
|
232
|
+
w_ 200
|
233
|
+
ss 200
|
234
|
+
aa_ 198
|
235
|
+
nagh 198
|
236
|
+
an_ 197
|
237
|
+
io 195
|
238
|
+
ow_ 194
|
239
|
+
it 194
|
240
|
+
sy 193
|
241
|
+
ayr 193
|
242
|
+
ney_ 192
|
243
|
+
E 192
|
244
|
+
sht 192
|
245
|
+
ni 191
|
246
|
+
_k 190
|
247
|
+
ha_ 190
|
248
|
+
ain 189
|
249
|
+
u_ 189
|
250
|
+
hy 189
|
251
|
+
aght 188
|
252
|
+
oo_ 188
|
253
|
+
ree 188
|
254
|
+
lh 187
|
255
|
+
_tr 186
|
256
|
+
esh 186
|
257
|
+
_che 183
|
258
|
+
yrt 182
|
259
|
+
_da 182
|
260
|
+
oar 182
|
261
|
+
doo 181
|
262
|
+
k_ 181
|
263
|
+
se 180
|
264
|
+
au 180
|
265
|
+
ille 179
|
266
|
+
ar_ 179
|
267
|
+
_lh 179
|
268
|
+
ki 177
|
269
|
+
arr 176
|
270
|
+
ec 176
|
271
|
+
ol 175
|
272
|
+
_doo 175
|
273
|
+
T 175
|
274
|
+
row 175
|
275
|
+
_row 174
|
276
|
+
ge 173
|
277
|
+
so 172
|
278
|
+
oy 171
|
279
|
+
oil 170
|
280
|
+
_re 170
|
281
|
+
_ag 170
|
282
|
+
'e 169
|
283
|
+
rey 169
|
284
|
+
illey 169
|
285
|
+
ck 168
|
286
|
+
ad_ 168
|
287
|
+
ann 168
|
288
|
+
n, 166
|
289
|
+
eea 166
|
290
|
+
_ta 166
|
291
|
+
ht_ 165
|
292
|
+
ae 162
|
293
|
+
_row_ 162
|
294
|
+
! 162
|
295
|
+
row_ 162
|
296
|
+
ane 161
|
297
|
+
fe 161
|
298
|
+
dd 160
|
299
|
+
go 159
|
300
|
+
tyn 159
|
301
|
+
oin 158
|
302
|
+
ooa 158
|
303
|
+
n. 158
|
304
|
+
eg 156
|
305
|
+
_ec 156
|
306
|
+
_ma 156
|
307
|
+
_agh 155
|
308
|
+
n,_ 154
|
309
|
+
_fe 154
|
310
|
+
Ch 154
|
311
|
+
nyn 153
|
312
|
+
fo 152
|
313
|
+
eay 152
|
314
|
+
nagh_ 152
|
315
|
+
n._ 152
|
316
|
+
_go 151
|
317
|
+
S 150
|
318
|
+
ke 150
|
319
|
+
hey_ 150
|
320
|
+
enn 150
|
321
|
+
cha 149
|
322
|
+
rre 149
|
323
|
+
_fo 149
|
324
|
+
ghe 149
|
325
|
+
raa 149
|
326
|
+
G 148
|
327
|
+
lan 148
|
328
|
+
mm 147
|
329
|
+
ym_ 147
|
330
|
+
A 146
|
331
|
+
c_ 146
|
332
|
+
oill 145
|
333
|
+
hee 144
|
334
|
+
ooar 144
|
335
|
+
_E 144
|
336
|
+
nne 143
|
337
|
+
tey 142
|
338
|
+
ir 141
|
339
|
+
de 141
|
340
|
+
hyn 140
|
341
|
+
_he 140
|
342
|
+
nyn_ 140
|
343
|
+
's 139
|
344
|
+
_gh 139
|
345
|
+
_cha 138
|
346
|
+
L 138
|
347
|
+
yrt_ 138
|
348
|
+
_Ch 137
|
349
|
+
e, 137
|
350
|
+
lla 136
|
351
|
+
Va_ 136
|
352
|
+
ooin 136
|
353
|
+
ell 135
|
354
|
+
a' 135
|
355
|
+
os 135
|
356
|
+
_oo 134
|
357
|
+
am 134
|
358
|
+
rish 133
|
359
|
+
th 133
|
360
|
+
_mo 133
|
361
|
+
ris 133
|
362
|
+
iag 133
|
363
|
+
gg 133
|
364
|
+
_Va_ 132
|
365
|
+
iagh 132
|
366
|
+
_G 132
|
367
|
+
e,_ 132
|
368
|
+
ass 132
|
369
|
+
!_ 132
|
370
|
+
my_ 132
|
371
|
+
hoo 131
|
372
|
+
_T 131
|
373
|
+
nee 130
|
374
|
+
a'n_ 129
|
375
|
+
rag 129
|
376
|
+
a'n 129
|
377
|
+
_'s 129
|
378
|
+
_so 128
|
379
|
+
co 128
|
380
|
+
rey_ 128
|
381
|
+
_eh 126
|
382
|
+
_my_ 126
|
383
|
+
nni 126
|
384
|
+
ou 126
|
385
|
+
_'sy 125
|
386
|
+
'sy 125
|
387
|
+
_p 125
|
388
|
+
vo 125
|
389
|
+
_S 125
|
390
|
+
H 125
|
391
|
+
_agh_ 125
|
392
|
+
tra 124
|
393
|
+
moo 124
|
394
|
+
hu 123
|
395
|
+
ooy 123
|
396
|
+
ragh 123
|
397
|
+
al_ 123
|
398
|
+
it_ 123
|
399
|
+
hia 122
|
400
|
+
id 122
|