xapian-fu 0.2 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +152 -13
- data/examples/query.rb +34 -6
- data/examples/spider.rb +44 -15
- data/lib/xapian_fu/query_parser.rb +179 -0
- data/lib/xapian_fu/result_set.rb +52 -0
- data/lib/xapian_fu/stopper_factory.rb +40 -0
- data/lib/xapian_fu/stopwords/README +7 -0
- data/lib/xapian_fu/stopwords/danish.txt +102 -0
- data/lib/xapian_fu/stopwords/dutch.txt +113 -0
- data/lib/xapian_fu/stopwords/english.txt +312 -0
- data/lib/xapian_fu/stopwords/finnish.txt +89 -0
- data/lib/xapian_fu/stopwords/french.txt +168 -0
- data/lib/xapian_fu/stopwords/german.txt +286 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +203 -0
- data/lib/xapian_fu/stopwords/italian.txt +295 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +186 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +245 -0
- data/lib/xapian_fu/stopwords/russian.txt +236 -0
- data/lib/xapian_fu/stopwords/spanish.txt +348 -0
- data/lib/xapian_fu/stopwords/swedish.txt +125 -0
- data/lib/xapian_fu/stopwords/update.rb +7 -0
- data/lib/xapian_fu/xapian_db.rb +215 -99
- data/lib/xapian_fu/xapian_doc.rb +229 -47
- data/lib/xapian_fu/xapian_doc_value_accessor.rb +125 -0
- data/lib/xapian_fu/xapian_documents_accessor.rb +82 -0
- data/lib/xapian_fu.rb +1 -0
- data/spec/query_parser_spec.rb +43 -0
- data/spec/stopper_factory_spec.rb +57 -0
- data/spec/xapian_db_spec.rb +458 -215
- data/spec/xapian_doc_spec.rb +180 -0
- data/spec/xapian_doc_value_accessor_spec.rb +92 -0
- metadata +29 -5
@@ -0,0 +1,236 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
| A Russian stop word list. Comments begin with vertical bar. Each stop
|
4
|
+
| word is at the start of a line.
|
5
|
+
|
6
|
+
| This is a ranked list (commonest to rarest) of stopwords derived from
|
7
|
+
| a large text sample.
|
8
|
+
|
9
|
+
| Letter `ё' is translated to `е'.
|
10
|
+
|
11
|
+
и | and
|
12
|
+
в | in/into
|
13
|
+
во | alternative form
|
14
|
+
не | not
|
15
|
+
что | what/that
|
16
|
+
он | he
|
17
|
+
на | on/onto
|
18
|
+
я | i
|
19
|
+
с | from
|
20
|
+
со | alternative form
|
21
|
+
как | how
|
22
|
+
а | milder form of `no' (but)
|
23
|
+
то | conjunction and form of `that'
|
24
|
+
все | all
|
25
|
+
она | she
|
26
|
+
так | so, thus
|
27
|
+
его | him
|
28
|
+
но | but
|
29
|
+
да | yes/and
|
30
|
+
ты | thou
|
31
|
+
к | towards, by
|
32
|
+
у | around, chez
|
33
|
+
же | intensifier particle
|
34
|
+
вы | you
|
35
|
+
за | beyond, behind
|
36
|
+
бы | conditional/subj. particle
|
37
|
+
по | up to, along
|
38
|
+
только | only
|
39
|
+
ее | her
|
40
|
+
мне | to me
|
41
|
+
было | it was
|
42
|
+
вот | here is/are, particle
|
43
|
+
от | away from
|
44
|
+
меня | me
|
45
|
+
еще | still, yet, more
|
46
|
+
нет | no, there isnt/arent
|
47
|
+
о | about
|
48
|
+
из | out of
|
49
|
+
ему | to him
|
50
|
+
теперь | now
|
51
|
+
когда | when
|
52
|
+
даже | even
|
53
|
+
ну | so, well
|
54
|
+
вдруг | suddenly
|
55
|
+
ли | interrogative particle
|
56
|
+
если | if
|
57
|
+
уже | already, but homonym of `narrower'
|
58
|
+
или | or
|
59
|
+
ни | neither
|
60
|
+
быть | to be
|
61
|
+
был | he was
|
62
|
+
него | prepositional form of его
|
63
|
+
до | up to
|
64
|
+
вас | you accusative
|
65
|
+
нибудь | indef. suffix preceded by hyphen
|
66
|
+
опять | again
|
67
|
+
уж | already, but homonym of `adder'
|
68
|
+
вам | to you
|
69
|
+
сказал | he said
|
70
|
+
ведь | particle `after all'
|
71
|
+
там | there
|
72
|
+
потом | then
|
73
|
+
себя | oneself
|
74
|
+
ничего | nothing
|
75
|
+
ей | to her
|
76
|
+
может | usually with `быть' as `maybe'
|
77
|
+
они | they
|
78
|
+
тут | here
|
79
|
+
где | where
|
80
|
+
есть | there is/are
|
81
|
+
надо | got to, must
|
82
|
+
ней | prepositional form of ей
|
83
|
+
для | for
|
84
|
+
мы | we
|
85
|
+
тебя | thee
|
86
|
+
их | them, their
|
87
|
+
чем | than
|
88
|
+
была | she was
|
89
|
+
сам | self
|
90
|
+
чтоб | in order to
|
91
|
+
без | without
|
92
|
+
будто | as if
|
93
|
+
человек | man, person, one
|
94
|
+
чего | genitive form of `what'
|
95
|
+
раз | once
|
96
|
+
тоже | also
|
97
|
+
себе | to oneself
|
98
|
+
под | beneath
|
99
|
+
жизнь | life
|
100
|
+
будет | will be
|
101
|
+
ж | short form of intensifier particle `же'
|
102
|
+
тогда | then
|
103
|
+
кто | who
|
104
|
+
этот | this
|
105
|
+
говорил | was saying
|
106
|
+
того | genitive form of `that'
|
107
|
+
потому | for that reason
|
108
|
+
этого | genitive form of `this'
|
109
|
+
какой | which
|
110
|
+
совсем | altogether
|
111
|
+
ним | prepositional form of `его', `они'
|
112
|
+
здесь | here
|
113
|
+
этом | prepositional form of `этот'
|
114
|
+
один | one
|
115
|
+
почти | almost
|
116
|
+
мой | my
|
117
|
+
тем | instrumental/dative plural of `тот', `то'
|
118
|
+
чтобы | full form of `in order that'
|
119
|
+
нее | her (acc.)
|
120
|
+
кажется | it seems
|
121
|
+
сейчас | now
|
122
|
+
были | they were
|
123
|
+
куда | where to
|
124
|
+
зачем | why
|
125
|
+
сказать | to say
|
126
|
+
всех | all (acc., gen. preposn. plural)
|
127
|
+
никогда | never
|
128
|
+
сегодня | today
|
129
|
+
можно | possible, one can
|
130
|
+
при | by
|
131
|
+
наконец | finally
|
132
|
+
два | two
|
133
|
+
об | alternative form of `о', about
|
134
|
+
другой | another
|
135
|
+
хоть | even
|
136
|
+
после | after
|
137
|
+
над | above
|
138
|
+
больше | more
|
139
|
+
тот | that one (masc.)
|
140
|
+
через | across, in
|
141
|
+
эти | these
|
142
|
+
нас | us
|
143
|
+
про | about
|
144
|
+
всего | in all, only, of all
|
145
|
+
них | prepositional form of `они' (they)
|
146
|
+
какая | which, feminine
|
147
|
+
много | lots
|
148
|
+
разве | interrogative particle
|
149
|
+
сказала | she said
|
150
|
+
три | three
|
151
|
+
эту | this, acc. fem. sing.
|
152
|
+
моя | my, feminine
|
153
|
+
впрочем | moreover, besides
|
154
|
+
хорошо | good
|
155
|
+
свою | ones own, acc. fem. sing.
|
156
|
+
этой | oblique form of `эта', fem. `this'
|
157
|
+
перед | in front of
|
158
|
+
иногда | sometimes
|
159
|
+
лучше | better
|
160
|
+
чуть | a little
|
161
|
+
том | preposn. form of `that one'
|
162
|
+
нельзя | one must not
|
163
|
+
такой | such a one
|
164
|
+
им | to them
|
165
|
+
более | more
|
166
|
+
всегда | always
|
167
|
+
конечно | of course
|
168
|
+
всю | acc. fem. sing of `all'
|
169
|
+
между | between
|
170
|
+
|
171
|
+
|
172
|
+
| b: some paradigms
|
173
|
+
|
|
174
|
+
| personal pronouns
|
175
|
+
|
|
176
|
+
| я меня мне мной [мною]
|
177
|
+
| ты тебя тебе тобой [тобою]
|
178
|
+
| он его ему им [него, нему, ним]
|
179
|
+
| она ее ей ею [нее, ней, нею]
|
180
|
+
| оно его ему им [него, нему, ним]
|
181
|
+
|
|
182
|
+
| мы нас нам нами
|
183
|
+
| вы вас вам вами
|
184
|
+
| они их им ими [них, ним, ними]
|
185
|
+
|
|
186
|
+
| себя себе собой [собою]
|
187
|
+
|
|
188
|
+
| demonstrative pronouns: этот (this), тот (that)
|
189
|
+
|
|
190
|
+
| этот эта это эти
|
191
|
+
| этого эты это эти
|
192
|
+
| этого этой этого этих
|
193
|
+
| этому этой этому этим
|
194
|
+
| этим этой этим [этою] этими
|
195
|
+
| этом этой этом этих
|
196
|
+
|
|
197
|
+
| тот та то те
|
198
|
+
| того ту то те
|
199
|
+
| того той того тех
|
200
|
+
| тому той тому тем
|
201
|
+
| тем той тем [тою] теми
|
202
|
+
| том той том тех
|
203
|
+
|
|
204
|
+
| determinative pronouns
|
205
|
+
|
|
206
|
+
| (a) весь (all)
|
207
|
+
|
|
208
|
+
| весь вся все все
|
209
|
+
| всего всю все все
|
210
|
+
| всего всей всего всех
|
211
|
+
| всему всей всему всем
|
212
|
+
| всем всей всем [всею] всеми
|
213
|
+
| всем всей всем всех
|
214
|
+
|
|
215
|
+
| (b) сам (himself etc)
|
216
|
+
|
|
217
|
+
| сам сама само сами
|
218
|
+
| самого саму само самих
|
219
|
+
| самого самой самого самих
|
220
|
+
| самому самой самому самим
|
221
|
+
| самим самой самим [самою] самими
|
222
|
+
| самом самой самом самих
|
223
|
+
|
|
224
|
+
| stems of verbs `to be', `to have', `to do' and modal
|
225
|
+
|
|
226
|
+
| быть бы буд быв есть суть
|
227
|
+
| име
|
228
|
+
| дел
|
229
|
+
| мог мож мочь
|
230
|
+
| уме
|
231
|
+
| хоч хот
|
232
|
+
| долж
|
233
|
+
| можн
|
234
|
+
| нужн
|
235
|
+
| нельзя
|
236
|
+
|
@@ -0,0 +1,348 @@
|
|
1
|
+
|
2
|
+
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
3
|
+
| word is at the start of a line.
|
4
|
+
|
5
|
+
|
6
|
+
| The following is a ranked list (commonest to rarest) of stopwords
|
7
|
+
| deriving from a large sample of text.
|
8
|
+
|
9
|
+
| Extra words have been added at the end.
|
10
|
+
|
11
|
+
de | from, of
|
12
|
+
la | the, her
|
13
|
+
que | who, that
|
14
|
+
el | the
|
15
|
+
en | in
|
16
|
+
y | and
|
17
|
+
a | to
|
18
|
+
los | the, them
|
19
|
+
del | de + el
|
20
|
+
se | himself, from him etc
|
21
|
+
las | the, them
|
22
|
+
por | for, by, etc
|
23
|
+
un | a
|
24
|
+
para | for
|
25
|
+
con | with
|
26
|
+
no | no
|
27
|
+
una | a
|
28
|
+
su | his, her
|
29
|
+
al | a + el
|
30
|
+
| es from SER
|
31
|
+
lo | him
|
32
|
+
como | how
|
33
|
+
más | more
|
34
|
+
pero | but
|
35
|
+
sus | su plural
|
36
|
+
le | to him, her
|
37
|
+
ya | already
|
38
|
+
o | or
|
39
|
+
| fue from SER
|
40
|
+
este | this
|
41
|
+
| ha from HABER
|
42
|
+
sí | himself etc
|
43
|
+
porque | because
|
44
|
+
esta | this
|
45
|
+
| son from SER
|
46
|
+
entre | between
|
47
|
+
| está from ESTAR
|
48
|
+
cuando | when
|
49
|
+
muy | very
|
50
|
+
sin | without
|
51
|
+
sobre | on
|
52
|
+
| ser from SER
|
53
|
+
| tiene from TENER
|
54
|
+
también | also
|
55
|
+
me | me
|
56
|
+
hasta | until
|
57
|
+
hay | there is/are
|
58
|
+
donde | where
|
59
|
+
| han from HABER
|
60
|
+
quien | whom, that
|
61
|
+
| están from ESTAR
|
62
|
+
| estado from ESTAR
|
63
|
+
desde | from
|
64
|
+
todo | all
|
65
|
+
nos | us
|
66
|
+
durante | during
|
67
|
+
| estados from ESTAR
|
68
|
+
todos | all
|
69
|
+
uno | a
|
70
|
+
les | to them
|
71
|
+
ni | nor
|
72
|
+
contra | against
|
73
|
+
otros | other
|
74
|
+
| fueron from SER
|
75
|
+
ese | that
|
76
|
+
eso | that
|
77
|
+
| había from HABER
|
78
|
+
ante | before
|
79
|
+
ellos | they
|
80
|
+
e | and (variant of y)
|
81
|
+
esto | this
|
82
|
+
mí | me
|
83
|
+
antes | before
|
84
|
+
algunos | some
|
85
|
+
qué | what?
|
86
|
+
unos | a
|
87
|
+
yo | I
|
88
|
+
otro | other
|
89
|
+
otras | other
|
90
|
+
otra | other
|
91
|
+
él | he
|
92
|
+
tanto | so much, many
|
93
|
+
esa | that
|
94
|
+
estos | these
|
95
|
+
mucho | much, many
|
96
|
+
quienes | who
|
97
|
+
nada | nothing
|
98
|
+
muchos | many
|
99
|
+
cual | who
|
100
|
+
| sea from SER
|
101
|
+
poco | few
|
102
|
+
ella | she
|
103
|
+
estar | to be
|
104
|
+
| haber from HABER
|
105
|
+
estas | these
|
106
|
+
| estaba from ESTAR
|
107
|
+
| estamos from ESTAR
|
108
|
+
algunas | some
|
109
|
+
algo | something
|
110
|
+
nosotros | we
|
111
|
+
|
112
|
+
| other forms
|
113
|
+
|
114
|
+
mi | me
|
115
|
+
mis | mi plural
|
116
|
+
tú | thou
|
117
|
+
te | thee
|
118
|
+
ti | thee
|
119
|
+
tu | thy
|
120
|
+
tus | tu plural
|
121
|
+
ellas | they
|
122
|
+
nosotras | we
|
123
|
+
vosotros | you
|
124
|
+
vosotras | you
|
125
|
+
os | you
|
126
|
+
mío | mine
|
127
|
+
mía |
|
128
|
+
míos |
|
129
|
+
mías |
|
130
|
+
tuyo | thine
|
131
|
+
tuya |
|
132
|
+
tuyos |
|
133
|
+
tuyas |
|
134
|
+
suyo | his, hers, theirs
|
135
|
+
suya |
|
136
|
+
suyos |
|
137
|
+
suyas |
|
138
|
+
nuestro | ours
|
139
|
+
nuestra |
|
140
|
+
nuestros |
|
141
|
+
nuestras |
|
142
|
+
vuestro | yours
|
143
|
+
vuestra |
|
144
|
+
vuestros |
|
145
|
+
vuestras |
|
146
|
+
esos | those
|
147
|
+
esas | those
|
148
|
+
|
149
|
+
| forms of estar, to be (not including the infinitive):
|
150
|
+
estoy
|
151
|
+
estás
|
152
|
+
está
|
153
|
+
estamos
|
154
|
+
estáis
|
155
|
+
están
|
156
|
+
esté
|
157
|
+
estés
|
158
|
+
estemos
|
159
|
+
estéis
|
160
|
+
estén
|
161
|
+
estaré
|
162
|
+
estarás
|
163
|
+
estará
|
164
|
+
estaremos
|
165
|
+
estaréis
|
166
|
+
estarán
|
167
|
+
estaría
|
168
|
+
estarías
|
169
|
+
estaríamos
|
170
|
+
estaríais
|
171
|
+
estarían
|
172
|
+
estaba
|
173
|
+
estabas
|
174
|
+
estábamos
|
175
|
+
estabais
|
176
|
+
estaban
|
177
|
+
estuve
|
178
|
+
estuviste
|
179
|
+
estuvo
|
180
|
+
estuvimos
|
181
|
+
estuvisteis
|
182
|
+
estuvieron
|
183
|
+
estuviera
|
184
|
+
estuvieras
|
185
|
+
estuviéramos
|
186
|
+
estuvierais
|
187
|
+
estuvieran
|
188
|
+
estuviese
|
189
|
+
estuvieses
|
190
|
+
estuviésemos
|
191
|
+
estuvieseis
|
192
|
+
estuviesen
|
193
|
+
estando
|
194
|
+
estado
|
195
|
+
estada
|
196
|
+
estados
|
197
|
+
estadas
|
198
|
+
estad
|
199
|
+
|
200
|
+
| forms of haber, to have (not including the infinitive):
|
201
|
+
he
|
202
|
+
has
|
203
|
+
ha
|
204
|
+
hemos
|
205
|
+
habéis
|
206
|
+
han
|
207
|
+
haya
|
208
|
+
hayas
|
209
|
+
hayamos
|
210
|
+
hayáis
|
211
|
+
hayan
|
212
|
+
habré
|
213
|
+
habrás
|
214
|
+
habrá
|
215
|
+
habremos
|
216
|
+
habréis
|
217
|
+
habrán
|
218
|
+
habría
|
219
|
+
habrías
|
220
|
+
habríamos
|
221
|
+
habríais
|
222
|
+
habrían
|
223
|
+
había
|
224
|
+
habías
|
225
|
+
habíamos
|
226
|
+
habíais
|
227
|
+
habían
|
228
|
+
hube
|
229
|
+
hubiste
|
230
|
+
hubo
|
231
|
+
hubimos
|
232
|
+
hubisteis
|
233
|
+
hubieron
|
234
|
+
hubiera
|
235
|
+
hubieras
|
236
|
+
hubiéramos
|
237
|
+
hubierais
|
238
|
+
hubieran
|
239
|
+
hubiese
|
240
|
+
hubieses
|
241
|
+
hubiésemos
|
242
|
+
hubieseis
|
243
|
+
hubiesen
|
244
|
+
habiendo
|
245
|
+
habido
|
246
|
+
habida
|
247
|
+
habidos
|
248
|
+
habidas
|
249
|
+
|
250
|
+
| forms of ser, to be (not including the infinitive):
|
251
|
+
soy
|
252
|
+
eres
|
253
|
+
es
|
254
|
+
somos
|
255
|
+
sois
|
256
|
+
son
|
257
|
+
sea
|
258
|
+
seas
|
259
|
+
seamos
|
260
|
+
seáis
|
261
|
+
sean
|
262
|
+
seré
|
263
|
+
serás
|
264
|
+
será
|
265
|
+
seremos
|
266
|
+
seréis
|
267
|
+
serán
|
268
|
+
sería
|
269
|
+
serías
|
270
|
+
seríamos
|
271
|
+
seríais
|
272
|
+
serían
|
273
|
+
era
|
274
|
+
eras
|
275
|
+
éramos
|
276
|
+
erais
|
277
|
+
eran
|
278
|
+
fui
|
279
|
+
fuiste
|
280
|
+
fue
|
281
|
+
fuimos
|
282
|
+
fuisteis
|
283
|
+
fueron
|
284
|
+
fuera
|
285
|
+
fueras
|
286
|
+
fuéramos
|
287
|
+
fuerais
|
288
|
+
fueran
|
289
|
+
fuese
|
290
|
+
fueses
|
291
|
+
fuésemos
|
292
|
+
fueseis
|
293
|
+
fuesen
|
294
|
+
siendo
|
295
|
+
sido
|
296
|
+
| sed also means 'thirst'
|
297
|
+
|
298
|
+
| forms of tener, to have (not including the infinitive):
|
299
|
+
tengo
|
300
|
+
tienes
|
301
|
+
tiene
|
302
|
+
tenemos
|
303
|
+
tenéis
|
304
|
+
tienen
|
305
|
+
tenga
|
306
|
+
tengas
|
307
|
+
tengamos
|
308
|
+
tengáis
|
309
|
+
tengan
|
310
|
+
tendré
|
311
|
+
tendrás
|
312
|
+
tendrá
|
313
|
+
tendremos
|
314
|
+
tendréis
|
315
|
+
tendrán
|
316
|
+
tendría
|
317
|
+
tendrías
|
318
|
+
tendríamos
|
319
|
+
tendríais
|
320
|
+
tendrían
|
321
|
+
tenía
|
322
|
+
tenías
|
323
|
+
teníamos
|
324
|
+
teníais
|
325
|
+
tenían
|
326
|
+
tuve
|
327
|
+
tuviste
|
328
|
+
tuvo
|
329
|
+
tuvimos
|
330
|
+
tuvisteis
|
331
|
+
tuvieron
|
332
|
+
tuviera
|
333
|
+
tuvieras
|
334
|
+
tuviéramos
|
335
|
+
tuvierais
|
336
|
+
tuvieran
|
337
|
+
tuviese
|
338
|
+
tuvieses
|
339
|
+
tuviésemos
|
340
|
+
tuvieseis
|
341
|
+
tuviesen
|
342
|
+
teniendo
|
343
|
+
tenido
|
344
|
+
tenida
|
345
|
+
tenidos
|
346
|
+
tenidas
|
347
|
+
tened
|
348
|
+
|
@@ -0,0 +1,125 @@
|
|
1
|
+
|
2
|
+
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
3
|
+
| word is at the start of a line.
|
4
|
+
|
5
|
+
| This is a ranked list (commonest to rarest) of stopwords derived from
|
6
|
+
| a large text sample.
|
7
|
+
|
8
|
+
| Swedish stop words occasionally exhibit homonym clashes. For example
|
9
|
+
| så = so, but also seed. These are indicated clearly below.
|
10
|
+
|
11
|
+
och | and
|
12
|
+
det | it, this/that
|
13
|
+
att | to (with infinitive)
|
14
|
+
i | in, at
|
15
|
+
en | a
|
16
|
+
jag | I
|
17
|
+
hon | she
|
18
|
+
som | who, that
|
19
|
+
han | he
|
20
|
+
på | on
|
21
|
+
den | it, this/that
|
22
|
+
med | with
|
23
|
+
var | where, each
|
24
|
+
sig | him(self) etc
|
25
|
+
för | for
|
26
|
+
så | so (also: seed)
|
27
|
+
till | to
|
28
|
+
är | is
|
29
|
+
men | but
|
30
|
+
ett | a
|
31
|
+
om | if; around, about
|
32
|
+
hade | had
|
33
|
+
de | they, these/those
|
34
|
+
av | of
|
35
|
+
icke | not, no
|
36
|
+
mig | me
|
37
|
+
du | you
|
38
|
+
henne | her
|
39
|
+
då | then, when
|
40
|
+
sin | his
|
41
|
+
nu | now
|
42
|
+
har | have
|
43
|
+
inte | inte någon = no one
|
44
|
+
hans | his
|
45
|
+
honom | him
|
46
|
+
skulle | 'sake'
|
47
|
+
hennes | her
|
48
|
+
där | there
|
49
|
+
min | my
|
50
|
+
man | one (pronoun)
|
51
|
+
ej | nor
|
52
|
+
vid | at, by, on (also: vast)
|
53
|
+
kunde | could
|
54
|
+
något | some etc
|
55
|
+
från | from, off
|
56
|
+
ut | out
|
57
|
+
när | when
|
58
|
+
efter | after, behind
|
59
|
+
upp | up
|
60
|
+
vi | we
|
61
|
+
dem | them
|
62
|
+
vara | be
|
63
|
+
vad | what
|
64
|
+
över | over
|
65
|
+
än | than
|
66
|
+
dig | you
|
67
|
+
kan | can
|
68
|
+
sina | his
|
69
|
+
här | here
|
70
|
+
ha | have
|
71
|
+
mot | towards
|
72
|
+
alla | all
|
73
|
+
under | under (also: wonder)
|
74
|
+
någon | some etc
|
75
|
+
eller | or (else)
|
76
|
+
allt | all
|
77
|
+
mycket | much
|
78
|
+
sedan | since
|
79
|
+
ju | why
|
80
|
+
denna | this/that
|
81
|
+
själv | myself, yourself etc
|
82
|
+
detta | this/that
|
83
|
+
åt | to
|
84
|
+
utan | without
|
85
|
+
varit | was
|
86
|
+
hur | how
|
87
|
+
ingen | no
|
88
|
+
mitt | my
|
89
|
+
ni | you
|
90
|
+
bli | to be, become
|
91
|
+
blev | from bli
|
92
|
+
oss | us
|
93
|
+
din | thy
|
94
|
+
dessa | these/those
|
95
|
+
några | some etc
|
96
|
+
deras | their
|
97
|
+
blir | from bli
|
98
|
+
mina | my
|
99
|
+
samma | (the) same
|
100
|
+
vilken | who, that
|
101
|
+
er | you, your
|
102
|
+
sådan | such a
|
103
|
+
vår | our
|
104
|
+
blivit | from bli
|
105
|
+
dess | its
|
106
|
+
inom | within
|
107
|
+
mellan | between
|
108
|
+
sådant | such a
|
109
|
+
varför | why
|
110
|
+
varje | each
|
111
|
+
vilka | who, that
|
112
|
+
ditt | thy
|
113
|
+
vem | who
|
114
|
+
vilket | who, that
|
115
|
+
sitta | his
|
116
|
+
sådana | such a
|
117
|
+
vart | each
|
118
|
+
dina | thy
|
119
|
+
vars | whose
|
120
|
+
vårt | our
|
121
|
+
våra | our
|
122
|
+
ert | your
|
123
|
+
era | your
|
124
|
+
vilkas | whose
|
125
|
+
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Refreshes the stop word lists in the current working directory from the
# Snowball project's published lists.
#
# For every language the upstream "stop.txt" is downloaded with curl,
# transcoded to UTF-8 with iconv, and stored as "<language>.txt".
#
# Download and conversion go through scratch files, and the real list is only
# replaced once both steps have succeeded. A shell pipeline redirected straight
# into the target would truncate it before curl had fetched anything, so a
# network or HTTP error would silently wipe a good list.

# Languages whose upstream lists are converted from ISO-8859-1.
langs = %w(danish dutch english finnish french german hungarian italian norwegian portuguese spanish swedish)

# [language, source encoding] pairs; the Russian list is converted from KOI8-R.
sources = langs.map { |l| [l, 'ISO-8859-1'] } << ['russian', 'KOI8-R']

sources.each do |lang, encoding|
  url       = "http://snowball.tartarus.org/algorithms/%s/stop.txt" % lang
  target    = "%s.txt" % lang
  raw       = "#{target}.download" # bytes exactly as served
  converted = "#{target}.utf8"     # transcoded copy, not yet published

  begin
    # Argument-list form of system: no shell is involved, so nothing needs
    # quoting. --fail makes curl exit non-zero on HTTP errors instead of saving
    # the error page; --location follows redirects if the host issues any.
    fetched = system('curl', '--fail', '--location', '--silent', '--show-error',
                     '--output', raw, url)
    ok = fetched && system('iconv', '-f', encoding, '-t', 'utf8', raw, :out => converted)

    if ok
      File.rename(converted, target)
    else
      # Best effort: report the problem and keep whatever list is already there.
      warn "update.rb: could not refresh #{target} from #{url}; existing file left untouched"
    end
  ensure
    [raw, converted].each { |path| File.delete(path) if File.exist?(path) }
  end
end
|
6
|
+
|
7
|
+
|