xapian-fu 0.2 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +152 -13
- data/examples/query.rb +34 -6
- data/examples/spider.rb +44 -15
- data/lib/xapian_fu/query_parser.rb +179 -0
- data/lib/xapian_fu/result_set.rb +52 -0
- data/lib/xapian_fu/stopper_factory.rb +40 -0
- data/lib/xapian_fu/stopwords/README +7 -0
- data/lib/xapian_fu/stopwords/danish.txt +102 -0
- data/lib/xapian_fu/stopwords/dutch.txt +113 -0
- data/lib/xapian_fu/stopwords/english.txt +312 -0
- data/lib/xapian_fu/stopwords/finnish.txt +89 -0
- data/lib/xapian_fu/stopwords/french.txt +168 -0
- data/lib/xapian_fu/stopwords/german.txt +286 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +203 -0
- data/lib/xapian_fu/stopwords/italian.txt +295 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +186 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +245 -0
- data/lib/xapian_fu/stopwords/russian.txt +236 -0
- data/lib/xapian_fu/stopwords/spanish.txt +348 -0
- data/lib/xapian_fu/stopwords/swedish.txt +125 -0
- data/lib/xapian_fu/stopwords/update.rb +7 -0
- data/lib/xapian_fu/xapian_db.rb +215 -99
- data/lib/xapian_fu/xapian_doc.rb +229 -47
- data/lib/xapian_fu/xapian_doc_value_accessor.rb +125 -0
- data/lib/xapian_fu/xapian_documents_accessor.rb +82 -0
- data/lib/xapian_fu.rb +1 -0
- data/spec/query_parser_spec.rb +43 -0
- data/spec/stopper_factory_spec.rb +57 -0
- data/spec/xapian_db_spec.rb +458 -215
- data/spec/xapian_doc_spec.rb +180 -0
- data/spec/xapian_doc_value_accessor_spec.rb +92 -0
- metadata +29 -5
@@ -0,0 +1,236 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
| a russian stop word list. comments begin with vertical bar. each stop
|
4
|
+
| word is at the start of a line.
|
5
|
+
|
6
|
+
| this is a ranked list (commonest to rarest) of stopwords derived from
|
7
|
+
| a large text sample.
|
8
|
+
|
9
|
+
| letter `ё' is translated to `е'.
|
10
|
+
|
11
|
+
и | and
|
12
|
+
в | in/into
|
13
|
+
во | alternative form
|
14
|
+
не | not
|
15
|
+
что | what/that
|
16
|
+
он | he
|
17
|
+
на | on/onto
|
18
|
+
я | i
|
19
|
+
с | from
|
20
|
+
со | alternative form
|
21
|
+
как | how
|
22
|
+
а | milder form of `no' (but)
|
23
|
+
то | conjunction and form of `that'
|
24
|
+
все | all
|
25
|
+
она | she
|
26
|
+
так | so, thus
|
27
|
+
его | him
|
28
|
+
но | but
|
29
|
+
да | yes/and
|
30
|
+
ты | thou
|
31
|
+
к | towards, by
|
32
|
+
у | around, chez
|
33
|
+
же | intensifier particle
|
34
|
+
вы | you
|
35
|
+
за | beyond, behind
|
36
|
+
бы | conditional/subj. particle
|
37
|
+
по | up to, along
|
38
|
+
только | only
|
39
|
+
ее | her
|
40
|
+
мне | to me
|
41
|
+
было | it was
|
42
|
+
вот | here is/are, particle
|
43
|
+
от | away from
|
44
|
+
меня | me
|
45
|
+
еще | still, yet, more
|
46
|
+
нет | no, there isn't/aren't
|
47
|
+
о | about
|
48
|
+
из | out of
|
49
|
+
ему | to him
|
50
|
+
теперь | now
|
51
|
+
когда | when
|
52
|
+
даже | even
|
53
|
+
ну | so, well
|
54
|
+
вдруг | suddenly
|
55
|
+
ли | interrogative particle
|
56
|
+
если | if
|
57
|
+
уже | already, but homonym of `narrower'
|
58
|
+
или | or
|
59
|
+
ни | neither
|
60
|
+
быть | to be
|
61
|
+
был | he was
|
62
|
+
него | prepositional form of его
|
63
|
+
до | up to
|
64
|
+
вас | you accusative
|
65
|
+
нибудь | indef. suffix preceded by hyphen
|
66
|
+
опять | again
|
67
|
+
уж | already, but homonym of `adder'
|
68
|
+
вам | to you
|
69
|
+
сказал | he said
|
70
|
+
ведь | particle `after all'
|
71
|
+
там | there
|
72
|
+
потом | then
|
73
|
+
себя | oneself
|
74
|
+
ничего | nothing
|
75
|
+
ей | to her
|
76
|
+
может | usually with `быть' as `maybe'
|
77
|
+
они | they
|
78
|
+
тут | here
|
79
|
+
где | where
|
80
|
+
есть | there is/are
|
81
|
+
надо | got to, must
|
82
|
+
ней | prepositional form of ей
|
83
|
+
для | for
|
84
|
+
мы | we
|
85
|
+
тебя | thee
|
86
|
+
их | them, their
|
87
|
+
чем | than
|
88
|
+
была | she was
|
89
|
+
сам | self
|
90
|
+
чтоб | in order to
|
91
|
+
без | without
|
92
|
+
будто | as if
|
93
|
+
человек | man, person, one
|
94
|
+
чего | genitive form of `what'
|
95
|
+
раз | once
|
96
|
+
тоже | also
|
97
|
+
себе | to oneself
|
98
|
+
под | beneath
|
99
|
+
жизнь | life
|
100
|
+
будет | will be
|
101
|
+
ж | short form of intensifier particle `же'
|
102
|
+
тогда | then
|
103
|
+
кто | who
|
104
|
+
этот | this
|
105
|
+
говорил | was saying
|
106
|
+
того | genitive form of `that'
|
107
|
+
потому | for that reason
|
108
|
+
этого | genitive form of `this'
|
109
|
+
какой | which
|
110
|
+
совсем | altogether
|
111
|
+
ним | prepositional form of `его', `они'
|
112
|
+
здесь | here
|
113
|
+
этом | prepositional form of `этот'
|
114
|
+
один | one
|
115
|
+
почти | almost
|
116
|
+
мой | my
|
117
|
+
тем | instrumental/dative plural of `тот', `то'
|
118
|
+
чтобы | full form of `in order that'
|
119
|
+
нее | her (acc.)
|
120
|
+
кажется | it seems
|
121
|
+
сейчас | now
|
122
|
+
были | they were
|
123
|
+
куда | where to
|
124
|
+
зачем | why
|
125
|
+
сказать | to say
|
126
|
+
всех | all (acc., gen. preposn. plural)
|
127
|
+
никогда | never
|
128
|
+
сегодня | today
|
129
|
+
можно | possible, one can
|
130
|
+
при | by
|
131
|
+
наконец | finally
|
132
|
+
два | two
|
133
|
+
об | alternative form of `о', about
|
134
|
+
другой | another
|
135
|
+
хоть | even
|
136
|
+
после | after
|
137
|
+
над | above
|
138
|
+
больше | more
|
139
|
+
тот | that one (masc.)
|
140
|
+
через | across, in
|
141
|
+
эти | these
|
142
|
+
нас | us
|
143
|
+
про | about
|
144
|
+
всего | in all, only, of all
|
145
|
+
них | prepositional form of `они' (they)
|
146
|
+
какая | which, feminine
|
147
|
+
много | lots
|
148
|
+
разве | interrogative particle
|
149
|
+
сказала | she said
|
150
|
+
три | three
|
151
|
+
эту | this, acc. fem. sing.
|
152
|
+
моя | my, feminine
|
153
|
+
впрочем | moreover, besides
|
154
|
+
хорошо | good
|
155
|
+
свою | one's own, acc. fem. sing.
|
156
|
+
этой | oblique form of `эта', fem. `this'
|
157
|
+
перед | in front of
|
158
|
+
иногда | sometimes
|
159
|
+
лучше | better
|
160
|
+
чуть | a little
|
161
|
+
том | preposn. form of `that one'
|
162
|
+
нельзя | one must not
|
163
|
+
такой | such a one
|
164
|
+
им | to them
|
165
|
+
более | more
|
166
|
+
всегда | always
|
167
|
+
конечно | of course
|
168
|
+
всю | acc. fem. sing of `all'
|
169
|
+
между | between
|
170
|
+
|
171
|
+
|
172
|
+
| b: some paradigms
|
173
|
+
|
|
174
|
+
| personal pronouns
|
175
|
+
|
|
176
|
+
| я меня мне мной [мною]
|
177
|
+
| ты тебя тебе тобой [тобою]
|
178
|
+
| он его ему им [него, нему, ним]
|
179
|
+
| она ее ей ею [нее, ней, нею]
|
180
|
+
| оно его ему им [него, нему, ним]
|
181
|
+
|
|
182
|
+
| мы нас нам нами
|
183
|
+
| вы вас вам вами
|
184
|
+
| они их им ими [них, ним, ними]
|
185
|
+
|
|
186
|
+
| себя себе собой [собою]
|
187
|
+
|
|
188
|
+
| demonstrative pronouns: этот (this), тот (that)
|
189
|
+
|
|
190
|
+
| этот эта это эти
|
191
|
+
| этого эту это эти
|
192
|
+
| этого этой этого этих
|
193
|
+
| этому этой этому этим
|
194
|
+
| этим этой этим [этою] этими
|
195
|
+
| этом этой этом этих
|
196
|
+
|
|
197
|
+
| тот та то те
|
198
|
+
| того ту то те
|
199
|
+
| того той того тех
|
200
|
+
| тому той тому тем
|
201
|
+
| тем той тем [тою] теми
|
202
|
+
| том той том тех
|
203
|
+
|
|
204
|
+
| determinative pronouns
|
205
|
+
|
|
206
|
+
| (a) весь (all)
|
207
|
+
|
|
208
|
+
| весь вся все все
|
209
|
+
| всего всю все все
|
210
|
+
| всего всей всего всех
|
211
|
+
| всему всей всему всем
|
212
|
+
| всем всей всем [всею] всеми
|
213
|
+
| всем всей всем всех
|
214
|
+
|
|
215
|
+
| (b) сам (himself etc)
|
216
|
+
|
|
217
|
+
| сам сама само сами
|
218
|
+
| самого саму само самих
|
219
|
+
| самого самой самого самих
|
220
|
+
| самому самой самому самим
|
221
|
+
| самим самой самим [самою] самими
|
222
|
+
| самом самой самом самих
|
223
|
+
|
|
224
|
+
| stems of verbs `to be', `to have', `to do' and modal
|
225
|
+
|
|
226
|
+
| быть бы буд быв есть суть
|
227
|
+
| име
|
228
|
+
| дел
|
229
|
+
| мог мож мочь
|
230
|
+
| уме
|
231
|
+
| хоч хот
|
232
|
+
| долж
|
233
|
+
| можн
|
234
|
+
| нужн
|
235
|
+
| нельзя
|
236
|
+
|
@@ -0,0 +1,348 @@
|
|
1
|
+
|
2
|
+
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
3
|
+
| word is at the start of a line.
|
4
|
+
|
5
|
+
|
6
|
+
| The following is a ranked list (commonest to rarest) of stopwords
|
7
|
+
| deriving from a large sample of text.
|
8
|
+
|
9
|
+
| Extra words have been added at the end.
|
10
|
+
|
11
|
+
de | from, of
|
12
|
+
la | the, her
|
13
|
+
que | who, that
|
14
|
+
el | the
|
15
|
+
en | in
|
16
|
+
y | and
|
17
|
+
a | to
|
18
|
+
los | the, them
|
19
|
+
del | de + el
|
20
|
+
se | himself, from him etc
|
21
|
+
las | the, them
|
22
|
+
por | for, by, etc
|
23
|
+
un | a
|
24
|
+
para | for
|
25
|
+
con | with
|
26
|
+
no | no
|
27
|
+
una | a
|
28
|
+
su | his, her
|
29
|
+
al | a + el
|
30
|
+
| es from SER
|
31
|
+
lo | him
|
32
|
+
como | how
|
33
|
+
más | more
|
34
|
+
pero | but
|
35
|
+
sus | su plural
|
36
|
+
le | to him, her
|
37
|
+
ya | already
|
38
|
+
o | or
|
39
|
+
| fue from SER
|
40
|
+
este | this
|
41
|
+
| ha from HABER
|
42
|
+
sí | himself etc
|
43
|
+
porque | because
|
44
|
+
esta | this
|
45
|
+
| son from SER
|
46
|
+
entre | between
|
47
|
+
| está from ESTAR
|
48
|
+
cuando | when
|
49
|
+
muy | very
|
50
|
+
sin | without
|
51
|
+
sobre | on
|
52
|
+
| ser from SER
|
53
|
+
| tiene from TENER
|
54
|
+
también | also
|
55
|
+
me | me
|
56
|
+
hasta | until
|
57
|
+
hay | there is/are
|
58
|
+
donde | where
|
59
|
+
| han from HABER
|
60
|
+
quien | whom, that
|
61
|
+
| están from ESTAR
|
62
|
+
| estado from ESTAR
|
63
|
+
desde | from
|
64
|
+
todo | all
|
65
|
+
nos | us
|
66
|
+
durante | during
|
67
|
+
| estados from ESTAR
|
68
|
+
todos | all
|
69
|
+
uno | a
|
70
|
+
les | to them
|
71
|
+
ni | nor
|
72
|
+
contra | against
|
73
|
+
otros | other
|
74
|
+
| fueron from SER
|
75
|
+
ese | that
|
76
|
+
eso | that
|
77
|
+
| había from HABER
|
78
|
+
ante | before
|
79
|
+
ellos | they
|
80
|
+
e | and (variant of y)
|
81
|
+
esto | this
|
82
|
+
mí | me
|
83
|
+
antes | before
|
84
|
+
algunos | some
|
85
|
+
qué | what?
|
86
|
+
unos | a
|
87
|
+
yo | I
|
88
|
+
otro | other
|
89
|
+
otras | other
|
90
|
+
otra | other
|
91
|
+
él | he
|
92
|
+
tanto | so much, many
|
93
|
+
esa | that
|
94
|
+
estos | these
|
95
|
+
mucho | much, many
|
96
|
+
quienes | who
|
97
|
+
nada | nothing
|
98
|
+
muchos | many
|
99
|
+
cual | who
|
100
|
+
| sea from SER
|
101
|
+
poco | few
|
102
|
+
ella | she
|
103
|
+
estar | to be
|
104
|
+
| haber from HABER
|
105
|
+
estas | these
|
106
|
+
| estaba from ESTAR
|
107
|
+
| estamos from ESTAR
|
108
|
+
algunas | some
|
109
|
+
algo | something
|
110
|
+
nosotros | we
|
111
|
+
|
112
|
+
| other forms
|
113
|
+
|
114
|
+
mi | me
|
115
|
+
mis | mi plural
|
116
|
+
tú | thou
|
117
|
+
te | thee
|
118
|
+
ti | thee
|
119
|
+
tu | thy
|
120
|
+
tus | tu plural
|
121
|
+
ellas | they
|
122
|
+
nosotras | we
|
123
|
+
vosotros | you
|
124
|
+
vosotras | you
|
125
|
+
os | you
|
126
|
+
mío | mine
|
127
|
+
mía |
|
128
|
+
míos |
|
129
|
+
mías |
|
130
|
+
tuyo | thine
|
131
|
+
tuya |
|
132
|
+
tuyos |
|
133
|
+
tuyas |
|
134
|
+
suyo | his, hers, theirs
|
135
|
+
suya |
|
136
|
+
suyos |
|
137
|
+
suyas |
|
138
|
+
nuestro | ours
|
139
|
+
nuestra |
|
140
|
+
nuestros |
|
141
|
+
nuestras |
|
142
|
+
vuestro | yours
|
143
|
+
vuestra |
|
144
|
+
vuestros |
|
145
|
+
vuestras |
|
146
|
+
esos | those
|
147
|
+
esas | those
|
148
|
+
|
149
|
+
| forms of estar, to be (not including the infinitive):
|
150
|
+
estoy
|
151
|
+
estás
|
152
|
+
está
|
153
|
+
estamos
|
154
|
+
estáis
|
155
|
+
están
|
156
|
+
esté
|
157
|
+
estés
|
158
|
+
estemos
|
159
|
+
estéis
|
160
|
+
estén
|
161
|
+
estaré
|
162
|
+
estarás
|
163
|
+
estará
|
164
|
+
estaremos
|
165
|
+
estaréis
|
166
|
+
estarán
|
167
|
+
estaría
|
168
|
+
estarías
|
169
|
+
estaríamos
|
170
|
+
estaríais
|
171
|
+
estarían
|
172
|
+
estaba
|
173
|
+
estabas
|
174
|
+
estábamos
|
175
|
+
estabais
|
176
|
+
estaban
|
177
|
+
estuve
|
178
|
+
estuviste
|
179
|
+
estuvo
|
180
|
+
estuvimos
|
181
|
+
estuvisteis
|
182
|
+
estuvieron
|
183
|
+
estuviera
|
184
|
+
estuvieras
|
185
|
+
estuviéramos
|
186
|
+
estuvierais
|
187
|
+
estuvieran
|
188
|
+
estuviese
|
189
|
+
estuvieses
|
190
|
+
estuviésemos
|
191
|
+
estuvieseis
|
192
|
+
estuviesen
|
193
|
+
estando
|
194
|
+
estado
|
195
|
+
estada
|
196
|
+
estados
|
197
|
+
estadas
|
198
|
+
estad
|
199
|
+
|
200
|
+
| forms of haber, to have (not including the infinitive):
|
201
|
+
he
|
202
|
+
has
|
203
|
+
ha
|
204
|
+
hemos
|
205
|
+
habéis
|
206
|
+
han
|
207
|
+
haya
|
208
|
+
hayas
|
209
|
+
hayamos
|
210
|
+
hayáis
|
211
|
+
hayan
|
212
|
+
habré
|
213
|
+
habrás
|
214
|
+
habrá
|
215
|
+
habremos
|
216
|
+
habréis
|
217
|
+
habrán
|
218
|
+
habría
|
219
|
+
habrías
|
220
|
+
habríamos
|
221
|
+
habríais
|
222
|
+
habrían
|
223
|
+
había
|
224
|
+
habías
|
225
|
+
habíamos
|
226
|
+
habíais
|
227
|
+
habían
|
228
|
+
hube
|
229
|
+
hubiste
|
230
|
+
hubo
|
231
|
+
hubimos
|
232
|
+
hubisteis
|
233
|
+
hubieron
|
234
|
+
hubiera
|
235
|
+
hubieras
|
236
|
+
hubiéramos
|
237
|
+
hubierais
|
238
|
+
hubieran
|
239
|
+
hubiese
|
240
|
+
hubieses
|
241
|
+
hubiésemos
|
242
|
+
hubieseis
|
243
|
+
hubiesen
|
244
|
+
habiendo
|
245
|
+
habido
|
246
|
+
habida
|
247
|
+
habidos
|
248
|
+
habidas
|
249
|
+
|
250
|
+
| forms of ser, to be (not including the infinitive):
|
251
|
+
soy
|
252
|
+
eres
|
253
|
+
es
|
254
|
+
somos
|
255
|
+
sois
|
256
|
+
son
|
257
|
+
sea
|
258
|
+
seas
|
259
|
+
seamos
|
260
|
+
seáis
|
261
|
+
sean
|
262
|
+
seré
|
263
|
+
serás
|
264
|
+
será
|
265
|
+
seremos
|
266
|
+
seréis
|
267
|
+
serán
|
268
|
+
sería
|
269
|
+
serías
|
270
|
+
seríamos
|
271
|
+
seríais
|
272
|
+
serían
|
273
|
+
era
|
274
|
+
eras
|
275
|
+
éramos
|
276
|
+
erais
|
277
|
+
eran
|
278
|
+
fui
|
279
|
+
fuiste
|
280
|
+
fue
|
281
|
+
fuimos
|
282
|
+
fuisteis
|
283
|
+
fueron
|
284
|
+
fuera
|
285
|
+
fueras
|
286
|
+
fuéramos
|
287
|
+
fuerais
|
288
|
+
fueran
|
289
|
+
fuese
|
290
|
+
fueses
|
291
|
+
fuésemos
|
292
|
+
fueseis
|
293
|
+
fuesen
|
294
|
+
siendo
|
295
|
+
sido
|
296
|
+
| sed also means 'thirst'
|
297
|
+
|
298
|
+
| forms of tener, to have (not including the infinitive):
|
299
|
+
tengo
|
300
|
+
tienes
|
301
|
+
tiene
|
302
|
+
tenemos
|
303
|
+
tenéis
|
304
|
+
tienen
|
305
|
+
tenga
|
306
|
+
tengas
|
307
|
+
tengamos
|
308
|
+
tengáis
|
309
|
+
tengan
|
310
|
+
tendré
|
311
|
+
tendrás
|
312
|
+
tendrá
|
313
|
+
tendremos
|
314
|
+
tendréis
|
315
|
+
tendrán
|
316
|
+
tendría
|
317
|
+
tendrías
|
318
|
+
tendríamos
|
319
|
+
tendríais
|
320
|
+
tendrían
|
321
|
+
tenía
|
322
|
+
tenías
|
323
|
+
teníamos
|
324
|
+
teníais
|
325
|
+
tenían
|
326
|
+
tuve
|
327
|
+
tuviste
|
328
|
+
tuvo
|
329
|
+
tuvimos
|
330
|
+
tuvisteis
|
331
|
+
tuvieron
|
332
|
+
tuviera
|
333
|
+
tuvieras
|
334
|
+
tuviéramos
|
335
|
+
tuvierais
|
336
|
+
tuvieran
|
337
|
+
tuviese
|
338
|
+
tuvieses
|
339
|
+
tuviésemos
|
340
|
+
tuvieseis
|
341
|
+
tuviesen
|
342
|
+
teniendo
|
343
|
+
tenido
|
344
|
+
tenida
|
345
|
+
tenidos
|
346
|
+
tenidas
|
347
|
+
tened
|
348
|
+
|
@@ -0,0 +1,125 @@
|
|
1
|
+
|
2
|
+
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
3
|
+
| word is at the start of a line.
|
4
|
+
|
5
|
+
| This is a ranked list (commonest to rarest) of stopwords derived from
|
6
|
+
| a large text sample.
|
7
|
+
|
8
|
+
| Swedish stop words occasionally exhibit homonym clashes. For example
|
9
|
+
| så = so, but also seed. These are indicated clearly below.
|
10
|
+
|
11
|
+
och | and
|
12
|
+
det | it, this/that
|
13
|
+
att | to (with infinitive)
|
14
|
+
i | in, at
|
15
|
+
en | a
|
16
|
+
jag | I
|
17
|
+
hon | she
|
18
|
+
som | who, that
|
19
|
+
han | he
|
20
|
+
på | on
|
21
|
+
den | it, this/that
|
22
|
+
med | with
|
23
|
+
var | where, each
|
24
|
+
sig | him(self) etc
|
25
|
+
för | for
|
26
|
+
så | so (also: seed)
|
27
|
+
till | to
|
28
|
+
är | is
|
29
|
+
men | but
|
30
|
+
ett | a
|
31
|
+
om | if; around, about
|
32
|
+
hade | had
|
33
|
+
de | they, these/those
|
34
|
+
av | of
|
35
|
+
icke | not, no
|
36
|
+
mig | me
|
37
|
+
du | you
|
38
|
+
henne | her
|
39
|
+
då | then, when
|
40
|
+
sin | his
|
41
|
+
nu | now
|
42
|
+
har | have
|
43
|
+
inte | inte någon = no one
|
44
|
+
hans | his
|
45
|
+
honom | him
|
46
|
+
skulle | should, would
|
47
|
+
hennes | her
|
48
|
+
där | there
|
49
|
+
min | my
|
50
|
+
man | one (pronoun)
|
51
|
+
ej | nor
|
52
|
+
vid | at, by, on (also: vast)
|
53
|
+
kunde | could
|
54
|
+
något | some etc
|
55
|
+
från | from, off
|
56
|
+
ut | out
|
57
|
+
när | when
|
58
|
+
efter | after, behind
|
59
|
+
upp | up
|
60
|
+
vi | we
|
61
|
+
dem | them
|
62
|
+
vara | be
|
63
|
+
vad | what
|
64
|
+
över | over
|
65
|
+
än | than
|
66
|
+
dig | you
|
67
|
+
kan | can
|
68
|
+
sina | his
|
69
|
+
här | here
|
70
|
+
ha | have
|
71
|
+
mot | towards
|
72
|
+
alla | all
|
73
|
+
under | under (also: wonder)
|
74
|
+
någon | some etc
|
75
|
+
eller | or (else)
|
76
|
+
allt | all
|
77
|
+
mycket | much
|
78
|
+
sedan | since
|
79
|
+
ju | why
|
80
|
+
denna | this/that
|
81
|
+
själv | myself, yourself etc
|
82
|
+
detta | this/that
|
83
|
+
åt | to
|
84
|
+
utan | without
|
85
|
+
varit | was
|
86
|
+
hur | how
|
87
|
+
ingen | no
|
88
|
+
mitt | my
|
89
|
+
ni | you
|
90
|
+
bli | to be, become
|
91
|
+
blev | from bli
|
92
|
+
oss | us
|
93
|
+
din | thy
|
94
|
+
dessa | these/those
|
95
|
+
några | some etc
|
96
|
+
deras | their
|
97
|
+
blir | from bli
|
98
|
+
mina | my
|
99
|
+
samma | (the) same
|
100
|
+
vilken | who, that
|
101
|
+
er | you, your
|
102
|
+
sådan | such a
|
103
|
+
vår | our
|
104
|
+
blivit | from bli
|
105
|
+
dess | its
|
106
|
+
inom | within
|
107
|
+
mellan | between
|
108
|
+
sådant | such a
|
109
|
+
varför | why
|
110
|
+
varje | each
|
111
|
+
vilka | who, that
|
112
|
+
ditt | thy
|
113
|
+
vem | who
|
114
|
+
vilket | who, that
|
115
|
+
sitta | his
|
116
|
+
sådana | such a
|
117
|
+
vart | each
|
118
|
+
dina | thy
|
119
|
+
vars | whose
|
120
|
+
vårt | our
|
121
|
+
våra | our
|
122
|
+
ert | your
|
123
|
+
era | your
|
124
|
+
vilkas | whose
|
125
|
+
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Snowball language names whose stop word lists are published in ISO-8859-1.
langs = %w[
  danish dutch english finnish french german
  hungarian italian norwegian portuguese spanish swedish
]
|
2
|
+
|
3
|
+
# Refresh each ISO-8859-1 encoded Snowball stop word list, converting it to
# UTF-8 and storing it as "<language>.txt" in the current working directory.
#
# The converted download is staged in "<language>.txt.part" and only renamed
# over the real file once both curl and iconv have exited successfully, so a
# failed fetch no longer replaces an existing list with an empty file or an
# HTTP error page. Failures are reported on stderr and the remaining
# languages are still processed.
require "open3"

langs.each do |lang|
  url = "http://snowball.tartarus.org/algorithms/%s/stop.txt" % lang
  target = "%s.txt" % lang
  staging = "#{target}.part"

  begin
    # -f: exit non-zero on HTTP errors instead of printing the error page;
    # -sS: hide the progress meter but still print error messages.
    statuses = Open3.pipeline(
      ["curl", "-fsS", url],
      ["iconv", "-f", "ISO-8859-1", "-t", "utf8"],
      :out => staging
    )
    fetched = statuses.all? { |status| status.success? }
  rescue SystemCallError => e
    # Raised when a command cannot be started (e.g. curl is not installed).
    warn "#{target}: #{e.message}"
    fetched = false
  end

  if fetched
    File.rename(staging, target)
  else
    File.delete(staging) if File.exist?(staging)
    warn "#{target}: could not update from #{url}; existing file left untouched"
  end
end
|
4
|
+
|
5
|
+
# Refresh the Russian Snowball stop word list, which is published in KOI8-R,
# converting it to UTF-8 and storing it as "russian.txt" in the current
# working directory.
#
# The converted download is staged in "russian.txt.part" and only renamed over
# the real file once both curl and iconv have exited successfully, so a failed
# fetch no longer replaces the existing list with an empty file or an HTTP
# error page. Failures are reported on stderr.
require "open3"

russian_url = "http://snowball.tartarus.org/algorithms/russian/stop.txt"
russian_target = "russian.txt"
russian_staging = "#{russian_target}.part"

begin
  # -f: exit non-zero on HTTP errors instead of printing the error page;
  # -sS: hide the progress meter but still print error messages.
  russian_statuses = Open3.pipeline(
    ["curl", "-fsS", russian_url],
    ["iconv", "-f", "KOI8-R", "-t", "utf8"],
    :out => russian_staging
  )
  russian_fetched = russian_statuses.all? { |status| status.success? }
rescue SystemCallError => e
  # Raised when a command cannot be started (e.g. curl is not installed).
  warn "#{russian_target}: #{e.message}"
  russian_fetched = false
end

if russian_fetched
  File.rename(russian_staging, russian_target)
else
  File.delete(russian_staging) if File.exist?(russian_staging)
  warn "#{russian_target}: could not update from #{russian_url}; existing file left untouched"
end
|
6
|
+
|
7
|
+
|