xapian-fu 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +2 -1
- data/lib/xapian_fu/stopper_factory.rb +1 -4
- data/lib/xapian_fu/stopwords/af.txt +51 -0
- data/lib/xapian_fu/stopwords/ar.txt +480 -0
- data/lib/xapian_fu/stopwords/bg.txt +259 -0
- data/lib/xapian_fu/stopwords/bn.txt +398 -0
- data/lib/xapian_fu/stopwords/br.txt +1203 -0
- data/lib/xapian_fu/stopwords/ca.txt +278 -0
- data/lib/xapian_fu/stopwords/cs.txt +423 -0
- data/lib/xapian_fu/stopwords/da.txt +170 -0
- data/lib/xapian_fu/stopwords/danish.txt +1 -0
- data/lib/xapian_fu/stopwords/de.txt +620 -0
- data/lib/xapian_fu/stopwords/dutch.txt +1 -0
- data/lib/xapian_fu/stopwords/el.txt +847 -0
- data/lib/xapian_fu/stopwords/en.txt +1298 -0
- data/lib/xapian_fu/stopwords/english.txt +1 -0
- data/lib/xapian_fu/stopwords/eo.txt +173 -0
- data/lib/xapian_fu/stopwords/es.txt +732 -0
- data/lib/xapian_fu/stopwords/et.txt +35 -0
- data/lib/xapian_fu/stopwords/eu.txt +98 -0
- data/lib/xapian_fu/stopwords/fa.txt +799 -0
- data/lib/xapian_fu/stopwords/fi.txt +847 -0
- data/lib/xapian_fu/stopwords/finnish.txt +1 -0
- data/lib/xapian_fu/stopwords/fr.txt +691 -0
- data/lib/xapian_fu/stopwords/french.txt +1 -0
- data/lib/xapian_fu/stopwords/ga.txt +109 -0
- data/lib/xapian_fu/stopwords/german.txt +1 -0
- data/lib/xapian_fu/stopwords/gl.txt +160 -0
- data/lib/xapian_fu/stopwords/gu.txt +224 -0
- data/lib/xapian_fu/stopwords/ha.txt +39 -0
- data/lib/xapian_fu/stopwords/he.txt +194 -0
- data/lib/xapian_fu/stopwords/hi.txt +225 -0
- data/lib/xapian_fu/stopwords/hr.txt +179 -0
- data/lib/xapian_fu/stopwords/hu.txt +789 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +1 -0
- data/lib/xapian_fu/stopwords/hy.txt +45 -0
- data/lib/xapian_fu/stopwords/id.txt +758 -0
- data/lib/xapian_fu/stopwords/it.txt +632 -0
- data/lib/xapian_fu/stopwords/italian.txt +1 -0
- data/lib/xapian_fu/stopwords/ja.txt +134 -0
- data/lib/xapian_fu/stopwords/ko.txt +679 -0
- data/lib/xapian_fu/stopwords/ku.txt +62 -0
- data/lib/xapian_fu/stopwords/la.txt +49 -0
- data/lib/xapian_fu/stopwords/lt.txt +474 -0
- data/lib/xapian_fu/stopwords/lv.txt +161 -0
- data/lib/xapian_fu/stopwords/mr.txt +99 -0
- data/lib/xapian_fu/stopwords/ms.txt +475 -0
- data/lib/xapian_fu/stopwords/nl.txt +413 -0
- data/lib/xapian_fu/stopwords/no.txt +221 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +1 -0
- data/lib/xapian_fu/stopwords/pl.txt +329 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +1 -0
- data/lib/xapian_fu/stopwords/pt.txt +560 -0
- data/lib/xapian_fu/stopwords/ro.txt +434 -0
- data/lib/xapian_fu/stopwords/ru.txt +559 -0
- data/lib/xapian_fu/stopwords/russian.txt +1 -0
- data/lib/xapian_fu/stopwords/sk.txt +418 -0
- data/lib/xapian_fu/stopwords/sl.txt +446 -0
- data/lib/xapian_fu/stopwords/so.txt +30 -0
- data/lib/xapian_fu/stopwords/spanish.txt +1 -0
- data/lib/xapian_fu/stopwords/st.txt +31 -0
- data/lib/xapian_fu/stopwords/sv.txt +418 -0
- data/lib/xapian_fu/stopwords/sw.txt +74 -0
- data/lib/xapian_fu/stopwords/swedish.txt +1 -0
- data/lib/xapian_fu/stopwords/th.txt +116 -0
- data/lib/xapian_fu/stopwords/tl.txt +147 -0
- data/lib/xapian_fu/stopwords/tr.txt +504 -0
- data/lib/xapian_fu/stopwords/uk.txt +73 -0
- data/lib/xapian_fu/stopwords/update.rb +10 -3
- data/lib/xapian_fu/stopwords/ur.txt +517 -0
- data/lib/xapian_fu/stopwords/vi.txt +645 -0
- data/lib/xapian_fu/stopwords/yo.txt +60 -0
- data/lib/xapian_fu/stopwords/zh.txt +794 -0
- data/lib/xapian_fu/stopwords/zu.txt +29 -0
- data/lib/xapian_fu/version.rb +1 -1
- data/lib/xapian_fu/xapian_db.rb +3 -0
- data/spec/xapian_doc_spec.rb +1 -1
- metadata +109 -51
- data/lib/xapian_fu/stopwords/danish.txt +0 -102
- data/lib/xapian_fu/stopwords/dutch.txt +0 -113
- data/lib/xapian_fu/stopwords/english.txt +0 -312
- data/lib/xapian_fu/stopwords/finnish.txt +0 -89
- data/lib/xapian_fu/stopwords/french.txt +0 -168
- data/lib/xapian_fu/stopwords/german.txt +0 -286
- data/lib/xapian_fu/stopwords/hungarian.txt +0 -203
- data/lib/xapian_fu/stopwords/italian.txt +0 -295
- data/lib/xapian_fu/stopwords/norwegian.txt +0 -186
- data/lib/xapian_fu/stopwords/portuguese.txt +0 -245
- data/lib/xapian_fu/stopwords/russian.txt +0 -236
- data/lib/xapian_fu/stopwords/spanish.txt +0 -348
- data/lib/xapian_fu/stopwords/swedish.txt +0 -125
@@ -0,0 +1,29 @@
|
|
1
|
+
futhi
|
2
|
+
kahle
|
3
|
+
kakhulu
|
4
|
+
kanye
|
5
|
+
khona
|
6
|
+
kodwa
|
7
|
+
kungani
|
8
|
+
kusho
|
9
|
+
la
|
10
|
+
lakhe
|
11
|
+
lapho
|
12
|
+
mina
|
13
|
+
ngesikhathi
|
14
|
+
nje
|
15
|
+
phansi
|
16
|
+
phezulu
|
17
|
+
u
|
18
|
+
ukuba
|
19
|
+
ukuthi
|
20
|
+
ukuze
|
21
|
+
uma
|
22
|
+
wahamba
|
23
|
+
wakhe
|
24
|
+
wami
|
25
|
+
wase
|
26
|
+
wathi
|
27
|
+
yakhe
|
28
|
+
zakhe
|
29
|
+
zonke
|
data/lib/xapian_fu/version.rb
CHANGED
data/lib/xapian_fu/xapian_db.rb
CHANGED
@@ -58,6 +58,9 @@ module XapianFu #:nodoc:
|
|
58
58
|
# :portuguese, :romanian, :russian, :spanish, :swedish,
|
59
59
|
# :turkish</tt>. Set it to <tt>false</tt> to specify none.
|
60
60
|
#
|
61
|
+
# There are more stoppers available than stemmers. See
|
62
|
+
# <tt>lib/xapian_fu/stopwords/*.txt</tt> for a complete list.
|
63
|
+
#
|
61
64
|
# The default for all is <tt>:english</tt>.
|
62
65
|
#
|
63
66
|
# db = XapianDb.new(:language => :italian, :stopper => false)
|
data/spec/xapian_doc_spec.rb
CHANGED
@@ -203,7 +203,7 @@ describe XapianDoc do
|
|
203
203
|
it "should allow setting the stopper on initialisation" do
|
204
204
|
xdb = XapianDb.new(:stopper => :english)
|
205
205
|
xdoc = xdb.documents.new("And they made a cake", :stopper => :french)
|
206
|
-
xdoc.stopper.call("
|
206
|
+
xdoc.stopper.call("apres").should == true
|
207
207
|
xdoc.stopper.call("and").should == false
|
208
208
|
end
|
209
209
|
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian-fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Leach
|
8
8
|
- Damian Janowski
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2025-10-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 2.7.0
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 2.7.0
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rake
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,20 +90,78 @@ files:
|
|
90
90
|
- lib/xapian_fu/result_set.rb
|
91
91
|
- lib/xapian_fu/stopper_factory.rb
|
92
92
|
- lib/xapian_fu/stopwords/README
|
93
|
+
- lib/xapian_fu/stopwords/af.txt
|
94
|
+
- lib/xapian_fu/stopwords/ar.txt
|
95
|
+
- lib/xapian_fu/stopwords/bg.txt
|
96
|
+
- lib/xapian_fu/stopwords/bn.txt
|
97
|
+
- lib/xapian_fu/stopwords/br.txt
|
98
|
+
- lib/xapian_fu/stopwords/ca.txt
|
99
|
+
- lib/xapian_fu/stopwords/cs.txt
|
100
|
+
- lib/xapian_fu/stopwords/da.txt
|
93
101
|
- lib/xapian_fu/stopwords/danish.txt
|
102
|
+
- lib/xapian_fu/stopwords/de.txt
|
94
103
|
- lib/xapian_fu/stopwords/dutch.txt
|
104
|
+
- lib/xapian_fu/stopwords/el.txt
|
105
|
+
- lib/xapian_fu/stopwords/en.txt
|
95
106
|
- lib/xapian_fu/stopwords/english.txt
|
107
|
+
- lib/xapian_fu/stopwords/eo.txt
|
108
|
+
- lib/xapian_fu/stopwords/es.txt
|
109
|
+
- lib/xapian_fu/stopwords/et.txt
|
110
|
+
- lib/xapian_fu/stopwords/eu.txt
|
111
|
+
- lib/xapian_fu/stopwords/fa.txt
|
112
|
+
- lib/xapian_fu/stopwords/fi.txt
|
96
113
|
- lib/xapian_fu/stopwords/finnish.txt
|
114
|
+
- lib/xapian_fu/stopwords/fr.txt
|
97
115
|
- lib/xapian_fu/stopwords/french.txt
|
116
|
+
- lib/xapian_fu/stopwords/ga.txt
|
98
117
|
- lib/xapian_fu/stopwords/german.txt
|
118
|
+
- lib/xapian_fu/stopwords/gl.txt
|
119
|
+
- lib/xapian_fu/stopwords/gu.txt
|
120
|
+
- lib/xapian_fu/stopwords/ha.txt
|
121
|
+
- lib/xapian_fu/stopwords/he.txt
|
122
|
+
- lib/xapian_fu/stopwords/hi.txt
|
123
|
+
- lib/xapian_fu/stopwords/hr.txt
|
124
|
+
- lib/xapian_fu/stopwords/hu.txt
|
99
125
|
- lib/xapian_fu/stopwords/hungarian.txt
|
126
|
+
- lib/xapian_fu/stopwords/hy.txt
|
127
|
+
- lib/xapian_fu/stopwords/id.txt
|
128
|
+
- lib/xapian_fu/stopwords/it.txt
|
100
129
|
- lib/xapian_fu/stopwords/italian.txt
|
130
|
+
- lib/xapian_fu/stopwords/ja.txt
|
131
|
+
- lib/xapian_fu/stopwords/ko.txt
|
132
|
+
- lib/xapian_fu/stopwords/ku.txt
|
133
|
+
- lib/xapian_fu/stopwords/la.txt
|
134
|
+
- lib/xapian_fu/stopwords/lt.txt
|
135
|
+
- lib/xapian_fu/stopwords/lv.txt
|
136
|
+
- lib/xapian_fu/stopwords/mr.txt
|
137
|
+
- lib/xapian_fu/stopwords/ms.txt
|
138
|
+
- lib/xapian_fu/stopwords/nl.txt
|
139
|
+
- lib/xapian_fu/stopwords/no.txt
|
101
140
|
- lib/xapian_fu/stopwords/norwegian.txt
|
141
|
+
- lib/xapian_fu/stopwords/pl.txt
|
102
142
|
- lib/xapian_fu/stopwords/portuguese.txt
|
143
|
+
- lib/xapian_fu/stopwords/pt.txt
|
144
|
+
- lib/xapian_fu/stopwords/ro.txt
|
145
|
+
- lib/xapian_fu/stopwords/ru.txt
|
103
146
|
- lib/xapian_fu/stopwords/russian.txt
|
147
|
+
- lib/xapian_fu/stopwords/sk.txt
|
148
|
+
- lib/xapian_fu/stopwords/sl.txt
|
149
|
+
- lib/xapian_fu/stopwords/so.txt
|
104
150
|
- lib/xapian_fu/stopwords/spanish.txt
|
151
|
+
- lib/xapian_fu/stopwords/st.txt
|
152
|
+
- lib/xapian_fu/stopwords/sv.txt
|
153
|
+
- lib/xapian_fu/stopwords/sw.txt
|
105
154
|
- lib/xapian_fu/stopwords/swedish.txt
|
155
|
+
- lib/xapian_fu/stopwords/th.txt
|
156
|
+
- lib/xapian_fu/stopwords/tl.txt
|
157
|
+
- lib/xapian_fu/stopwords/tr.txt
|
158
|
+
- lib/xapian_fu/stopwords/uk.txt
|
106
159
|
- lib/xapian_fu/stopwords/update.rb
|
160
|
+
- lib/xapian_fu/stopwords/ur.txt
|
161
|
+
- lib/xapian_fu/stopwords/vi.txt
|
162
|
+
- lib/xapian_fu/stopwords/yo.txt
|
163
|
+
- lib/xapian_fu/stopwords/zh.txt
|
164
|
+
- lib/xapian_fu/stopwords/zu.txt
|
107
165
|
- lib/xapian_fu/version.rb
|
108
166
|
- lib/xapian_fu/xapian_db.rb
|
109
167
|
- lib/xapian_fu/xapian_doc.rb
|
@@ -172,7 +230,7 @@ homepage: https://github.com/johnl/xapian-fu
|
|
172
230
|
licenses:
|
173
231
|
- MIT
|
174
232
|
metadata: {}
|
175
|
-
post_install_message:
|
233
|
+
post_install_message:
|
176
234
|
rdoc_options:
|
177
235
|
- "--title"
|
178
236
|
- Xapian Fu
|
@@ -193,67 +251,67 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
251
|
version: '0'
|
194
252
|
requirements:
|
195
253
|
- libxapian-dev, or the xapian-ruby gem
|
196
|
-
rubygems_version: 3.
|
197
|
-
signing_key:
|
254
|
+
rubygems_version: 3.3.26
|
255
|
+
signing_key:
|
198
256
|
specification_version: 4
|
199
257
|
summary: A Ruby interface to the Xapian search engine
|
200
258
|
test_files:
|
201
|
-
- spec/
|
202
|
-
- spec/
|
203
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
|
259
|
+
- spec/build_db_for_value_testing.rb
|
260
|
+
- spec/facets_spec.rb
|
204
261
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/flintlock
|
205
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
|
206
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
|
207
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
|
208
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
|
209
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
|
210
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
|
211
262
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/iamchert
|
212
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
263
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
|
264
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
|
213
265
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseB
|
214
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
215
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
216
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
266
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.DB
|
267
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
|
268
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
|
217
269
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.DB
|
218
270
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseA
|
271
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
|
272
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.DB
|
273
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
|
274
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
|
275
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.DB
|
219
276
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseA
|
220
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseA
|
221
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
|
222
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
|
223
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
|
224
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
|
225
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
|
226
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
|
227
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
|
228
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
|
229
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
|
230
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
|
231
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
|
232
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
|
233
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
|
234
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
|
235
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
|
236
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
|
237
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
277
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseB
|
238
278
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
|
239
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
|
240
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
241
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
242
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
|
243
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
244
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
245
279
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/iamchert
|
246
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
280
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
281
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
247
282
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
|
248
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
249
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
250
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
283
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
|
284
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
285
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
251
286
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
|
252
287
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
|
288
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
289
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.DB
|
290
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
|
291
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
|
292
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
|
253
293
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
|
294
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
|
295
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
|
296
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
|
297
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
|
298
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseA
|
299
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
|
300
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
|
301
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
|
302
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
|
303
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
|
304
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
|
305
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
|
306
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
|
307
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
|
308
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
|
309
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
|
310
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
|
311
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
|
312
|
+
- spec/fixtures/film_data.rb
|
254
313
|
- spec/query_parser_spec.rb
|
255
314
|
- spec/stopper_factory_spec.rb
|
315
|
+
- spec/xapian_db_spec.rb
|
256
316
|
- spec/xapian_doc_spec.rb
|
257
317
|
- spec/xapian_doc_value_accessor_spec.rb
|
258
|
-
- spec/build_db_for_value_testing.rb
|
259
|
-
- spec/facets_spec.rb
|
@@ -1,102 +0,0 @@
|
|
1
|
-
|
2
|
-
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
3
|
-
| word is at the start of a line.
|
4
|
-
|
5
|
-
| This is a ranked list (commonest to rarest) of stopwords derived from
|
6
|
-
| a large text sample.
|
7
|
-
|
8
|
-
|
9
|
-
og | and
|
10
|
-
i | in
|
11
|
-
jeg | I
|
12
|
-
det | that (dem. pronoun)/it (pers. pronoun)
|
13
|
-
at | that (in front of a sentence)/to (with infinitive)
|
14
|
-
en | a/an
|
15
|
-
den | it (pers. pronoun)/that (dem. pronoun)
|
16
|
-
til | to/at/for/until/against/by/of/into, more
|
17
|
-
er | present tense of "to be"
|
18
|
-
som | who, as
|
19
|
-
på | on/upon/in/on/at/to/after/of/with/for, on
|
20
|
-
de | they
|
21
|
-
med | with/by/in, along
|
22
|
-
han | he
|
23
|
-
af | of/by/from/off/for/in/with/on, off
|
24
|
-
for | at/for/to/from/by/of/ago, in front/before, because
|
25
|
-
ikke | not
|
26
|
-
der | who/which, there/those
|
27
|
-
var | past tense of "to be"
|
28
|
-
mig | me/myself
|
29
|
-
sig | oneself/himself/herself/itself/themselves
|
30
|
-
men | but
|
31
|
-
et | a/an/one, one (number), someone/somebody/one
|
32
|
-
har | present tense of "to have"
|
33
|
-
om | round/about/for/in/a, about/around/down, if
|
34
|
-
vi | we
|
35
|
-
min | my
|
36
|
-
havde | past tense of "to have"
|
37
|
-
ham | him
|
38
|
-
hun | she
|
39
|
-
nu | now
|
40
|
-
over | over/above/across/by/beyond/past/on/about, over/past
|
41
|
-
da | then, when/as/since
|
42
|
-
fra | from/off/since, off, since
|
43
|
-
du | you
|
44
|
-
ud | out
|
45
|
-
sin | his/her/its/one's
|
46
|
-
dem | them
|
47
|
-
os | us/ourselves
|
48
|
-
op | up
|
49
|
-
man | you/one
|
50
|
-
hans | his
|
51
|
-
hvor | where
|
52
|
-
eller | or
|
53
|
-
hvad | what
|
54
|
-
skal | must/shall etc.
|
55
|
-
selv | myself/youself/herself/ourselves etc., even
|
56
|
-
her | here
|
57
|
-
alle | all/everyone/everybody etc.
|
58
|
-
vil | will (verb)
|
59
|
-
blev | past tense of "to stay/to remain/to get/to become"
|
60
|
-
kunne | could
|
61
|
-
ind | in
|
62
|
-
når | when
|
63
|
-
være | present tense of "to be"
|
64
|
-
dog | however/yet/after all
|
65
|
-
noget | something
|
66
|
-
ville | would
|
67
|
-
jo | you know/you see (adv), yes
|
68
|
-
deres | their/theirs
|
69
|
-
efter | after/behind/according to/for/by/from, later/afterwards
|
70
|
-
ned | down
|
71
|
-
skulle | should
|
72
|
-
denne | this
|
73
|
-
end | than
|
74
|
-
dette | this
|
75
|
-
mit | my/mine
|
76
|
-
også | also
|
77
|
-
under | under/beneath/below/during, below/underneath
|
78
|
-
have | have
|
79
|
-
dig | you
|
80
|
-
anden | other
|
81
|
-
hende | her
|
82
|
-
mine | my
|
83
|
-
alt | everything
|
84
|
-
meget | much/very, plenty of
|
85
|
-
sit | his, her, its, one's
|
86
|
-
sine | his, her, its, one's
|
87
|
-
vor | our
|
88
|
-
mod | against
|
89
|
-
disse | these
|
90
|
-
hvis | if
|
91
|
-
din | your/yours
|
92
|
-
nogle | some
|
93
|
-
hos | by/at
|
94
|
-
blive | be/become
|
95
|
-
mange | many
|
96
|
-
ad | by/through
|
97
|
-
bliver | present tense of "to be/to become"
|
98
|
-
hendes | her/hers
|
99
|
-
været | be
|
100
|
-
thi | for (conj)
|
101
|
-
jer | you
|
102
|
-
sådan | such, like this/like that
|
@@ -1,113 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
4
|
-
| word is at the start of a line.
|
5
|
-
|
6
|
-
| This is a ranked list (commonest to rarest) of stopwords derived from
|
7
|
-
| a large sample of Dutch text.
|
8
|
-
|
9
|
-
| Dutch stop words frequently exhibit homonym clashes. These are indicated
|
10
|
-
| clearly below.
|
11
|
-
|
12
|
-
de | the
|
13
|
-
en | and
|
14
|
-
van | of, from
|
15
|
-
ik | I, the ego
|
16
|
-
te | (1) chez, at etc, (2) to, (3) too
|
17
|
-
dat | that, which
|
18
|
-
die | that, those, who, which
|
19
|
-
in | in, inside
|
20
|
-
een | a, an, one
|
21
|
-
hij | he
|
22
|
-
het | the, it
|
23
|
-
niet | not, nothing, naught
|
24
|
-
zijn | (1) to be, being, (2) his, one's, its
|
25
|
-
is | is
|
26
|
-
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
|
27
|
-
op | on, upon, at, in, up, used up
|
28
|
-
aan | on, upon, to (as dative)
|
29
|
-
met | with, by
|
30
|
-
als | like, such as, when
|
31
|
-
voor | (1) before, in front of, (2) furrow
|
32
|
-
had | had, past tense all persons sing. of 'hebben' (have)
|
33
|
-
er | there
|
34
|
-
maar | but, only
|
35
|
-
om | round, about, for etc
|
36
|
-
hem | him
|
37
|
-
dan | then
|
38
|
-
zou | should/would, past tense all persons sing. of 'zullen'
|
39
|
-
of | or, whether, if
|
40
|
-
wat | what, something, anything
|
41
|
-
mijn | possessive and noun 'mine'
|
42
|
-
men | people, 'one'
|
43
|
-
dit | this
|
44
|
-
zo | so, thus, in this way
|
45
|
-
door | through by
|
46
|
-
over | over, across
|
47
|
-
ze | she, her, they, them
|
48
|
-
zich | oneself
|
49
|
-
bij | (1) a bee, (2) by, near, at
|
50
|
-
ook | also, too
|
51
|
-
tot | till, until
|
52
|
-
je | you
|
53
|
-
mij | me
|
54
|
-
uit | out of, from
|
55
|
-
der | Old Dutch form of 'van der' still found in surnames
|
56
|
-
daar | (1) there, (2) because
|
57
|
-
haar | (1) her, their, them, (2) hair
|
58
|
-
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
|
59
|
-
heb | present first person sing. of 'to have'
|
60
|
-
hoe | how, why
|
61
|
-
heeft | present third person sing. of 'to have'
|
62
|
-
hebben | 'to have' and various parts thereof
|
63
|
-
deze | this
|
64
|
-
u | you
|
65
|
-
want | (1) for, (2) mitten, (3) rigging
|
66
|
-
nog | yet, still
|
67
|
-
zal | 'shall', first and third person sing. of verb 'zullen' (will)
|
68
|
-
me | me
|
69
|
-
zij | she, they
|
70
|
-
nu | now
|
71
|
-
ge | 'thou', still used in Belgium and south Netherlands
|
72
|
-
geen | none
|
73
|
-
omdat | because
|
74
|
-
iets | something, somewhat
|
75
|
-
worden | to become, grow, get
|
76
|
-
toch | yet, still
|
77
|
-
al | all, every, each
|
78
|
-
waren | (1) 'were' (2) to wander, (3) wares, (3)
|
79
|
-
veel | much, many
|
80
|
-
meer | (1) more, (2) lake
|
81
|
-
doen | to do, to make
|
82
|
-
toen | then, when
|
83
|
-
moet | noun 'spot/mote' and present form of 'to must'
|
84
|
-
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
|
85
|
-
zonder | without
|
86
|
-
kan | noun 'can' and present form of 'to be able'
|
87
|
-
hun | their, them
|
88
|
-
dus | so, consequently
|
89
|
-
alles | all, everything, anything
|
90
|
-
onder | under, beneath
|
91
|
-
ja | yes, of course
|
92
|
-
eens | once, one day
|
93
|
-
hier | here
|
94
|
-
wie | who
|
95
|
-
werd | imperfect third person sing. of 'become'
|
96
|
-
altijd | always
|
97
|
-
doch | yet, but etc
|
98
|
-
wordt | present third person sing. of 'become'
|
99
|
-
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
|
100
|
-
kunnen | to be able
|
101
|
-
ons | us/our
|
102
|
-
zelf | self
|
103
|
-
tegen | against, towards, at
|
104
|
-
na | after, near
|
105
|
-
reeds | already
|
106
|
-
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
|
107
|
-
kon | could; past tense of 'to be able'
|
108
|
-
niets | nothing
|
109
|
-
uw | your
|
110
|
-
iemand | somebody
|
111
|
-
geweest | been; past participle of 'be'
|
112
|
-
andere | other
|
113
|
-
|