xapian-fu 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/README.rdoc +2 -1
  3. data/lib/xapian_fu/stopper_factory.rb +1 -4
  4. data/lib/xapian_fu/stopwords/af.txt +51 -0
  5. data/lib/xapian_fu/stopwords/ar.txt +480 -0
  6. data/lib/xapian_fu/stopwords/bg.txt +259 -0
  7. data/lib/xapian_fu/stopwords/bn.txt +398 -0
  8. data/lib/xapian_fu/stopwords/br.txt +1203 -0
  9. data/lib/xapian_fu/stopwords/ca.txt +278 -0
  10. data/lib/xapian_fu/stopwords/cs.txt +423 -0
  11. data/lib/xapian_fu/stopwords/da.txt +170 -0
  12. data/lib/xapian_fu/stopwords/danish.txt +1 -0
  13. data/lib/xapian_fu/stopwords/de.txt +620 -0
  14. data/lib/xapian_fu/stopwords/dutch.txt +1 -0
  15. data/lib/xapian_fu/stopwords/el.txt +847 -0
  16. data/lib/xapian_fu/stopwords/en.txt +1298 -0
  17. data/lib/xapian_fu/stopwords/english.txt +1 -0
  18. data/lib/xapian_fu/stopwords/eo.txt +173 -0
  19. data/lib/xapian_fu/stopwords/es.txt +732 -0
  20. data/lib/xapian_fu/stopwords/et.txt +35 -0
  21. data/lib/xapian_fu/stopwords/eu.txt +98 -0
  22. data/lib/xapian_fu/stopwords/fa.txt +799 -0
  23. data/lib/xapian_fu/stopwords/fi.txt +847 -0
  24. data/lib/xapian_fu/stopwords/finnish.txt +1 -0
  25. data/lib/xapian_fu/stopwords/fr.txt +691 -0
  26. data/lib/xapian_fu/stopwords/french.txt +1 -0
  27. data/lib/xapian_fu/stopwords/ga.txt +109 -0
  28. data/lib/xapian_fu/stopwords/german.txt +1 -0
  29. data/lib/xapian_fu/stopwords/gl.txt +160 -0
  30. data/lib/xapian_fu/stopwords/gu.txt +224 -0
  31. data/lib/xapian_fu/stopwords/ha.txt +39 -0
  32. data/lib/xapian_fu/stopwords/he.txt +194 -0
  33. data/lib/xapian_fu/stopwords/hi.txt +225 -0
  34. data/lib/xapian_fu/stopwords/hr.txt +179 -0
  35. data/lib/xapian_fu/stopwords/hu.txt +789 -0
  36. data/lib/xapian_fu/stopwords/hungarian.txt +1 -0
  37. data/lib/xapian_fu/stopwords/hy.txt +45 -0
  38. data/lib/xapian_fu/stopwords/id.txt +758 -0
  39. data/lib/xapian_fu/stopwords/it.txt +632 -0
  40. data/lib/xapian_fu/stopwords/italian.txt +1 -0
  41. data/lib/xapian_fu/stopwords/ja.txt +134 -0
  42. data/lib/xapian_fu/stopwords/ko.txt +679 -0
  43. data/lib/xapian_fu/stopwords/ku.txt +62 -0
  44. data/lib/xapian_fu/stopwords/la.txt +49 -0
  45. data/lib/xapian_fu/stopwords/lt.txt +474 -0
  46. data/lib/xapian_fu/stopwords/lv.txt +161 -0
  47. data/lib/xapian_fu/stopwords/mr.txt +99 -0
  48. data/lib/xapian_fu/stopwords/ms.txt +475 -0
  49. data/lib/xapian_fu/stopwords/nl.txt +413 -0
  50. data/lib/xapian_fu/stopwords/no.txt +221 -0
  51. data/lib/xapian_fu/stopwords/norwegian.txt +1 -0
  52. data/lib/xapian_fu/stopwords/pl.txt +329 -0
  53. data/lib/xapian_fu/stopwords/portuguese.txt +1 -0
  54. data/lib/xapian_fu/stopwords/pt.txt +560 -0
  55. data/lib/xapian_fu/stopwords/ro.txt +434 -0
  56. data/lib/xapian_fu/stopwords/ru.txt +559 -0
  57. data/lib/xapian_fu/stopwords/russian.txt +1 -0
  58. data/lib/xapian_fu/stopwords/sk.txt +418 -0
  59. data/lib/xapian_fu/stopwords/sl.txt +446 -0
  60. data/lib/xapian_fu/stopwords/so.txt +30 -0
  61. data/lib/xapian_fu/stopwords/spanish.txt +1 -0
  62. data/lib/xapian_fu/stopwords/st.txt +31 -0
  63. data/lib/xapian_fu/stopwords/sv.txt +418 -0
  64. data/lib/xapian_fu/stopwords/sw.txt +74 -0
  65. data/lib/xapian_fu/stopwords/swedish.txt +1 -0
  66. data/lib/xapian_fu/stopwords/th.txt +116 -0
  67. data/lib/xapian_fu/stopwords/tl.txt +147 -0
  68. data/lib/xapian_fu/stopwords/tr.txt +504 -0
  69. data/lib/xapian_fu/stopwords/uk.txt +73 -0
  70. data/lib/xapian_fu/stopwords/update.rb +10 -3
  71. data/lib/xapian_fu/stopwords/ur.txt +517 -0
  72. data/lib/xapian_fu/stopwords/vi.txt +645 -0
  73. data/lib/xapian_fu/stopwords/yo.txt +60 -0
  74. data/lib/xapian_fu/stopwords/zh.txt +794 -0
  75. data/lib/xapian_fu/stopwords/zu.txt +29 -0
  76. data/lib/xapian_fu/version.rb +1 -1
  77. data/lib/xapian_fu/xapian_db.rb +3 -0
  78. data/spec/xapian_doc_spec.rb +1 -1
  79. metadata +109 -51
  80. data/lib/xapian_fu/stopwords/danish.txt +0 -102
  81. data/lib/xapian_fu/stopwords/dutch.txt +0 -113
  82. data/lib/xapian_fu/stopwords/english.txt +0 -312
  83. data/lib/xapian_fu/stopwords/finnish.txt +0 -89
  84. data/lib/xapian_fu/stopwords/french.txt +0 -168
  85. data/lib/xapian_fu/stopwords/german.txt +0 -286
  86. data/lib/xapian_fu/stopwords/hungarian.txt +0 -203
  87. data/lib/xapian_fu/stopwords/italian.txt +0 -295
  88. data/lib/xapian_fu/stopwords/norwegian.txt +0 -186
  89. data/lib/xapian_fu/stopwords/portuguese.txt +0 -245
  90. data/lib/xapian_fu/stopwords/russian.txt +0 -236
  91. data/lib/xapian_fu/stopwords/spanish.txt +0 -348
  92. data/lib/xapian_fu/stopwords/swedish.txt +0 -125
@@ -0,0 +1,29 @@
1
+ futhi
2
+ kahle
3
+ kakhulu
4
+ kanye
5
+ khona
6
+ kodwa
7
+ kungani
8
+ kusho
9
+ la
10
+ lakhe
11
+ lapho
12
+ mina
13
+ ngesikhathi
14
+ nje
15
+ phansi
16
+ phezulu
17
+ u
18
+ ukuba
19
+ ukuthi
20
+ ukuze
21
+ uma
22
+ wahamba
23
+ wakhe
24
+ wami
25
+ wase
26
+ wathi
27
+ yakhe
28
+ zakhe
29
+ zonke
@@ -1,3 +1,3 @@
1
1
  module XapianFu #:nodoc:
2
- VERSION = "1.7.0"
2
+ VERSION = "1.8.0"
3
3
  end
@@ -58,6 +58,9 @@ module XapianFu #:nodoc:
58
58
  # :portuguese, :romanian, :russian, :spanish, :swedish,
59
59
  # :turkish</tt>. Set it to <tt>false</tt> to specify none.
60
60
  #
61
+ # There are more stoppers available than stemmers. See
62
+ # <tt>lib/xapian_fu/stopwords/*.txt</tt> for a complete list.
63
+ #
61
64
  # The default for all is <tt>:english</tt>.
62
65
  #
63
66
  # db = XapianDb.new(:language => :italian, :stopper => false)
@@ -203,7 +203,7 @@ describe XapianDoc do
203
203
  it "should allow setting the stopper on initialisation" do
204
204
  xdb = XapianDb.new(:stopper => :english)
205
205
  xdoc = xdb.documents.new("And they made a cake", :stopper => :french)
206
- xdoc.stopper.call("ayantes").should == true
206
+ xdoc.stopper.call("apres").should == true
207
207
  xdoc.stopper.call("and").should == false
208
208
  end
209
209
 
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Leach
8
8
  - Damian Janowski
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-03-23 00:00:00.000000000 Z
12
+ date: 2025-10-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -17,14 +17,14 @@ dependencies:
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: '2.7'
20
+ version: 2.7.0
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
25
  - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: '2.7'
27
+ version: 2.7.0
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rake
30
30
  requirement: !ruby/object:Gem::Requirement
@@ -90,20 +90,78 @@ files:
90
90
  - lib/xapian_fu/result_set.rb
91
91
  - lib/xapian_fu/stopper_factory.rb
92
92
  - lib/xapian_fu/stopwords/README
93
+ - lib/xapian_fu/stopwords/af.txt
94
+ - lib/xapian_fu/stopwords/ar.txt
95
+ - lib/xapian_fu/stopwords/bg.txt
96
+ - lib/xapian_fu/stopwords/bn.txt
97
+ - lib/xapian_fu/stopwords/br.txt
98
+ - lib/xapian_fu/stopwords/ca.txt
99
+ - lib/xapian_fu/stopwords/cs.txt
100
+ - lib/xapian_fu/stopwords/da.txt
93
101
  - lib/xapian_fu/stopwords/danish.txt
102
+ - lib/xapian_fu/stopwords/de.txt
94
103
  - lib/xapian_fu/stopwords/dutch.txt
104
+ - lib/xapian_fu/stopwords/el.txt
105
+ - lib/xapian_fu/stopwords/en.txt
95
106
  - lib/xapian_fu/stopwords/english.txt
107
+ - lib/xapian_fu/stopwords/eo.txt
108
+ - lib/xapian_fu/stopwords/es.txt
109
+ - lib/xapian_fu/stopwords/et.txt
110
+ - lib/xapian_fu/stopwords/eu.txt
111
+ - lib/xapian_fu/stopwords/fa.txt
112
+ - lib/xapian_fu/stopwords/fi.txt
96
113
  - lib/xapian_fu/stopwords/finnish.txt
114
+ - lib/xapian_fu/stopwords/fr.txt
97
115
  - lib/xapian_fu/stopwords/french.txt
116
+ - lib/xapian_fu/stopwords/ga.txt
98
117
  - lib/xapian_fu/stopwords/german.txt
118
+ - lib/xapian_fu/stopwords/gl.txt
119
+ - lib/xapian_fu/stopwords/gu.txt
120
+ - lib/xapian_fu/stopwords/ha.txt
121
+ - lib/xapian_fu/stopwords/he.txt
122
+ - lib/xapian_fu/stopwords/hi.txt
123
+ - lib/xapian_fu/stopwords/hr.txt
124
+ - lib/xapian_fu/stopwords/hu.txt
99
125
  - lib/xapian_fu/stopwords/hungarian.txt
126
+ - lib/xapian_fu/stopwords/hy.txt
127
+ - lib/xapian_fu/stopwords/id.txt
128
+ - lib/xapian_fu/stopwords/it.txt
100
129
  - lib/xapian_fu/stopwords/italian.txt
130
+ - lib/xapian_fu/stopwords/ja.txt
131
+ - lib/xapian_fu/stopwords/ko.txt
132
+ - lib/xapian_fu/stopwords/ku.txt
133
+ - lib/xapian_fu/stopwords/la.txt
134
+ - lib/xapian_fu/stopwords/lt.txt
135
+ - lib/xapian_fu/stopwords/lv.txt
136
+ - lib/xapian_fu/stopwords/mr.txt
137
+ - lib/xapian_fu/stopwords/ms.txt
138
+ - lib/xapian_fu/stopwords/nl.txt
139
+ - lib/xapian_fu/stopwords/no.txt
101
140
  - lib/xapian_fu/stopwords/norwegian.txt
141
+ - lib/xapian_fu/stopwords/pl.txt
102
142
  - lib/xapian_fu/stopwords/portuguese.txt
143
+ - lib/xapian_fu/stopwords/pt.txt
144
+ - lib/xapian_fu/stopwords/ro.txt
145
+ - lib/xapian_fu/stopwords/ru.txt
103
146
  - lib/xapian_fu/stopwords/russian.txt
147
+ - lib/xapian_fu/stopwords/sk.txt
148
+ - lib/xapian_fu/stopwords/sl.txt
149
+ - lib/xapian_fu/stopwords/so.txt
104
150
  - lib/xapian_fu/stopwords/spanish.txt
151
+ - lib/xapian_fu/stopwords/st.txt
152
+ - lib/xapian_fu/stopwords/sv.txt
153
+ - lib/xapian_fu/stopwords/sw.txt
105
154
  - lib/xapian_fu/stopwords/swedish.txt
155
+ - lib/xapian_fu/stopwords/th.txt
156
+ - lib/xapian_fu/stopwords/tl.txt
157
+ - lib/xapian_fu/stopwords/tr.txt
158
+ - lib/xapian_fu/stopwords/uk.txt
106
159
  - lib/xapian_fu/stopwords/update.rb
160
+ - lib/xapian_fu/stopwords/ur.txt
161
+ - lib/xapian_fu/stopwords/vi.txt
162
+ - lib/xapian_fu/stopwords/yo.txt
163
+ - lib/xapian_fu/stopwords/zh.txt
164
+ - lib/xapian_fu/stopwords/zu.txt
107
165
  - lib/xapian_fu/version.rb
108
166
  - lib/xapian_fu/xapian_db.rb
109
167
  - lib/xapian_fu/xapian_doc.rb
@@ -172,7 +230,7 @@ homepage: https://github.com/johnl/xapian-fu
172
230
  licenses:
173
231
  - MIT
174
232
  metadata: {}
175
- post_install_message:
233
+ post_install_message:
176
234
  rdoc_options:
177
235
  - "--title"
178
236
  - Xapian Fu
@@ -193,67 +251,67 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
251
  version: '0'
194
252
  requirements:
195
253
  - libxapian-dev, or the xapian-ruby gem
196
- rubygems_version: 3.1.2
197
- signing_key:
254
+ rubygems_version: 3.3.26
255
+ signing_key:
198
256
  specification_version: 4
199
257
  summary: A Ruby interface to the Xapian search engine
200
258
  test_files:
201
- - spec/xapian_db_spec.rb
202
- - spec/fixtures/film_data.rb
203
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
259
+ - spec/build_db_for_value_testing.rb
260
+ - spec/facets_spec.rb
204
261
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/flintlock
205
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
206
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
207
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
208
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
209
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
210
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
211
262
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/iamchert
212
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.DB
263
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
264
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
213
265
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseB
214
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.DB
215
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseB
216
- - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.DB
266
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.DB
267
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
268
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
217
269
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.DB
218
270
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseA
271
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
272
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.DB
273
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
274
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
275
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.DB
219
276
  - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseA
220
- - spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseA
221
- - spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
222
- - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
223
- - spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
224
- - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
225
- - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
226
- - spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
227
- - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
228
- - spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
229
- - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
230
- - spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
231
- - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
232
- - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
233
- - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
234
- - spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
235
- - spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
236
- - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
237
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
277
+ - spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseB
238
278
  - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
239
- - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
240
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
241
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
242
- - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
243
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
244
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
245
279
  - spec/fixtures/film_data/x86_64-linux~1.8.7/iamchert
246
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
280
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
281
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
247
282
  - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
248
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
249
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
250
- - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.DB
283
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
284
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
285
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
251
286
  - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
252
287
  - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
288
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
289
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.DB
290
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
291
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
292
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
253
293
  - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
294
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
295
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
296
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
297
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
298
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseA
299
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
300
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
301
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
302
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
303
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
304
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
305
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
306
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
307
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
308
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
309
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
310
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
311
+ - spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
312
+ - spec/fixtures/film_data.rb
254
313
  - spec/query_parser_spec.rb
255
314
  - spec/stopper_factory_spec.rb
315
+ - spec/xapian_db_spec.rb
256
316
  - spec/xapian_doc_spec.rb
257
317
  - spec/xapian_doc_value_accessor_spec.rb
258
- - spec/build_db_for_value_testing.rb
259
- - spec/facets_spec.rb
@@ -1,102 +0,0 @@
1
-
2
- | A Danish stop word list. Comments begin with vertical bar. Each stop
3
- | word is at the start of a line.
4
-
5
- | This is a ranked list (commonest to rarest) of stopwords derived from
6
- | a large text sample.
7
-
8
-
9
- og | and
10
- i | in
11
- jeg | I
12
- det | that (dem. pronoun)/it (pers. pronoun)
13
- at | that (in front of a sentence)/to (with infinitive)
14
- en | a/an
15
- den | it (pers. pronoun)/that (dem. pronoun)
16
- til | to/at/for/until/against/by/of/into, more
17
- er | present tense of "to be"
18
- som | who, as
19
- på | on/upon/in/on/at/to/after/of/with/for, on
20
- de | they
21
- med | with/by/in, along
22
- han | he
23
- af | of/by/from/off/for/in/with/on, off
24
- for | at/for/to/from/by/of/ago, in front/before, because
25
- ikke | not
26
- der | who/which, there/those
27
- var | past tense of "to be"
28
- mig | me/myself
29
- sig | oneself/himself/herself/itself/themselves
30
- men | but
31
- et | a/an/one, one (number), someone/somebody/one
32
- har | present tense of "to have"
33
- om | round/about/for/in/a, about/around/down, if
34
- vi | we
35
- min | my
36
- havde | past tense of "to have"
37
- ham | him
38
- hun | she
39
- nu | now
40
- over | over/above/across/by/beyond/past/on/about, over/past
41
- da | then, when/as/since
42
- fra | from/off/since, off, since
43
- du | you
44
- ud | out
45
- sin | his/her/its/one's
46
- dem | them
47
- os | us/ourselves
48
- op | up
49
- man | you/one
50
- hans | his
51
- hvor | where
52
- eller | or
53
- hvad | what
54
- skal | must/shall etc.
55
- selv | myself/yourself/herself/ourselves etc., even
56
- her | here
57
- alle | all/everyone/everybody etc.
58
- vil | will (verb)
59
- blev | past tense of "to stay/to remain/to get/to become"
60
- kunne | could
61
- ind | in
62
- når | when
63
- være | present tense of "to be"
64
- dog | however/yet/after all
65
- noget | something
66
- ville | would
67
- jo | you know/you see (adv), yes
68
- deres | their/theirs
69
- efter | after/behind/according to/for/by/from, later/afterwards
70
- ned | down
71
- skulle | should
72
- denne | this
73
- end | than
74
- dette | this
75
- mit | my/mine
76
- også | also
77
- under | under/beneath/below/during, below/underneath
78
- have | have
79
- dig | you
80
- anden | other
81
- hende | her
82
- mine | my
83
- alt | everything
84
- meget | much/very, plenty of
85
- sit | his, her, its, one's
86
- sine | his, her, its, one's
87
- vor | our
88
- mod | against
89
- disse | these
90
- hvis | if
91
- din | your/yours
92
- nogle | some
93
- hos | by/at
94
- blive | be/become
95
- mange | many
96
- ad | by/through
97
- bliver | present tense of "to be/to become"
98
- hendes | her/hers
99
- været | be
100
- thi | for (conj)
101
- jer | you
102
- sådan | such, like this/like that
@@ -1,113 +0,0 @@
1
-
2
-
3
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
4
- | word is at the start of a line.
5
-
6
- | This is a ranked list (commonest to rarest) of stopwords derived from
7
- | a large sample of Dutch text.
8
-
9
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
10
- | clearly below.
11
-
12
- de | the
13
- en | and
14
- van | of, from
15
- ik | I, the ego
16
- te | (1) chez, at etc, (2) to, (3) too
17
- dat | that, which
18
- die | that, those, who, which
19
- in | in, inside
20
- een | a, an, one
21
- hij | he
22
- het | the, it
23
- niet | not, nothing, naught
24
- zijn | (1) to be, being, (2) his, one's, its
25
- is | is
26
- was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
27
- op | on, upon, at, in, up, used up
28
- aan | on, upon, to (as dative)
29
- met | with, by
30
- als | like, such as, when
31
- voor | (1) before, in front of, (2) furrow
32
- had | had, past tense all persons sing. of 'hebben' (have)
33
- er | there
34
- maar | but, only
35
- om | round, about, for etc
36
- hem | him
37
- dan | then
38
- zou | should/would, past tense all persons sing. of 'zullen'
39
- of | or, whether, if
40
- wat | what, something, anything
41
- mijn | possessive and noun 'mine'
42
- men | people, 'one'
43
- dit | this
44
- zo | so, thus, in this way
45
- door | through, by
46
- over | over, across
47
- ze | she, her, they, them
48
- zich | oneself
49
- bij | (1) a bee, (2) by, near, at
50
- ook | also, too
51
- tot | till, until
52
- je | you
53
- mij | me
54
- uit | out of, from
55
- der | Old Dutch form of 'van der' still found in surnames
56
- daar | (1) there, (2) because
57
- haar | (1) her, their, them, (2) hair
58
- naar | (1) unpleasant, unwell etc, (2) towards, (3) as
59
- heb | present first person sing. of 'to have'
60
- hoe | how, why
61
- heeft | present third person sing. of 'to have'
62
- hebben | 'to have' and various parts thereof
63
- deze | this
64
- u | you
65
- want | (1) for, (2) mitten, (3) rigging
66
- nog | yet, still
67
- zal | 'shall', first and third person sing. of verb 'zullen' (will)
68
- me | me
69
- zij | she, they
70
- nu | now
71
- ge | 'thou', still used in Belgium and south Netherlands
72
- geen | none
73
- omdat | because
74
- iets | something, somewhat
75
- worden | to become, grow, get
76
- toch | yet, still
77
- al | all, every, each
78
- waren | (1) 'were', (2) to wander, (3) wares
79
- veel | much, many
80
- meer | (1) more, (2) lake
81
- doen | to do, to make
82
- toen | then, when
83
- moet | noun 'spot/mote' and present form of 'to must'
84
- ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
85
- zonder | without
86
- kan | noun 'can' and present form of 'to be able'
87
- hun | their, them
88
- dus | so, consequently
89
- alles | all, everything, anything
90
- onder | under, beneath
91
- ja | yes, of course
92
- eens | once, one day
93
- hier | here
94
- wie | who
95
- werd | imperfect third person sing. of 'become'
96
- altijd | always
97
- doch | yet, but etc
98
- wordt | present third person sing. of 'become'
99
- wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
100
- kunnen | to be able
101
- ons | us/our
102
- zelf | self
103
- tegen | against, towards, at
104
- na | after, near
105
- reeds | already
106
- wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
107
- kon | could; past tense of 'to be able'
108
- niets | nothing
109
- uw | your
110
- iemand | somebody
111
- geweest | been; past participle of 'be'
112
- andere | other
113
-