xapian-fu 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.rdoc +5 -0
- data/README.rdoc +3 -2
- data/lib/xapian_fu/stopper_factory.rb +1 -4
- data/lib/xapian_fu/stopwords/af.txt +51 -0
- data/lib/xapian_fu/stopwords/ar.txt +480 -0
- data/lib/xapian_fu/stopwords/bg.txt +259 -0
- data/lib/xapian_fu/stopwords/bn.txt +398 -0
- data/lib/xapian_fu/stopwords/br.txt +1203 -0
- data/lib/xapian_fu/stopwords/ca.txt +278 -0
- data/lib/xapian_fu/stopwords/cs.txt +423 -0
- data/lib/xapian_fu/stopwords/da.txt +170 -0
- data/lib/xapian_fu/stopwords/danish.txt +1 -0
- data/lib/xapian_fu/stopwords/de.txt +620 -0
- data/lib/xapian_fu/stopwords/dutch.txt +1 -0
- data/lib/xapian_fu/stopwords/el.txt +847 -0
- data/lib/xapian_fu/stopwords/en.txt +1298 -0
- data/lib/xapian_fu/stopwords/english.txt +1 -0
- data/lib/xapian_fu/stopwords/eo.txt +173 -0
- data/lib/xapian_fu/stopwords/es.txt +732 -0
- data/lib/xapian_fu/stopwords/et.txt +35 -0
- data/lib/xapian_fu/stopwords/eu.txt +98 -0
- data/lib/xapian_fu/stopwords/fa.txt +799 -0
- data/lib/xapian_fu/stopwords/fi.txt +847 -0
- data/lib/xapian_fu/stopwords/finnish.txt +1 -0
- data/lib/xapian_fu/stopwords/fr.txt +691 -0
- data/lib/xapian_fu/stopwords/french.txt +1 -0
- data/lib/xapian_fu/stopwords/ga.txt +109 -0
- data/lib/xapian_fu/stopwords/german.txt +1 -0
- data/lib/xapian_fu/stopwords/gl.txt +160 -0
- data/lib/xapian_fu/stopwords/gu.txt +224 -0
- data/lib/xapian_fu/stopwords/ha.txt +39 -0
- data/lib/xapian_fu/stopwords/he.txt +194 -0
- data/lib/xapian_fu/stopwords/hi.txt +225 -0
- data/lib/xapian_fu/stopwords/hr.txt +179 -0
- data/lib/xapian_fu/stopwords/hu.txt +789 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +1 -0
- data/lib/xapian_fu/stopwords/hy.txt +45 -0
- data/lib/xapian_fu/stopwords/id.txt +758 -0
- data/lib/xapian_fu/stopwords/it.txt +632 -0
- data/lib/xapian_fu/stopwords/italian.txt +1 -0
- data/lib/xapian_fu/stopwords/ja.txt +134 -0
- data/lib/xapian_fu/stopwords/ko.txt +679 -0
- data/lib/xapian_fu/stopwords/ku.txt +62 -0
- data/lib/xapian_fu/stopwords/la.txt +49 -0
- data/lib/xapian_fu/stopwords/lt.txt +474 -0
- data/lib/xapian_fu/stopwords/lv.txt +161 -0
- data/lib/xapian_fu/stopwords/mr.txt +99 -0
- data/lib/xapian_fu/stopwords/ms.txt +475 -0
- data/lib/xapian_fu/stopwords/nl.txt +413 -0
- data/lib/xapian_fu/stopwords/no.txt +221 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +1 -0
- data/lib/xapian_fu/stopwords/pl.txt +329 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +1 -0
- data/lib/xapian_fu/stopwords/pt.txt +560 -0
- data/lib/xapian_fu/stopwords/ro.txt +434 -0
- data/lib/xapian_fu/stopwords/ru.txt +559 -0
- data/lib/xapian_fu/stopwords/russian.txt +1 -0
- data/lib/xapian_fu/stopwords/sk.txt +418 -0
- data/lib/xapian_fu/stopwords/sl.txt +446 -0
- data/lib/xapian_fu/stopwords/so.txt +30 -0
- data/lib/xapian_fu/stopwords/spanish.txt +1 -0
- data/lib/xapian_fu/stopwords/st.txt +31 -0
- data/lib/xapian_fu/stopwords/sv.txt +418 -0
- data/lib/xapian_fu/stopwords/sw.txt +74 -0
- data/lib/xapian_fu/stopwords/swedish.txt +1 -0
- data/lib/xapian_fu/stopwords/th.txt +116 -0
- data/lib/xapian_fu/stopwords/tl.txt +147 -0
- data/lib/xapian_fu/stopwords/tr.txt +504 -0
- data/lib/xapian_fu/stopwords/uk.txt +73 -0
- data/lib/xapian_fu/stopwords/update.rb +10 -3
- data/lib/xapian_fu/stopwords/ur.txt +517 -0
- data/lib/xapian_fu/stopwords/vi.txt +645 -0
- data/lib/xapian_fu/stopwords/yo.txt +60 -0
- data/lib/xapian_fu/stopwords/zh.txt +794 -0
- data/lib/xapian_fu/stopwords/zu.txt +29 -0
- data/lib/xapian_fu/version.rb +1 -1
- data/lib/xapian_fu/xapian_db.rb +27 -0
- data/lib/xapian_fu/xapian_doc.rb +22 -0
- data/spec/xapian_db_spec.rb +21 -0
- data/spec/xapian_doc_spec.rb +35 -1
- metadata +124 -53
- data/lib/xapian_fu/stopwords/danish.txt +0 -102
- data/lib/xapian_fu/stopwords/dutch.txt +0 -113
- data/lib/xapian_fu/stopwords/english.txt +0 -312
- data/lib/xapian_fu/stopwords/finnish.txt +0 -89
- data/lib/xapian_fu/stopwords/french.txt +0 -168
- data/lib/xapian_fu/stopwords/german.txt +0 -286
- data/lib/xapian_fu/stopwords/hungarian.txt +0 -203
- data/lib/xapian_fu/stopwords/italian.txt +0 -295
- data/lib/xapian_fu/stopwords/norwegian.txt +0 -186
- data/lib/xapian_fu/stopwords/portuguese.txt +0 -245
- data/lib/xapian_fu/stopwords/russian.txt +0 -236
- data/lib/xapian_fu/stopwords/spanish.txt +0 -348
- data/lib/xapian_fu/stopwords/swedish.txt +0 -125
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
futhi
|
|
2
|
+
kahle
|
|
3
|
+
kakhulu
|
|
4
|
+
kanye
|
|
5
|
+
khona
|
|
6
|
+
kodwa
|
|
7
|
+
kungani
|
|
8
|
+
kusho
|
|
9
|
+
la
|
|
10
|
+
lakhe
|
|
11
|
+
lapho
|
|
12
|
+
mina
|
|
13
|
+
ngesikhathi
|
|
14
|
+
nje
|
|
15
|
+
phansi
|
|
16
|
+
phezulu
|
|
17
|
+
u
|
|
18
|
+
ukuba
|
|
19
|
+
ukuthi
|
|
20
|
+
ukuze
|
|
21
|
+
uma
|
|
22
|
+
wahamba
|
|
23
|
+
wakhe
|
|
24
|
+
wami
|
|
25
|
+
wase
|
|
26
|
+
wathi
|
|
27
|
+
yakhe
|
|
28
|
+
zakhe
|
|
29
|
+
zonke
|
data/lib/xapian_fu/version.rb
CHANGED
data/lib/xapian_fu/xapian_db.rb
CHANGED
|
@@ -38,6 +38,11 @@ module XapianFu #:nodoc:
|
|
|
38
38
|
# XapianDb to wipe the current on-disk database and start afresh.
|
|
39
39
|
# The default is <tt>false</tt>.
|
|
40
40
|
#
|
|
41
|
+
# Setting the <tt>:type</tt> option to either :glass or :chert will force that
|
|
42
|
+
# database backend, if supported. Leave as nil to auto-detect existing
|
|
43
|
+
# databases and create new databases with the library default (recommended).
|
|
44
|
+
# Requires xapian >=1.4
|
|
45
|
+
#
|
|
41
46
|
# db = XapianDb.new(:dir => '/tmp/mydb', :create => true)
|
|
42
47
|
#
|
|
43
48
|
# == Language, Stemmers and Stoppers
|
|
@@ -53,10 +58,17 @@ module XapianFu #:nodoc:
|
|
|
53
58
|
# :portuguese, :romanian, :russian, :spanish, :swedish,
|
|
54
59
|
# :turkish</tt>. Set it to <tt>false</tt> to specify none.
|
|
55
60
|
#
|
|
61
|
+
# There are more stoppers available than stemmers. See
|
|
62
|
+
# <tt>lib/xapian_fu/stopwords/*.txt</tt> for a complete list.
|
|
63
|
+
#
|
|
56
64
|
# The default for all is <tt>:english</tt>.
|
|
57
65
|
#
|
|
58
66
|
# db = XapianDb.new(:language => :italian, :stopper => false)
|
|
59
67
|
#
|
|
68
|
+
# The <tt>:stopper_strategy</tt> option specifies the default stop strategy
|
|
69
|
+
# that will be used when indexing and can be: <tt>:none</tt>, <tt>:all</tt> or
|
|
70
|
+
# <tt>:stemmed</tt>. Defaults to <tt>:stemmed</tt>
|
|
71
|
+
#
|
|
60
72
|
# == Spelling suggestions
|
|
61
73
|
#
|
|
62
74
|
# The <tt>:spelling</tt> option controls generation of a spelling
|
|
@@ -167,6 +179,8 @@ module XapianFu #:nodoc:
|
|
|
167
179
|
attr_reader :field_options
|
|
168
180
|
attr_accessor :weights_function
|
|
169
181
|
attr :field_weights
|
|
182
|
+
# The default stopper strategy
|
|
183
|
+
attr_accessor :stopper_strategy
|
|
170
184
|
|
|
171
185
|
def initialize( options = { } )
|
|
172
186
|
@options = { :index_positions => true, :spelling => true }.merge(options)
|
|
@@ -175,10 +189,23 @@ module XapianFu #:nodoc:
|
|
|
175
189
|
@db_flag = Xapian::DB_OPEN
|
|
176
190
|
@db_flag = Xapian::DB_CREATE_OR_OPEN if @options[:create]
|
|
177
191
|
@db_flag = Xapian::DB_CREATE_OR_OVERWRITE if @options[:overwrite]
|
|
192
|
+
case @options[:type]
|
|
193
|
+
when :glass
|
|
194
|
+
raise XapianFuError.new("type glass not recognised") unless defined?(Xapian::DB_BACKEND_GLASS)
|
|
195
|
+
@db_flag |= Xapian::DB_BACKEND_GLASS
|
|
196
|
+
when :chert
|
|
197
|
+
raise XapianFuError.new("type chert not recognised") unless defined?(Xapian::DB_BACKEND_CHERT)
|
|
198
|
+
@db_flag |= Xapian::DB_BACKEND_CHERT
|
|
199
|
+
when nil
|
|
200
|
+
# use library defaults
|
|
201
|
+
else
|
|
202
|
+
raise XapianFuError.new("type #{@options[:type].inspect} not recognised")
|
|
203
|
+
end
|
|
178
204
|
@tx_mutex = Mutex.new
|
|
179
205
|
@language = @options.fetch(:language, :english)
|
|
180
206
|
@stemmer = @options.fetch(:stemmer, @language)
|
|
181
207
|
@stopper = @options.fetch(:stopper, @language)
|
|
208
|
+
@stopper_strategy = @options.fetch(:stopper_strategy, :stemmed)
|
|
182
209
|
@field_options = {}
|
|
183
210
|
setup_fields(@options[:fields])
|
|
184
211
|
@store_values << @options[:store]
|
data/lib/xapian_fu/xapian_doc.rb
CHANGED
|
@@ -227,6 +227,27 @@ module XapianFu #:nodoc:
|
|
|
227
227
|
end
|
|
228
228
|
end
|
|
229
229
|
|
|
230
|
+
STOPPER_STRATEGIES = {
|
|
231
|
+
:none => 0,
|
|
232
|
+
:all => 1,
|
|
233
|
+
:stemmed => 2
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
def stopper_strategy
|
|
237
|
+
if @stopper_strategy
|
|
238
|
+
@stopper_strategy
|
|
239
|
+
else
|
|
240
|
+
@stopper_strategy =
|
|
241
|
+
if ! @options[:stopper_strategy].nil?
|
|
242
|
+
@options[:stopper_strategy]
|
|
243
|
+
elsif db
|
|
244
|
+
db.stopper_strategy
|
|
245
|
+
else
|
|
246
|
+
:stemmed
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
230
251
|
# Return this document's language which is set on initialize, inherited
|
|
231
252
|
# from the database or defaults to :english
|
|
232
253
|
def language
|
|
@@ -276,6 +297,7 @@ module XapianFu #:nodoc:
|
|
|
276
297
|
tg.document = xapian_document
|
|
277
298
|
tg.stopper = stopper if stopper
|
|
278
299
|
tg.stemmer = stemmer
|
|
300
|
+
tg.set_stopper_strategy(XapianDoc::STOPPER_STRATEGIES.fetch(stopper_strategy, 2))
|
|
279
301
|
tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
|
|
280
302
|
index_method = db.index_positions ? :index_text : :index_text_without_positions
|
|
281
303
|
fields.each do |k,o|
|
data/spec/xapian_db_spec.rb
CHANGED
|
@@ -38,6 +38,27 @@ describe XapianDb do
|
|
|
38
38
|
File.exists?(tmp_dir).should be_true
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
+
it "should create a glass database when type is glass" do
|
|
42
|
+
pending "this version of xapian doesn't support glass" unless defined?(Xapian::DB_BACKEND_GLASS)
|
|
43
|
+
glassdir = tmp_dir + 'glass'
|
|
44
|
+
XapianDb.new(:dir => glassdir, :create => true, :type => :glass).rw
|
|
45
|
+
File.exists?(glassdir + '/iamglass').should == true
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "should create a chert database when type is chert" do
|
|
49
|
+
pending "this version of xapian doesn't support chert" unless defined?(Xapian::DB_BACKEND_CHERT)
|
|
50
|
+
chertdir = tmp_dir + 'chert'
|
|
51
|
+
XapianDb.new(:dir => chertdir, :create => true, :type => :chert).rw
|
|
52
|
+
File.exists?(chertdir + '/iamchert').should == true
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "should raise an exception when type is unrecognised" do
|
|
56
|
+
chertdir = tmp_dir + 'whatever'
|
|
57
|
+
lambda {
|
|
58
|
+
XapianDb.new(:dir => chertdir, :create => true, :type => :whatever).rw
|
|
59
|
+
}.should raise_error(XapianFu::XapianFuError)
|
|
60
|
+
end
|
|
61
|
+
|
|
41
62
|
it "should flush documents to the index when flush is called" do
|
|
42
63
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
|
43
64
|
xdb.flush
|
data/spec/xapian_doc_spec.rb
CHANGED
|
@@ -203,7 +203,7 @@ describe XapianDoc do
|
|
|
203
203
|
it "should allow setting the stopper on initialisation" do
|
|
204
204
|
xdb = XapianDb.new(:stopper => :english)
|
|
205
205
|
xdoc = xdb.documents.new("And they made a cake", :stopper => :french)
|
|
206
|
-
xdoc.stopper.call("
|
|
206
|
+
xdoc.stopper.call("apres").should == true
|
|
207
207
|
xdoc.stopper.call("and").should == false
|
|
208
208
|
end
|
|
209
209
|
|
|
@@ -261,4 +261,38 @@ describe XapianDoc do
|
|
|
261
261
|
end
|
|
262
262
|
end
|
|
263
263
|
|
|
264
|
+
describe "stopper_strategy" do
|
|
265
|
+
it "should stop all stop words when stopper_strategy is set to :all " do
|
|
266
|
+
xdb = XapianDb.new(:stopper_strategy => :all )
|
|
267
|
+
xdoc = xdb.documents.new("She fished for fish").to_xapian_document
|
|
268
|
+
terms = xdoc.terms.collect { |t| t.term }
|
|
269
|
+
terms.should_not include "for"
|
|
270
|
+
terms.should include "fish"
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it "should stop stemmed words by when stopper_strategy is set to :stemmed " do
|
|
274
|
+
xdb = XapianDb.new(:stopper_strategy => :stemmed)
|
|
275
|
+
xdoc = xdb.documents.new("She fished for fish").to_xapian_document
|
|
276
|
+
terms = xdoc.terms.collect { |t| t.term }
|
|
277
|
+
terms.should_not include "Zfor"
|
|
278
|
+
terms.should include "fish"
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
it "should stop no words by when stopper_strategy is set to :none " do
|
|
282
|
+
xdb = XapianDb.new(:stopper_strategy => :none)
|
|
283
|
+
xdoc = xdb.documents.new("She fished for fish").to_xapian_document
|
|
284
|
+
terms = xdoc.terms.collect { |t| t.term }
|
|
285
|
+
terms.should include "Zfor"
|
|
286
|
+
terms.should include "for"
|
|
287
|
+
terms.should include "fish"
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
it "should stop stemmed words by default " do
|
|
291
|
+
xdb = XapianDb.new
|
|
292
|
+
xdoc = xdb.documents.new("She fished for fish").to_xapian_document
|
|
293
|
+
terms = xdoc.terms.collect { |t| t.term }
|
|
294
|
+
terms.should_not include "Zfor"
|
|
295
|
+
terms.should include "fish"
|
|
296
|
+
end
|
|
297
|
+
end
|
|
264
298
|
end
|
metadata
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: xapian-fu
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.0
|
|
4
|
+
version: 1.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- John Leach
|
|
8
8
|
- Damian Janowski
|
|
9
|
-
autorequire:
|
|
9
|
+
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2025-10-19 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: rspec
|
|
@@ -17,14 +17,14 @@ dependencies:
|
|
|
17
17
|
requirements:
|
|
18
18
|
- - "~>"
|
|
19
19
|
- !ruby/object:Gem::Version
|
|
20
|
-
version:
|
|
20
|
+
version: 2.7.0
|
|
21
21
|
type: :development
|
|
22
22
|
prerelease: false
|
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
|
24
24
|
requirements:
|
|
25
25
|
- - "~>"
|
|
26
26
|
- !ruby/object:Gem::Version
|
|
27
|
-
version:
|
|
27
|
+
version: 2.7.0
|
|
28
28
|
- !ruby/object:Gem::Dependency
|
|
29
29
|
name: rake
|
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -39,6 +39,20 @@ dependencies:
|
|
|
39
39
|
- - "~>"
|
|
40
40
|
- !ruby/object:Gem::Version
|
|
41
41
|
version: '0'
|
|
42
|
+
- !ruby/object:Gem::Dependency
|
|
43
|
+
name: irb
|
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
|
45
|
+
requirements:
|
|
46
|
+
- - "~>"
|
|
47
|
+
- !ruby/object:Gem::Version
|
|
48
|
+
version: '0'
|
|
49
|
+
type: :development
|
|
50
|
+
prerelease: false
|
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - "~>"
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
version: '0'
|
|
42
56
|
- !ruby/object:Gem::Dependency
|
|
43
57
|
name: rdoc
|
|
44
58
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -76,20 +90,78 @@ files:
|
|
|
76
90
|
- lib/xapian_fu/result_set.rb
|
|
77
91
|
- lib/xapian_fu/stopper_factory.rb
|
|
78
92
|
- lib/xapian_fu/stopwords/README
|
|
93
|
+
- lib/xapian_fu/stopwords/af.txt
|
|
94
|
+
- lib/xapian_fu/stopwords/ar.txt
|
|
95
|
+
- lib/xapian_fu/stopwords/bg.txt
|
|
96
|
+
- lib/xapian_fu/stopwords/bn.txt
|
|
97
|
+
- lib/xapian_fu/stopwords/br.txt
|
|
98
|
+
- lib/xapian_fu/stopwords/ca.txt
|
|
99
|
+
- lib/xapian_fu/stopwords/cs.txt
|
|
100
|
+
- lib/xapian_fu/stopwords/da.txt
|
|
79
101
|
- lib/xapian_fu/stopwords/danish.txt
|
|
102
|
+
- lib/xapian_fu/stopwords/de.txt
|
|
80
103
|
- lib/xapian_fu/stopwords/dutch.txt
|
|
104
|
+
- lib/xapian_fu/stopwords/el.txt
|
|
105
|
+
- lib/xapian_fu/stopwords/en.txt
|
|
81
106
|
- lib/xapian_fu/stopwords/english.txt
|
|
107
|
+
- lib/xapian_fu/stopwords/eo.txt
|
|
108
|
+
- lib/xapian_fu/stopwords/es.txt
|
|
109
|
+
- lib/xapian_fu/stopwords/et.txt
|
|
110
|
+
- lib/xapian_fu/stopwords/eu.txt
|
|
111
|
+
- lib/xapian_fu/stopwords/fa.txt
|
|
112
|
+
- lib/xapian_fu/stopwords/fi.txt
|
|
82
113
|
- lib/xapian_fu/stopwords/finnish.txt
|
|
114
|
+
- lib/xapian_fu/stopwords/fr.txt
|
|
83
115
|
- lib/xapian_fu/stopwords/french.txt
|
|
116
|
+
- lib/xapian_fu/stopwords/ga.txt
|
|
84
117
|
- lib/xapian_fu/stopwords/german.txt
|
|
118
|
+
- lib/xapian_fu/stopwords/gl.txt
|
|
119
|
+
- lib/xapian_fu/stopwords/gu.txt
|
|
120
|
+
- lib/xapian_fu/stopwords/ha.txt
|
|
121
|
+
- lib/xapian_fu/stopwords/he.txt
|
|
122
|
+
- lib/xapian_fu/stopwords/hi.txt
|
|
123
|
+
- lib/xapian_fu/stopwords/hr.txt
|
|
124
|
+
- lib/xapian_fu/stopwords/hu.txt
|
|
85
125
|
- lib/xapian_fu/stopwords/hungarian.txt
|
|
126
|
+
- lib/xapian_fu/stopwords/hy.txt
|
|
127
|
+
- lib/xapian_fu/stopwords/id.txt
|
|
128
|
+
- lib/xapian_fu/stopwords/it.txt
|
|
86
129
|
- lib/xapian_fu/stopwords/italian.txt
|
|
130
|
+
- lib/xapian_fu/stopwords/ja.txt
|
|
131
|
+
- lib/xapian_fu/stopwords/ko.txt
|
|
132
|
+
- lib/xapian_fu/stopwords/ku.txt
|
|
133
|
+
- lib/xapian_fu/stopwords/la.txt
|
|
134
|
+
- lib/xapian_fu/stopwords/lt.txt
|
|
135
|
+
- lib/xapian_fu/stopwords/lv.txt
|
|
136
|
+
- lib/xapian_fu/stopwords/mr.txt
|
|
137
|
+
- lib/xapian_fu/stopwords/ms.txt
|
|
138
|
+
- lib/xapian_fu/stopwords/nl.txt
|
|
139
|
+
- lib/xapian_fu/stopwords/no.txt
|
|
87
140
|
- lib/xapian_fu/stopwords/norwegian.txt
|
|
141
|
+
- lib/xapian_fu/stopwords/pl.txt
|
|
88
142
|
- lib/xapian_fu/stopwords/portuguese.txt
|
|
143
|
+
- lib/xapian_fu/stopwords/pt.txt
|
|
144
|
+
- lib/xapian_fu/stopwords/ro.txt
|
|
145
|
+
- lib/xapian_fu/stopwords/ru.txt
|
|
89
146
|
- lib/xapian_fu/stopwords/russian.txt
|
|
147
|
+
- lib/xapian_fu/stopwords/sk.txt
|
|
148
|
+
- lib/xapian_fu/stopwords/sl.txt
|
|
149
|
+
- lib/xapian_fu/stopwords/so.txt
|
|
90
150
|
- lib/xapian_fu/stopwords/spanish.txt
|
|
151
|
+
- lib/xapian_fu/stopwords/st.txt
|
|
152
|
+
- lib/xapian_fu/stopwords/sv.txt
|
|
153
|
+
- lib/xapian_fu/stopwords/sw.txt
|
|
91
154
|
- lib/xapian_fu/stopwords/swedish.txt
|
|
155
|
+
- lib/xapian_fu/stopwords/th.txt
|
|
156
|
+
- lib/xapian_fu/stopwords/tl.txt
|
|
157
|
+
- lib/xapian_fu/stopwords/tr.txt
|
|
158
|
+
- lib/xapian_fu/stopwords/uk.txt
|
|
92
159
|
- lib/xapian_fu/stopwords/update.rb
|
|
160
|
+
- lib/xapian_fu/stopwords/ur.txt
|
|
161
|
+
- lib/xapian_fu/stopwords/vi.txt
|
|
162
|
+
- lib/xapian_fu/stopwords/yo.txt
|
|
163
|
+
- lib/xapian_fu/stopwords/zh.txt
|
|
164
|
+
- lib/xapian_fu/stopwords/zu.txt
|
|
93
165
|
- lib/xapian_fu/version.rb
|
|
94
166
|
- lib/xapian_fu/xapian_db.rb
|
|
95
167
|
- lib/xapian_fu/xapian_doc.rb
|
|
@@ -158,7 +230,7 @@ homepage: https://github.com/johnl/xapian-fu
|
|
|
158
230
|
licenses:
|
|
159
231
|
- MIT
|
|
160
232
|
metadata: {}
|
|
161
|
-
post_install_message:
|
|
233
|
+
post_install_message:
|
|
162
234
|
rdoc_options:
|
|
163
235
|
- "--title"
|
|
164
236
|
- Xapian Fu
|
|
@@ -171,7 +243,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
171
243
|
requirements:
|
|
172
244
|
- - ">="
|
|
173
245
|
- !ruby/object:Gem::Version
|
|
174
|
-
version: 1.
|
|
246
|
+
version: 2.1.0
|
|
175
247
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
248
|
requirements:
|
|
177
249
|
- - ">="
|
|
@@ -179,68 +251,67 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
179
251
|
version: '0'
|
|
180
252
|
requirements:
|
|
181
253
|
- libxapian-dev, or the xapian-ruby gem
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
signing_key:
|
|
254
|
+
rubygems_version: 3.3.26
|
|
255
|
+
signing_key:
|
|
185
256
|
specification_version: 4
|
|
186
257
|
summary: A Ruby interface to the Xapian search engine
|
|
187
258
|
test_files:
|
|
188
|
-
- spec/
|
|
189
|
-
- spec/
|
|
190
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
|
|
259
|
+
- spec/build_db_for_value_testing.rb
|
|
260
|
+
- spec/facets_spec.rb
|
|
191
261
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/flintlock
|
|
192
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
|
|
193
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
|
|
194
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
|
|
195
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
|
|
196
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
|
|
197
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
|
|
198
262
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/iamchert
|
|
199
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
|
263
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.DB
|
|
264
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseA
|
|
200
265
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/position.baseB
|
|
201
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
|
202
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
|
203
|
-
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/
|
|
266
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.DB
|
|
267
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseA
|
|
268
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/postlist.baseB
|
|
204
269
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.DB
|
|
205
270
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseA
|
|
271
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/record.baseB
|
|
272
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.DB
|
|
273
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseA
|
|
274
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/spelling.baseB
|
|
275
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.DB
|
|
206
276
|
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseA
|
|
207
|
-
- spec/fixtures/film_data/x86_64-linux~
|
|
208
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
|
|
209
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
|
|
210
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
|
|
211
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
|
|
212
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
|
|
213
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
|
|
214
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
|
|
215
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
|
|
216
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
|
|
217
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
|
|
218
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
|
|
219
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
|
|
220
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
|
|
221
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
|
|
222
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
|
|
223
|
-
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
|
|
224
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
|
277
|
+
- spec/fixtures/film_data/x86_64-linux-gnu~2.5.5/termlist.baseB
|
|
225
278
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
|
|
226
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
|
|
227
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
|
228
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
|
229
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
|
|
230
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
|
231
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
|
232
279
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/iamchert
|
|
233
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
|
280
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
|
281
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
|
234
282
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
|
|
235
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
|
236
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
|
237
|
-
- spec/fixtures/film_data/x86_64-linux~1.8.7/
|
|
283
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
|
|
284
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
|
285
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
|
238
286
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
|
|
239
287
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
|
|
288
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
|
289
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.DB
|
|
290
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseA
|
|
291
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/spelling.baseB
|
|
292
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
|
|
240
293
|
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
|
|
294
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
|
|
295
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/flintlock
|
|
296
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/iamchert
|
|
297
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.DB
|
|
298
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseA
|
|
299
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/position.baseB
|
|
300
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.DB
|
|
301
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseA
|
|
302
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/postlist.baseB
|
|
303
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.DB
|
|
304
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseA
|
|
305
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/record.baseB
|
|
306
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.DB
|
|
307
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseA
|
|
308
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/spelling.baseB
|
|
309
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.DB
|
|
310
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseA
|
|
311
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.3/termlist.baseB
|
|
312
|
+
- spec/fixtures/film_data.rb
|
|
241
313
|
- spec/query_parser_spec.rb
|
|
242
314
|
- spec/stopper_factory_spec.rb
|
|
315
|
+
- spec/xapian_db_spec.rb
|
|
243
316
|
- spec/xapian_doc_spec.rb
|
|
244
317
|
- spec/xapian_doc_value_accessor_spec.rb
|
|
245
|
-
- spec/build_db_for_value_testing.rb
|
|
246
|
-
- spec/facets_spec.rb
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
|
3
|
-
| word is at the start of a line.
|
|
4
|
-
|
|
5
|
-
| This is a ranked list (commonest to rarest) of stopwords derived from
|
|
6
|
-
| a large text sample.
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
og | and
|
|
10
|
-
i | in
|
|
11
|
-
jeg | I
|
|
12
|
-
det | that (dem. pronoun)/it (pers. pronoun)
|
|
13
|
-
at | that (in front of a sentence)/to (with infinitive)
|
|
14
|
-
en | a/an
|
|
15
|
-
den | it (pers. pronoun)/that (dem. pronoun)
|
|
16
|
-
til | to/at/for/until/against/by/of/into, more
|
|
17
|
-
er | present tense of "to be"
|
|
18
|
-
som | who, as
|
|
19
|
-
på | on/upon/in/on/at/to/after/of/with/for, on
|
|
20
|
-
de | they
|
|
21
|
-
med | with/by/in, along
|
|
22
|
-
han | he
|
|
23
|
-
af | of/by/from/off/for/in/with/on, off
|
|
24
|
-
for | at/for/to/from/by/of/ago, in front/before, because
|
|
25
|
-
ikke | not
|
|
26
|
-
der | who/which, there/those
|
|
27
|
-
var | past tense of "to be"
|
|
28
|
-
mig | me/myself
|
|
29
|
-
sig | oneself/himself/herself/itself/themselves
|
|
30
|
-
men | but
|
|
31
|
-
et | a/an/one, one (number), someone/somebody/one
|
|
32
|
-
har | present tense of "to have"
|
|
33
|
-
om | round/about/for/in/a, about/around/down, if
|
|
34
|
-
vi | we
|
|
35
|
-
min | my
|
|
36
|
-
havde | past tense of "to have"
|
|
37
|
-
ham | him
|
|
38
|
-
hun | she
|
|
39
|
-
nu | now
|
|
40
|
-
over | over/above/across/by/beyond/past/on/about, over/past
|
|
41
|
-
da | then, when/as/since
|
|
42
|
-
fra | from/off/since, off, since
|
|
43
|
-
du | you
|
|
44
|
-
ud | out
|
|
45
|
-
sin | his/her/its/one's
|
|
46
|
-
dem | them
|
|
47
|
-
os | us/ourselves
|
|
48
|
-
op | up
|
|
49
|
-
man | you/one
|
|
50
|
-
hans | his
|
|
51
|
-
hvor | where
|
|
52
|
-
eller | or
|
|
53
|
-
hvad | what
|
|
54
|
-
skal | must/shall etc.
|
|
55
|
-
selv | myself/youself/herself/ourselves etc., even
|
|
56
|
-
her | here
|
|
57
|
-
alle | all/everyone/everybody etc.
|
|
58
|
-
vil | will (verb)
|
|
59
|
-
blev | past tense of "to stay/to remain/to get/to become"
|
|
60
|
-
kunne | could
|
|
61
|
-
ind | in
|
|
62
|
-
når | when
|
|
63
|
-
være | present tense of "to be"
|
|
64
|
-
dog | however/yet/after all
|
|
65
|
-
noget | something
|
|
66
|
-
ville | would
|
|
67
|
-
jo | you know/you see (adv), yes
|
|
68
|
-
deres | their/theirs
|
|
69
|
-
efter | after/behind/according to/for/by/from, later/afterwards
|
|
70
|
-
ned | down
|
|
71
|
-
skulle | should
|
|
72
|
-
denne | this
|
|
73
|
-
end | than
|
|
74
|
-
dette | this
|
|
75
|
-
mit | my/mine
|
|
76
|
-
også | also
|
|
77
|
-
under | under/beneath/below/during, below/underneath
|
|
78
|
-
have | have
|
|
79
|
-
dig | you
|
|
80
|
-
anden | other
|
|
81
|
-
hende | her
|
|
82
|
-
mine | my
|
|
83
|
-
alt | everything
|
|
84
|
-
meget | much/very, plenty of
|
|
85
|
-
sit | his, her, its, one's
|
|
86
|
-
sine | his, her, its, one's
|
|
87
|
-
vor | our
|
|
88
|
-
mod | against
|
|
89
|
-
disse | these
|
|
90
|
-
hvis | if
|
|
91
|
-
din | your/yours
|
|
92
|
-
nogle | some
|
|
93
|
-
hos | by/at
|
|
94
|
-
blive | be/become
|
|
95
|
-
mange | many
|
|
96
|
-
ad | by/through
|
|
97
|
-
bliver | present tense of "to be/to become"
|
|
98
|
-
hendes | her/hers
|
|
99
|
-
været | be
|
|
100
|
-
thi | for (conj)
|
|
101
|
-
jer | you
|
|
102
|
-
sådan | such, like this/like that
|