xspond-xapian-ruby 0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/extconf.rb +1 -1
- data/lib/xapian.rb +63 -23
- data/tests/smoketest.rb +18 -10
- metadata +2 -2
data/ext/extconf.rb
CHANGED
data/lib/xapian.rb
CHANGED
@@ -49,21 +49,64 @@ module Xapian
|
|
49
49
|
# underlying Iterator
|
50
50
|
def _safelyIterate(dangerousStart, dangerousEnd) #:nodoc:
|
51
51
|
retval = Array.new
|
52
|
-
|
52
|
+
|
53
53
|
item = dangerousStart
|
54
54
|
lastTerm = dangerousEnd
|
55
|
-
|
55
|
+
|
56
56
|
return retval if dangerousStart.equals(dangerousEnd)
|
57
57
|
|
58
|
-
begin
|
58
|
+
begin
|
59
59
|
retval.push(yield(item))
|
60
60
|
item.next()
|
61
|
-
end while not item.equals(lastTerm) # must use primitive C++ comparator
|
62
|
-
|
61
|
+
end while not item.equals(lastTerm) # must use primitive C++ comparator
|
62
|
+
|
63
63
|
return retval
|
64
64
|
end # _safelyIterate
|
65
65
|
module_function :_safelyIterate
|
66
66
|
|
67
|
+
# A class to wrap safelyIterate to eliminate the overhead of generating an array
|
68
|
+
#
|
69
|
+
# new takes the owning object, the names of its begin and end iterator methods, and a block used to generate an
|
70
|
+
# appropriate Ruby object to wrap the underlying Iterator
|
71
|
+
class ProxyIterator
|
72
|
+
instance_methods.each {|name| undef_method name if name !~ /^(__|instance_eval)/ }
|
73
|
+
|
74
|
+
include Enumerable
|
75
|
+
|
76
|
+
def initialize(object, dangerousStart, dangerousEnd, &block)
|
77
|
+
@object = object
|
78
|
+
@dangerousStart = dangerousStart
|
79
|
+
@dangerousEnd = @object.send(dangerousEnd)
|
80
|
+
@block = block
|
81
|
+
@proxy_target = nil
|
82
|
+
end
|
83
|
+
|
84
|
+
def each
|
85
|
+
# if the array is already loaded use it
|
86
|
+
# otherwise use iterator
|
87
|
+
if @proxy_target
|
88
|
+
@proxy_target.each {|i| yield(i) }
|
89
|
+
else
|
90
|
+
item = @object.send(@dangerousStart)
|
91
|
+
# Must use .equals NOT ==
|
92
|
+
while !item.equals(@dangerousEnd)
|
93
|
+
yield(@block.call(item))
|
94
|
+
item.next
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
def method_missing(method, *args)
|
100
|
+
self.load_target.send(method, *args)
|
101
|
+
end
|
102
|
+
|
103
|
+
protected
|
104
|
+
|
105
|
+
def load_target
|
106
|
+
@proxy_target ||= self.entries
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
67
110
|
#--
|
68
111
|
### safe Ruby wrapper for the dangerous C++ Xapian::TermIterator class
|
69
112
|
class Xapian::Term
|
@@ -85,7 +128,7 @@ module Xapian
|
|
85
128
|
# non-iterative data.
|
86
129
|
# (MSetIterator is not dangerous, but it is inconvenient to use from a Ruby
|
87
130
|
# idiom, so we wrap it..)
|
88
|
-
class Xapian::Match
|
131
|
+
class Xapian::Match
|
89
132
|
attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent
|
90
133
|
|
91
134
|
def initialize(docid, document, rank, weight, collapse_count, percent)
|
@@ -98,7 +141,7 @@ module Xapian
|
|
98
141
|
end # initialize
|
99
142
|
|
100
143
|
def ==(other)
|
101
|
-
return other.is_a?(Xapian::Match) && other.docid == @docid && other.rank == @rank &&
|
144
|
+
return other.is_a?(Xapian::Match) && other.docid == @docid && other.rank == @rank &&
|
102
145
|
other.weight == @weight && other.collapse_count == @collapse_count && other.percent == @percent
|
103
146
|
end
|
104
147
|
|
@@ -124,7 +167,7 @@ module Xapian
|
|
124
167
|
# Ruby wrapper for Xapian::ValueIterator
|
125
168
|
class Xapian::Value
|
126
169
|
attr_accessor :value, :valueno
|
127
|
-
|
170
|
+
|
128
171
|
def initialize(value, valueno)
|
129
172
|
@value = value
|
130
173
|
@valueno = valueno
|
@@ -139,13 +182,13 @@ module Xapian
|
|
139
182
|
# Extend Xapian::Document with a nice wrapper for its nasty input_iterators
|
140
183
|
class Xapian::Document
|
141
184
|
def terms
|
142
|
-
|
185
|
+
ProxyIterator.new(self, :_dangerous_termlist_begin, :_dangerous_termlist_end) { |item|
|
143
186
|
Xapian::Term.new(item.term, item.wdf)
|
144
187
|
}
|
145
188
|
end # terms
|
146
189
|
|
147
190
|
def values
|
148
|
-
|
191
|
+
ProxyIterator.new(self, :_dangerous_values_begin, :_dangerous_values_end) { |item|
|
149
192
|
Xapian::Value.new(item.value, item.valueno)
|
150
193
|
}
|
151
194
|
end # values
|
@@ -156,7 +199,7 @@ module Xapian
|
|
156
199
|
# Extend Xapian::Query with a nice wrapper for its dangerous iterators
|
157
200
|
class Xapian::Query
|
158
201
|
def terms
|
159
|
-
|
202
|
+
ProxyIterator.new(self, :_dangerous_terms_begin, :_dangerous_terms_end) { |item|
|
160
203
|
Xapian::Term.new(item.term, item.wdf)
|
161
204
|
# termfreq is not supported by TermIterators from Queries
|
162
205
|
}
|
@@ -169,7 +212,7 @@ module Xapian
|
|
169
212
|
# Get matching terms for some document.
|
170
213
|
# document can be either a Xapian::DocID or a Xapian::MSetIterator
|
171
214
|
def matching_terms(document)
|
172
|
-
Xapian._safelyIterate(self._dangerous_matching_terms_begin(document),
|
215
|
+
Xapian._safelyIterate(self._dangerous_matching_terms_begin(document),
|
173
216
|
self._dangerous_matching_terms_end(document)) { |item|
|
174
217
|
Xapian::Term.new(item.term, item.wdf)
|
175
218
|
}
|
@@ -180,8 +223,7 @@ module Xapian
|
|
180
223
|
# programming idiom. So we wrap them.
|
181
224
|
class Xapian::MSet
|
182
225
|
def matches
|
183
|
-
|
184
|
-
self._end()) { |item|
|
226
|
+
ProxyIterator.new(self, :_begin, :_end) { |item|
|
185
227
|
Xapian::Match.new(item.docid, item.document, item.rank, item.weight, item.collapse_count, item.percent)
|
186
228
|
}
|
187
229
|
|
@@ -192,10 +234,9 @@ module Xapian
|
|
192
234
|
# programming idiom. So we wrap them.
|
193
235
|
class Xapian::ESet
|
194
236
|
def terms
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
# ESetIterator#termname() (defined in xapian.i) to ExpandTerm#term()
|
237
|
+
ProxyIterator.new(self, :_begin, :_end) { |item|
|
238
|
+
# note: in the ExpandTerm wrapper, we implicitly rename
|
239
|
+
# ESetIterator#termname() (defined in xapian.i) to ExpandTerm#term()
|
199
240
|
Xapian::ExpandTerm.new(item.termname, item.weight)
|
200
241
|
}
|
201
242
|
|
@@ -225,8 +266,7 @@ module Xapian
|
|
225
266
|
class Xapian::Database
|
226
267
|
# Returns an Array of all Xapian::Terms for this database.
|
227
268
|
def allterms
|
228
|
-
|
229
|
-
self._dangerous_allterms_end()) { |item|
|
269
|
+
ProxyIterator.new(self, :_dangerous_allterms_begin, :_dangerous_allterms_end) { |item|
|
230
270
|
Xapian::Term.new(item.term, 0, item.termfreq)
|
231
271
|
}
|
232
272
|
end # allterms
|
@@ -234,10 +274,10 @@ module Xapian
|
|
234
274
|
# Returns an Array of Xapian::Postings for the given term.
|
235
275
|
# term is a string.
|
236
276
|
def postlist(term)
|
237
|
-
Xapian._safelyIterate(self._dangerous_postlist_begin(term),
|
277
|
+
Xapian._safelyIterate(self._dangerous_postlist_begin(term),
|
238
278
|
self._dangerous_postlist_end(term)) { |item|
|
239
279
|
Xapian::Posting.new(item.docid, item.doclength, item.wdf)
|
240
|
-
}
|
280
|
+
}
|
241
281
|
end # postlist(term)
|
242
282
|
|
243
283
|
# Returns an Array of Terms for the given docid.
|
@@ -247,7 +287,7 @@ module Xapian
|
|
247
287
|
Xapian::Term.new(item.term, item.wdf, item.termfreq)
|
248
288
|
}
|
249
289
|
end # termlist(docid)
|
250
|
-
|
290
|
+
|
251
291
|
|
252
292
|
# Returns an Array of Xapian::Termpos objects for the given term (a String)
|
253
293
|
# in the given docid.
|
data/tests/smoketest.rb
CHANGED
@@ -34,7 +34,7 @@ end
|
|
34
34
|
|
35
35
|
class XapianSmoketest < Test::Unit::TestCase
|
36
36
|
|
37
|
-
def setup
|
37
|
+
def setup
|
38
38
|
@stem = Xapian::Stem.new("english")
|
39
39
|
|
40
40
|
@doc = Xapian::Document.new()
|
@@ -43,7 +43,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
43
43
|
@doc.add_posting(@stem.call("there"), 2)
|
44
44
|
@doc.add_posting(@stem.call("anybody"), 3)
|
45
45
|
@doc.add_posting(@stem.call("out"), 4)
|
46
|
-
@doc.add_posting(@stem.call("there"), 5)
|
46
|
+
@doc.add_posting(@stem.call("there"), 5)
|
47
47
|
@doc.add_term("XYzzy")
|
48
48
|
|
49
49
|
@db = Xapian::inmemory_open()
|
@@ -61,7 +61,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
61
61
|
end # test_version
|
62
62
|
|
63
63
|
def test_stem
|
64
|
-
assert_equal("Xapian::Stem(english)", @stem.description())
|
64
|
+
assert_equal("Xapian::Stem(english)", @stem.description())
|
65
65
|
|
66
66
|
assert_equal("is", @stem.call("is"))
|
67
67
|
assert_equal("go", @stem.call("going"))
|
@@ -84,6 +84,14 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
84
84
|
|
85
85
|
end # test_document
|
86
86
|
|
87
|
+
def test_document_terms
|
88
|
+
terms = @doc.terms
|
89
|
+
|
90
|
+
assert_not_nil(terms.find {|i| i.term == "there" })
|
91
|
+
assert_not_nil(terms.find {|i| i.term == "out" })
|
92
|
+
assert_equal(terms.size, terms.map {|i| i }.size)
|
93
|
+
end
|
94
|
+
|
87
95
|
def test_001_database
|
88
96
|
assert_not_nil(@db)
|
89
97
|
assert_equal("WritableDatabase()", @db.description())
|
@@ -91,7 +99,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
91
99
|
end # test_database
|
92
100
|
|
93
101
|
def test_002_queries
|
94
|
-
assert_equal("Xapian::Query((smoke OR test OR terms))",
|
102
|
+
assert_equal("Xapian::Query((smoke OR test OR terms))",
|
95
103
|
Xapian::Query.new(Xapian::Query::OP_OR ,["smoke", "test", "terms"]).description())
|
96
104
|
|
97
105
|
phraseQuery = Xapian::Query.new(Xapian::Query::OP_PHRASE ,["smoke", "test", "tuple"])
|
@@ -100,9 +108,9 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
100
108
|
assert_equal("Xapian::Query((smoke PHRASE 3 test PHRASE 3 tuple))", phraseQuery.description())
|
101
109
|
assert_equal("Xapian::Query((smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string))", xorQuery.description())
|
102
110
|
|
103
|
-
assert_equal([Xapian::Term.new("smoke", 1),
|
104
|
-
Xapian::Term.new("string", 1),
|
105
|
-
Xapian::Term.new("test", 1),
|
111
|
+
assert_equal([Xapian::Term.new("smoke", 1),
|
112
|
+
Xapian::Term.new("string", 1),
|
113
|
+
Xapian::Term.new("test", 1),
|
106
114
|
Xapian::Term.new("tuple", 1)], xorQuery.terms())
|
107
115
|
|
108
116
|
assert_equal(Xapian::Query::OP_ELITE_SET, 10)
|
@@ -111,7 +119,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
111
119
|
def test_003_enquire
|
112
120
|
@enq = Xapian::Enquire.new(@db)
|
113
121
|
assert_not_nil(@enq)
|
114
|
-
|
122
|
+
|
115
123
|
@enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
|
116
124
|
mset = @enq.mset(0, 10)
|
117
125
|
|
@@ -126,7 +134,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
126
134
|
def test_004_mset_iterator
|
127
135
|
@enq = Xapian::Enquire.new(@db)
|
128
136
|
assert_not_nil(@enq)
|
129
|
-
|
137
|
+
|
130
138
|
@enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
|
131
139
|
mset = @enq.mset(0, 10)
|
132
140
|
|
@@ -152,7 +160,7 @@ class XapianSmoketest < Test::Unit::TestCase
|
|
152
160
|
def test_006_database_allterms
|
153
161
|
assert_equal(5, @db.allterms.size())
|
154
162
|
end
|
155
|
-
|
163
|
+
|
156
164
|
# Feature test for Database.postlist
|
157
165
|
def test_007_database_postlist
|
158
166
|
assert_equal(1, @db.postlist("there").size())
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xspond-xapian-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- William Weidendorf
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-03-
|
13
|
+
date: 2009-03-11 00:00:00 -07:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|