xspond-xapian-ruby 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/xapian_wrap.h ADDED
@@ -0,0 +1,65 @@
1
+ /* ----------------------------------------------------------------------------
2
+ * This file was automatically generated by SWIG (http://www.swig.org).
3
+ * Version 1.3.32
4
+ *
5
+ * This file is not intended to be easily readable and contains a number of
6
+ * coding conventions designed to improve portability and efficiency. Do not make
7
+ * changes to this file unless you know what you are doing--modify the SWIG
8
+ * interface file instead.
9
+ * ----------------------------------------------------------------------------- */
10
+
11
+ #ifndef SWIG_Xapian_WRAP_H_
12
+ #define SWIG_Xapian_WRAP_H_
13
+
14
+ namespace Swig {
15
+ class Director;
16
+ }
17
+ // SWIG-generated director class: derives from both Xapian::MatchDecider and Swig::Director.
18
+ // Built from a Ruby VALUE; presumably forwards operator() to that Ruby object (definition not in this header).
19
+ class SwigDirector_MatchDecider : public Xapian::MatchDecider, public Swig::Director {
20
+ // Members below are declared only; their definitions live outside this header.
21
+ public:
22
+ SwigDirector_MatchDecider(VALUE self);
23
+ virtual bool operator ()(Xapian::Document const &doc) const;
24
+ virtual ~SwigDirector_MatchDecider();
25
+ };
26
+ // SWIG-generated director class: derives from both Xapian::ExpandDecider and Swig::Director.
27
+ // Built from a Ruby VALUE; operator() takes a term string and returns bool (definition not in this header).
28
+ class SwigDirector_ExpandDecider : public Xapian::ExpandDecider, public Swig::Director {
29
+ // Members below are declared only; their definitions live outside this header.
30
+ public:
31
+ SwigDirector_ExpandDecider(VALUE self);
32
+ virtual bool operator ()(std::string const &term) const;
33
+ virtual ~SwigDirector_ExpandDecider();
34
+ };
35
+ // SWIG-generated director class: derives from both Xapian::Stopper and Swig::Director.
36
+ // Built from a Ruby VALUE; declares operator() on a term string plus get_description().
37
+ class SwigDirector_Stopper : public Xapian::Stopper, public Swig::Director {
38
+ // Members below are declared only; their definitions live outside this header.
39
+ public:
40
+ SwigDirector_Stopper(VALUE self);
41
+ virtual bool operator ()(std::string const &term) const;
42
+ virtual ~SwigDirector_Stopper();
43
+ virtual std::string get_description() const;
44
+ };
45
+ // SWIG-generated director: derives from both Xapian::ValueRangeProcessor and Swig::Director.
46
+ // operator() takes begin/end by non-const reference, so an implementation may modify them; returns a Xapian::valueno.
47
+ struct SwigDirector_ValueRangeProcessor : public Xapian::ValueRangeProcessor, public Swig::Director {
48
+ // Declared as a struct (unlike the sibling directors); the explicit public: below is therefore redundant.
49
+ public:
50
+ SwigDirector_ValueRangeProcessor(VALUE self);
51
+ virtual ~SwigDirector_ValueRangeProcessor();
52
+ virtual Xapian::valueno operator ()(std::string &begin, std::string &end);
53
+ };
54
+ // SWIG-generated director class: derives from both Xapian::Sorter and Swig::Director.
55
+ // Built from a Ruby VALUE; operator() takes a Xapian::Document and returns a std::string.
56
+ class SwigDirector_Sorter : public Xapian::Sorter, public Swig::Director {
57
+ // Members below are declared only; their definitions live outside this header.
58
+ public:
59
+ SwigDirector_Sorter(VALUE self);
60
+ virtual std::string operator ()(Xapian::Document const &doc) const;
61
+ virtual ~SwigDirector_Sorter();
62
+ };
63
+
64
+
65
+ #endif
data/lib/xapian.rb ADDED
@@ -0,0 +1,264 @@
1
+ # :title:Ruby Xapian bindings
2
+ # =Ruby Xapian bindings
3
+ #
4
+ # Original version by Paul Legato (plegato@nks.net), 4/20/06.
5
+ #
6
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
7
+ #
8
+ # This program is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU General Public License as
10
+ # published by the Free Software Foundation; either version 2 of the
11
+ # License, or (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program; if not, write to the Free Software
20
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21
+ # USA
22
+ #
23
+ # ==Underscore methods
24
+ # Note: Methods whose names start with an underscore character _ are internal
25
+ # methods from the C++ API. Their functionality is not accessible in a
26
+ # Ruby-friendly way, so this file provides wrapper code to make it easier to
27
+ # use them from a Ruby programming idiom. Most are also dangerous insofar as
28
+ # misusing them can cause your program to segfault. In particular, all of
29
+ # Xapian's *Iterator classes are wrapped into nice Ruby-friendly Arrays.
30
+ #
31
+ # It should never be necessary to use any method whose name starts with an
32
+ # underscore from user-level code. Make sure you are _VERY_ certain that you
33
+ # know exactly what you're doing if you do use one of these methods. Beware.
34
+ # You've been warned...
35
+ #
36
+
37
+
38
+ module Xapian
39
+ ######## load the SWIG-generated library
40
+ require '_xapian'
41
+
42
+
43
# Walk a pair of dangerous C++ iterators (i.e. those that can cause segfaults
# if used improperly) and collect the results into a plain Ruby Array.
# Users should never need to use this method.
#
# dangerousStart:: iterator at the first element; it is advanced in place
#                  by calling #next on it.
# dangerousEnd::   iterator marking the end of the range.
#
# Yields the iterator at each position; the block should return whatever Ruby
# object is appropriate to wrap that position, and that value becomes the
# corresponding Array element. Returns an empty Array (without yielding) when
# the two iterators already compare equal.
def _safelyIterate(dangerousStart, dangerousEnd) #:nodoc:
  collected = []

  # The iterators must be compared with the primitive C++ comparator #equals.
  until dangerousStart.equals(dangerousEnd)
    collected << yield(dangerousStart)
    dangerousStart.next
  end

  collected
end # _safelyIterate
65
+ module_function :_safelyIterate
66
+
67
+ #--
68
### Plain Ruby value object standing in for one position of the dangerous
### C++ Xapian::TermIterator class.
class Xapian::Term
  attr_accessor :term, :wdf, :termfreq

  # wdf and termfreq default to nil for sources that cannot supply them.
  def initialize(term, wdf=nil, termfreq=nil)
    @term, @wdf, @termfreq = term, wdf, termfreq
  end

  # True only for another Xapian::Term whose term, wdf and termfreq all match.
  def ==(other)
    return false unless other.is_a?(Xapian::Term)

    other.term == @term && other.wdf == @wdf && other.termfreq == @termfreq
  end
end # class Term
82
+
83
### Ruby value object for a Match, i.e. one position of a C++
### Xapian::MSetIterator (Match Set). It is no longer an iterator on the Ruby
### side, but it keeps the iterator's non-iterative data.
### (MSetIterator is not dangerous, just inconvenient to use from a Ruby
### idiom, so we wrap it.)
class Xapian::Match
  attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent

  def initialize(docid, document, rank, weight, collapse_count, percent)
    @docid, @document = docid, document
    @rank, @weight = rank, weight
    @collapse_count, @percent = collapse_count, percent
  end # initialize

  # Compares docid, rank, weight, collapse_count and percent against another
  # Xapian::Match. Note that the document itself is not part of the comparison.
  def ==(other)
    return false unless other.is_a?(Xapian::Match)
    return false unless other.docid == @docid && other.rank == @rank

    other.weight == @weight && other.collapse_count == @collapse_count && other.percent == @percent
  end

end # class Xapian::Match
106
+
107
# Ruby value object for an ExpandTerm, i.e. one position of a C++
# Xapian::ESetIterator. The iterator is not dangerous, only inconvenient to
# use from a Ruby programming idiom, so we wrap it.
class Xapian::ExpandTerm
  attr_accessor :name, :weight

  def initialize(name, weight)
    @name, @weight = name, weight
  end # initialize

  # True only for another Xapian::ExpandTerm with the same name and weight.
  def ==(other)
    return false unless other.is_a?(Xapian::ExpandTerm)

    other.name == @name && other.weight == @weight
  end

end # Xapian::ExpandTerm
123
+
124
# Ruby value object for one position of a Xapian::ValueIterator: the stored
# value together with its value number.
class Xapian::Value
  attr_accessor :value, :valueno

  def initialize(value, valueno)
    @value, @valueno = value, valueno
  end # initialize

  # True only for another Xapian::Value with the same value and valueno.
  def ==(other)
    return false unless other.is_a?(Xapian::Value)

    other.value == @value && other.valueno == @valueno
  end
end # Xapian::Value
137
+
138
+ #--
139
# Reopen Xapian::Document to add Array-returning helpers so callers never
# have to touch its nasty input_iterators directly.
class Xapian::Document
  # Returns an Array of Xapian::Term built from each entry's term and wdf
  # (termfreq is left at its nil default).
  def terms
    start_iter = self._dangerous_termlist_begin()
    end_iter = self._dangerous_termlist_end()
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end # terms

  # Returns an Array of Xapian::Value built from each entry's value and
  # valueno.
  def values
    start_iter = self._dangerous_values_begin()
    end_iter = self._dangerous_values_end()
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Value.new(entry.value, entry.valueno)
    end
  end # values

end # class Xapian::Document
154
+
155
+ #--
156
# Reopen Xapian::Query to wrap its dangerous iterators in an Array-returning
# helper.
class Xapian::Query
  # Returns an Array of Xapian::Term built from each entry's term and wdf.
  # termfreq is not supported by TermIterators from Queries, so it stays nil.
  def terms
    start_iter = self._dangerous_terms_begin()
    end_iter = self._dangerous_terms_end()
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end
end # Xapian::Query
165
+
166
+ #--
167
# Reopen Xapian::Enquire to wrap its dangerous iterators in an
# Array-returning helper.
class Xapian::Enquire
  # Get matching terms for some document, as an Array of Xapian::Term built
  # from each entry's term and wdf.
  # document can be either a Xapian::DocID or a Xapian::MSetIterator.
  def matching_terms(document)
    start_iter = self._dangerous_matching_terms_begin(document)
    end_iter = self._dangerous_matching_terms_end(document)
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end
end # Xapian::Enquire
178
+
179
# MSetIterators are not dangerous, only awkward to use within a Ruby
# programming idiom, so Xapian::MSet gains an Array-returning wrapper.
class Xapian::MSet
  # Returns an Array of Xapian::Match, one per position between _begin and
  # _end.
  def matches
    start_iter = self._begin()
    end_iter = self._end()
    Xapian._safelyIterate(start_iter, end_iter) do |hit|
      Xapian::Match.new(hit.docid, hit.document, hit.rank, hit.weight, hit.collapse_count, hit.percent)
    end
  end # matches
end # Xapian::MSet
190
+
191
# ESetIterators are not dangerous, only awkward to use within a Ruby
# programming idiom, so Xapian::ESet gains an Array-returning wrapper.
class Xapian::ESet
  # Returns an Array of Xapian::ExpandTerm, one per position between _begin
  # and _end.
  def terms
    start_iter = self._begin()
    end_iter = self._end()
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      # note: the wrapper implicitly renames ESetIterator#termname()
      # (defined in xapian.i) to ExpandTerm#name()
      Xapian::ExpandTerm.new(entry.termname, entry.weight)
    end
  end # terms
end # Xapian::ESet
204
+
205
+
206
+ #--
207
# Ruby value object for one position of the C++ class
# Xapian::PostingIterator: a docid with its doclength and wdf.
class Xapian::Posting
  attr_accessor :docid, :doclength, :wdf

  def initialize(docid, doclength, wdf)
    @docid, @doclength, @wdf = docid, doclength, wdf
  end

  # True only for another Xapian::Posting with the same docid, doclength and
  # wdf.
  def ==(other)
    return false unless other.is_a?(Xapian::Posting)

    other.docid == @docid && other.doclength == @doclength && other.wdf == @wdf
  end
end # Xapian::Posting
222
+
223
+ #--
224
# Reopen Xapian::Database to wrap some dangerous iterators in
# Array-returning helpers.
class Xapian::Database
  # Returns an Array of all Xapian::Terms for this database. Each Term
  # carries the entry's term and termfreq; wdf is always passed as 0.
  def allterms
    start_iter = self._dangerous_allterms_begin()
    end_iter = self._dangerous_allterms_end()
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Term.new(entry.term, 0, entry.termfreq)
    end
  end # allterms

  # Returns an Array of Xapian::Postings for the given term.
  # term is a string.
  def postlist(term)
    start_iter = self._dangerous_postlist_begin(term)
    end_iter = self._dangerous_postlist_end(term)
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Posting.new(entry.docid, entry.doclength, entry.wdf)
    end
  end # postlist(term)

  # Returns an Array of Terms (term, wdf and termfreq) for the given docid.
  def termlist(docid)
    start_iter = self._dangerous_termlist_begin(docid)
    end_iter = self._dangerous_termlist_end(docid)
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      Xapian::Term.new(entry.term, entry.wdf, entry.termfreq)
    end
  end # termlist(docid)

  # Returns an Array holding the termpos of each position of the given term
  # (a String) in the given docid.
  def positionlist(docid, term)
    start_iter = self._dangerous_positionlist_begin(docid, term)
    end_iter = self._dangerous_positionlist_end(docid, term)
    Xapian._safelyIterate(start_iter, end_iter) do |entry|
      entry.termpos
    end
  end # positionlist

end # Xapian::Database
262
+
263
+
264
+ end # Xapian module
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/ruby -w
2
+ #
3
+ # smoketest.rb - test Xapian bindings for Ruby
4
+ # Original version by Paul Legato (plegato@nks.net), 4/17/2006
5
+ #
6
+ # Originally based on smoketest.php from the PHP4 bindings.
7
+ #
8
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
9
+ # Copyright (C) 2008 Olly Betts
10
+ #
11
+ # This program is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU General Public License as
13
+ # published by the Free Software Foundation; either version 2 of the
14
+ # License, or (at your option) any later version.
15
+ #
16
+ # This program is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with this program; if not, write to the Free Software
23
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
24
+ # USA
25
+
26
+ require 'test/unit'
27
+ require 'lib/xapian'
28
+
29
# Match decider used by the smoke test: accepts a document only when its
# value in slot 0 is the string "yes".
class TestMatchDecider < Xapian::MatchDecider
  def __call__(doc)
    slot_zero = doc.value(0)
    slot_zero == "yes"
  end
end
34
+
35
# Smoke tests for the Ruby Xapian bindings. Each test works on the fixtures
# built in #setup: an English stemmer, one document, an in-memory database
# containing that document, and an Enquire bound to the database.
#
# assert_equal is always called as (expected, actual) so that failure
# messages label the two values correctly.
class XapianSmoketest < Test::Unit::TestCase

  # Builds @stem, @doc, @db and @enq.
  def setup
    @stem = Xapian::Stem.new("english")

    @doc = Xapian::Document.new()
    @doc.data = "is there anybody out there?"
    @doc.add_posting(@stem.call("is"), 1)
    @doc.add_posting(@stem.call("there"), 2)
    @doc.add_posting(@stem.call("anybody"), 3)
    @doc.add_posting(@stem.call("out"), 4)
    @doc.add_posting(@stem.call("there"), 5)
    @doc.add_term("XYzzy")

    @db = Xapian::inmemory_open()
    @db.add_document(@doc)

    @enq = Xapian::Enquire.new(@db)
  end # setup

  def test_version
    # Test the version number reporting functions give plausible results.
    assembled = sprintf("%d.%d.%d", Xapian::major_version(), Xapian::minor_version(),
                        Xapian::revision())
    reported = Xapian::version_string()
    assert_equal(reported, assembled)
  end # test_version

  def test_stem
    assert_equal("Xapian::Stem(english)", @stem.description())

    assert_equal("is", @stem.call("is"))
    assert_equal("go", @stem.call("going"))
    assert_equal("want", @stem.call("wanted"))
    assert_equal("refer", @stem.call("reference"))
  end # test_stem

  # subtests are those on which some test_foo() method depends.
  def test_000_document
    assert_not_nil(@doc)

    assert_equal("is there anybody out there?", @doc.data())

    assert_equal(5, @doc.termlist_count())
    assert_equal("XYzzy", @doc.terms().first.term)

    @doc.add_term("foo")
    assert_equal(6, @doc.termlist_count())
    assert_equal(@doc.terms.size(), @doc.termlist_count())

  end # test_000_document

  def test_001_database
    assert_not_nil(@db)
    assert_equal("WritableDatabase()", @db.description())
    assert_equal(1, @db.doccount())
  end # test_001_database

  def test_002_queries
    assert_equal("Xapian::Query((smoke OR test OR terms))",
                 Xapian::Query.new(Xapian::Query::OP_OR, ["smoke", "test", "terms"]).description())

    phraseQuery = Xapian::Query.new(Xapian::Query::OP_PHRASE, ["smoke", "test", "tuple"])
    xorQuery = Xapian::Query.new(Xapian::Query::OP_XOR, [ Xapian::Query.new("smoke"), phraseQuery, "string" ])

    assert_equal("Xapian::Query((smoke PHRASE 3 test PHRASE 3 tuple))", phraseQuery.description())
    assert_equal("Xapian::Query((smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string))", xorQuery.description())

    assert_equal([Xapian::Term.new("smoke", 1),
                  Xapian::Term.new("string", 1),
                  Xapian::Term.new("test", 1),
                  Xapian::Term.new("tuple", 1)], xorQuery.terms())

    assert_equal(10, Xapian::Query::OP_ELITE_SET)
  end # test_002_queries

  def test_003_enquire
    @enq = Xapian::Enquire.new(@db)
    assert_not_nil(@enq)

    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
    mset = @enq.mset(0, 10)

    assert_equal(1, mset.size())

    # Feature test for Enquire.matching_terms()
    assert_equal(2, @enq.matching_terms(mset.hit(0)).size())
    assert_equal([Xapian::Term.new("is", 1), Xapian::Term.new("there", 1)],
                 @enq.matching_terms(mset.hit(0)))
  end # test_003_enquire

  def test_004_mset_iterator
    @enq = Xapian::Enquire.new(@db)
    assert_not_nil(@enq)

    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
    mset = @enq.mset(0, 10)

    assert_equal(mset.matches().size(), mset.size())
  end # test_004_mset_iterator

  def test_005_eset_iterator
    rset = Xapian::RSet.new

    rset.add_document(1)

    @enq = Xapian::Enquire.new(@db)
    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")

    eset = @enq.eset(10, rset)
    assert_not_nil(eset)

    assert_equal(3, eset.terms.size())
  end # test_005_eset_iterator

  # Feature test for Database.allterms
  def test_006_database_allterms
    assert_equal(5, @db.allterms.size())
  end

  # Feature test for Database.postlist
  def test_007_database_postlist
    assert_equal(1, @db.postlist("there").size())
  end

  # Feature test for Database.termlist
  def test_008_database_termlist
    assert_equal(5, @db.termlist(1).size())
  end

  # Feature test for Database.positionlist
  def test_009_database_positionlist
    assert_equal(2, @db.positionlist(1, "there").size())
  end

  # Feature test for Document.values
  def test_010_document_values
    assert_equal(0, @doc.values().size())
  end

  # Adds a second document whose value slot 0 is "yes" and checks that
  # TestMatchDecider lets only that document through.
  def test_011_matchdecider
    # Locals rather than instance variables: nothing outside this test reads
    # them, and the @doc fixture from #setup stays untouched.
    doc = Xapian::Document.new()
    doc.data = "Two"
    doc.add_posting(@stem.call("out"), 1)
    doc.add_posting(@stem.call("source"), 2)
    doc.add_value(0, "yes")
    @db.add_document(doc)

    query = Xapian::Query.new(@stem.call("out"))
    enquire = Xapian::Enquire.new(@db)
    enquire.query = query
    mset = enquire.mset(0, 10, nil, TestMatchDecider.new)
    assert_equal(1, mset.size())
    assert_equal(2, mset.docid(0))
  end # test_011_matchdecider

  def test_012_metadata
    assert_equal('', @db.get_metadata('Foo'))
    @db.set_metadata('Foo', 'Foo')
    assert_equal('Foo', @db.get_metadata('Foo'))
  end # test_012_metadata

  def test_013_scaleweight
    query = Xapian::Query.new("foo")
    query2 = Xapian::Query.new(Xapian::Query::OP_SCALE_WEIGHT, query, 5)
    assert_equal("Xapian::Query(5 * foo)", query2.description())
  end # test_013_scaleweight

end # class XapianSmoketest