xspond-xapian-ruby 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/xapian_wrap.h ADDED
@@ -0,0 +1,65 @@
1
+ /* ----------------------------------------------------------------------------
2
+ * This file was automatically generated by SWIG (http://www.swig.org).
3
+ * Version 1.3.32
4
+ *
5
+ * This file is not intended to be easily readable and contains a number of
6
+ * coding conventions designed to improve portability and efficiency. Do not make
7
+ * changes to this file unless you know what you are doing--modify the SWIG
8
+ * interface file instead.
9
+ * ----------------------------------------------------------------------------- */
10
+
11
+ #ifndef SWIG_Xapian_WRAP_H_
12
+ #define SWIG_Xapian_WRAP_H_
13
+
14
+ namespace Swig { // forward declaration only: this header never defines Swig::Director
15
+ class Director; // used as a base class by every SwigDirector_* type in this header, so a complete definition must already be visible to the includer -- NOTE(review): presumably supplied by the generated wrapper source; confirm
16
+ }
17
+
18
+
19
+ class SwigDirector_MatchDecider : public Xapian::MatchDecider, public Swig::Director { // director type deriving publicly from both Xapian::MatchDecider and Swig::Director
20
+ // Declarations only; the member definitions are not part of this header.
21
+ public:
22
+ SwigDirector_MatchDecider(VALUE self); // takes a Ruby VALUE named self -- presumably the Ruby object this director stands for; confirm in the generated .cc
23
+ virtual bool operator ()(Xapian::Document const &doc) const; // yes/no decision for one document; NOTE(review): presumably overrides the Xapian::MatchDecider call operator
24
+ virtual ~SwigDirector_MatchDecider(); // virtual destructor
25
+ };
26
+
27
+
28
+ class SwigDirector_ExpandDecider : public Xapian::ExpandDecider, public Swig::Director { // director type deriving publicly from both Xapian::ExpandDecider and Swig::Director
29
+ // Declarations only; the member definitions are not part of this header.
30
+ public:
31
+ SwigDirector_ExpandDecider(VALUE self); // takes a Ruby VALUE named self -- presumably the Ruby object this director stands for; confirm in the generated .cc
32
+ virtual bool operator ()(std::string const &term) const; // yes/no decision for one term string; NOTE(review): presumably overrides the Xapian::ExpandDecider call operator
33
+ virtual ~SwigDirector_ExpandDecider(); // virtual destructor
34
+ };
35
+
36
+
37
+ class SwigDirector_Stopper : public Xapian::Stopper, public Swig::Director { // director type deriving publicly from both Xapian::Stopper and Swig::Director
38
+ // Declarations only; the member definitions are not part of this header.
39
+ public:
40
+ SwigDirector_Stopper(VALUE self); // takes a Ruby VALUE named self -- presumably the Ruby object this director stands for; confirm in the generated .cc
41
+ virtual bool operator ()(std::string const &term) const; // yes/no decision for one term string; NOTE(review): presumably overrides the Xapian::Stopper call operator
42
+ virtual ~SwigDirector_Stopper(); // virtual destructor
43
+ virtual std::string get_description() const; // second virtual declared here besides the call operator; returns a std::string
44
+ };
45
+
46
+
47
+ struct SwigDirector_ValueRangeProcessor : public Xapian::ValueRangeProcessor, public Swig::Director { // declared with `struct` where the sibling directors use `class`; bases and members are explicitly public either way
48
+ // Declarations only; the member definitions are not part of this header.
49
+ public:
50
+ SwigDirector_ValueRangeProcessor(VALUE self); // takes a Ruby VALUE named self -- presumably the Ruby object this director stands for; confirm in the generated .cc
51
+ virtual ~SwigDirector_ValueRangeProcessor(); // virtual destructor
52
+ virtual Xapian::valueno operator ()(std::string &begin, std::string &end); // non-const call operator; both strings are passed by non-const reference, so an implementation is able to modify them; returns a Xapian::valueno
53
+ };
54
+
55
+
56
+ class SwigDirector_Sorter : public Xapian::Sorter, public Swig::Director { // director type deriving publicly from both Xapian::Sorter and Swig::Director
57
+ // Declarations only; the member definitions are not part of this header.
58
+ public:
59
+ SwigDirector_Sorter(VALUE self); // takes a Ruby VALUE named self -- presumably the Ruby object this director stands for; confirm in the generated .cc
60
+ virtual std::string operator ()(Xapian::Document const &doc) const; // maps one document to a std::string; NOTE(review): presumably the sort key, overriding the Xapian::Sorter call operator
61
+ virtual ~SwigDirector_Sorter(); // virtual destructor
62
+ };
63
+
64
+
65
+ #endif
data/lib/xapian.rb ADDED
@@ -0,0 +1,264 @@
1
+ # :title:Ruby Xapian bindings
2
+ # =Ruby Xapian bindings
3
+ #
4
+ # Original version by Paul Legato (plegato@nks.net), 4/20/06.
5
+ #
6
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
7
+ #
8
+ # This program is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU General Public License as
10
+ # published by the Free Software Foundation; either version 2 of the
11
+ # License, or (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program; if not, write to the Free Software
20
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21
+ # USA
22
+ #
23
+ # ==Underscore methods
24
+ # Note: Methods whose names start with an underscore character _ are internal
25
+ # methods from the C++ API. Their functionality is not accessible in a
26
+ # Ruby-friendly way, so this file provides wrapper code to make it easier to
27
+ # use them from a Ruby programming idiom. Most are also dangerous insofar as
28
+ # misusing them can cause your program to segfault. In particular, all of
29
+ # Xapian's *Iterator classes are wrapped into nice Ruby-friendly Arrays.
30
+ #
31
+ # It should never be necessary to use any method whose name starts with an
32
+ # underscore from user-level code. Make sure you are _VERY_ certain that you
33
+ # know exactly what you're doing if you do use one of these methods. Beware.
34
+ # You've been warned...
35
+ #
36
+
37
+
38
+ module Xapian
39
+ ######## load the SWIG-generated library
40
+ require '_xapian'
41
+
42
+
43
# Walk a pair of "dangerous" iterators (ones that can segfault when used
# improperly) and gather the results into a plain Ruby Array.
# Users should never need to call this method themselves.
#
# dangerousStart is advanced in place with #next until it #equals
# dangerousEnd; the end position itself is never handed to the block.
# The block receives the iterator at each position and returns the Ruby
# object that should represent that position in the result.
def _safelyIterate(dangerousStart, dangerousEnd) #:nodoc:
  wrapped = []
  cursor = dangerousStart

  # The end test relies solely on the iterators' own #equals.
  until cursor.equals(dangerousEnd)
    wrapped.push(yield(cursor))
    cursor.next()
  end

  wrapped
end # _safelyIterate
65
+ module_function :_safelyIterate
66
+
67
+ #--
68
+ ### safe Ruby wrapper for the dangerous C++ Xapian::TermIterator class
69
class Xapian::Term
  # Plain value holder: the three fields are stored as given and can be
  # read or reassigned freely.
  attr_accessor :term, :wdf, :termfreq

  # wdf and termfreq are optional and stay nil when not supplied.
  def initialize(term, wdf=nil, termfreq=nil)
    @term, @wdf, @termfreq = term, wdf, termfreq
  end

  # Equal only to another Xapian::Term whose term, wdf and termfreq all
  # compare equal to this one's.
  def ==(other)
    return false unless other.is_a?(Xapian::Term)

    other.term == @term &&
      other.wdf == @wdf &&
      other.termfreq == @termfreq
  end
end # class Term
82
+
83
+ ### Ruby wrapper for a Match, i.e. a Xapian::MSetIterator (Match Set) in C++.
84
+ # it's no longer an iterator in the Ruby version, but we want to preserve its
85
+ # non-iterative data.
86
+ # (MSetIterator is not dangerous, but it is inconvenient to use from a Ruby
87
+ # idiom, so we wrap it..)
88
class Xapian::Match
  # Plain value holder for the non-iterative data of one match.
  attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent

  # Stores all six fields exactly as given.
  def initialize(docid, document, rank, weight, collapse_count, percent)
    @docid, @document, @rank = docid, document, rank
    @weight, @collapse_count, @percent = weight, collapse_count, percent
  end # initialize

  # Equal only to another Xapian::Match with the same docid, rank, weight,
  # collapse_count and percent. The document field is not compared.
  def ==(other)
    return false unless other.is_a?(Xapian::Match)

    other.docid == @docid &&
      other.rank == @rank &&
      other.weight == @weight &&
      other.collapse_count == @collapse_count &&
      other.percent == @percent
  end

end # class Xapian::Match
106
+
107
+ # Ruby wrapper for an ExpandTerm, i.e. a Xapian::ESetIterator in C++
108
+ # Not dangerous, but inconvenient to use from a Ruby programming idiom, so we
109
+ # wrap it.
110
class Xapian::ExpandTerm
  # Plain value holder: a name and its weight, stored as given.
  attr_accessor :name, :weight

  def initialize(name, weight)
    @name, @weight = name, weight
  end # initialize

  # Equal only to another Xapian::ExpandTerm with the same name and weight.
  def ==(other)
    return false unless other.is_a?(Xapian::ExpandTerm)

    other.name == @name && other.weight == @weight
  end

end # Xapian::ExpandTerm
123
+
124
+ # Ruby wrapper for Xapian::ValueIterator
125
class Xapian::Value
  # Plain value holder: a value together with its valueno, stored as given.
  attr_accessor :value, :valueno

  def initialize(value, valueno)
    @value, @valueno = value, valueno
  end # initialize

  # Equal only to another Xapian::Value with the same value and valueno.
  def ==(other)
    return false unless other.is_a?(Xapian::Value)

    other.value == @value && other.valueno == @valueno
  end
end # Xapian::Value
137
+
138
+ #--
139
+ # Extend Xapian::Document with a nice wrapper for its nasty input_iterators
140
class Xapian::Document
  # Returns an Array of Xapian::Term built from this document's term list
  # iterators. Each Term carries the entry's term and wdf; termfreq is left
  # at its nil default.
  def terms
    first = _dangerous_termlist_begin()
    last = _dangerous_termlist_end()
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end # terms

  # Returns an Array of Xapian::Value built from this document's value
  # iterators, each carrying the entry's value and valueno.
  def values
    first = _dangerous_values_begin()
    last = _dangerous_values_end()
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Value.new(entry.value, entry.valueno)
    end
  end # values

end # class Xapian::Document
154
+
155
+ #--
156
+ # Extend Xapian::Query with a nice wrapper for its dangerous iterators
157
class Xapian::Query
  # Returns an Array of Xapian::Term built from this query's term iterators.
  # Only term and wdf are filled in: termfreq is not supported by
  # TermIterators from Queries, so it stays nil.
  def terms
    first = _dangerous_terms_begin()
    last = _dangerous_terms_end()
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end
end # Xapian::Query
165
+
166
+ #--
167
+ # Extend Xapian::Enquire with a nice wrapper for its dangerous iterators
168
class Xapian::Enquire
  # Get matching terms for some document, as an Array of Xapian::Term with
  # term and wdf filled in (termfreq stays nil).
  # document can be either a Xapian::DocID or a Xapian::MSetIterator; it is
  # passed unchanged to both the begin and the end iterator call.
  def matching_terms(document)
    first = _dangerous_matching_terms_begin(document)
    last = _dangerous_matching_terms_end(document)
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Term.new(entry.term, entry.wdf)
    end
  end
end # Xapian::Enquire
178
+
179
+ # MSetIterators are not dangerous, just inconvenient to use within a Ruby
180
+ # programming idiom. So we wrap them.
181
class Xapian::MSet
  # Returns an Array of Xapian::Match, one per position between _begin and
  # _end, copying docid, document, rank, weight, collapse_count and percent
  # from the iterator at that position.
  def matches
    first = _begin()
    last = _end()
    Xapian._safelyIterate(first, last) do |hit|
      Xapian::Match.new(hit.docid, hit.document, hit.rank, hit.weight,
                        hit.collapse_count, hit.percent)
    end
  end # matches
end # Xapian::MSet
190
+
191
+ # ESetIterators are not dangerous, just inconvenient to use within a Ruby
192
+ # programming idiom. So we wrap them.
193
class Xapian::ESet
  # Returns an Array of Xapian::ExpandTerm, one per position between _begin
  # and _end.
  def terms
    first = _begin()
    last = _end()
    Xapian._safelyIterate(first, last) do |entry|
      # note: the iterator's termname() (ESetIterator#termname, defined in
      # xapian.i) is exposed on the wrapper as ExpandTerm#name
      Xapian::ExpandTerm.new(entry.termname, entry.weight)
    end
  end # terms
end # Xapian::ESet
204
+
205
+
206
+ #--
207
+ # Wrapper for the C++ class Xapian::PostingIterator
208
class Xapian::Posting
  # Plain value holder: docid, doclength and wdf, stored as given.
  attr_accessor :docid, :doclength, :wdf

  def initialize(docid, doclength, wdf)
    @docid, @doclength, @wdf = docid, doclength, wdf
  end

  # Equal only to another Xapian::Posting with the same docid, doclength
  # and wdf.
  def ==(other)
    return false unless other.is_a?(Xapian::Posting)

    other.docid == @docid &&
      other.doclength == @doclength &&
      other.wdf == @wdf
  end
end # Xapian::Posting
222
+
223
+ #--
224
+ # Wrap some dangerous iterators..
225
class Xapian::Database
  # Returns an Array of all Xapian::Terms for this database.
  # Each Term carries the entry's term and termfreq; wdf is set to 0.
  def allterms
    first = _dangerous_allterms_begin()
    last = _dangerous_allterms_end()
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Term.new(entry.term, 0, entry.termfreq)
    end
  end # allterms

  # Returns an Array of Xapian::Postings for the given term.
  # term is a string.
  def postlist(term)
    first = _dangerous_postlist_begin(term)
    last = _dangerous_postlist_end(term)
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Posting.new(entry.docid, entry.doclength, entry.wdf)
    end
  end # postlist(term)

  # Returns an Array of Terms for the given docid, each with term, wdf and
  # termfreq copied from the iterator entry.
  def termlist(docid)
    first = _dangerous_termlist_begin(docid)
    last = _dangerous_termlist_end(docid)
    Xapian._safelyIterate(first, last) do |entry|
      Xapian::Term.new(entry.term, entry.wdf, entry.termfreq)
    end
  end # termlist(docid)


  # Returns an Array holding the termpos of every position-list entry for
  # the given term (a String) in the given docid.
  def positionlist(docid, term)
    first = _dangerous_positionlist_begin(docid, term)
    last = _dangerous_positionlist_end(docid, term)
    Xapian._safelyIterate(first, last) do |entry|
      entry.termpos
    end
  end # positionlist

end # Xapian::Database
262
+
263
+
264
+ end # Xapian module
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/ruby -w
2
+ #
3
+ # smoketest.rb - test Xapian bindings for Ruby
4
+ # Original version by Paul Legato (plegato@nks.net), 4/17/2006
5
+ #
6
+ # Originally based on smoketest.php from the PHP4 bindings.
7
+ #
8
+ # Copyright (C) 2006 Networked Knowledge Systems, Inc.
9
+ # Copyright (C) 2008 Olly Betts
10
+ #
11
+ # This program is free software; you can redistribute it and/or
12
+ # modify it under the terms of the GNU General Public License as
13
+ # published by the Free Software Foundation; either version 2 of the
14
+ # License, or (at your option) any later version.
15
+ #
16
+ # This program is distributed in the hope that it will be useful,
17
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ # GNU General Public License for more details.
20
+ #
21
+ # You should have received a copy of the GNU General Public License
22
+ # along with this program; if not, write to the Free Software
23
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
24
+ # USA
25
+
26
+ require 'test/unit'
27
+ require 'lib/xapian'
28
+
29
class TestMatchDecider < Xapian::MatchDecider
  # Accepts a document only when its value number 0 equals "yes".
  def __call__(doc)
    doc.value(0) == "yes"
  end
end
34
+
35
# Smoke tests for the Ruby Xapian bindings, run against a one-document
# in-memory database that #setup rebuilds before every test.
#
# assert_equal is always called as assert_equal(expected, actual) so that a
# failure message labels the two values correctly.
class XapianSmoketest < Test::Unit::TestCase

  # Builds the fixture: an English stemmer, a document with five postings
  # (four distinct stemmed terms, "there" appearing twice) plus the extra
  # term "XYzzy", an in-memory database holding that document, and an
  # Enquire on the database.
  def setup
    @stem = Xapian::Stem.new("english")

    @doc = Xapian::Document.new()
    @doc.data = "is there anybody out there?"
    @doc.add_posting(@stem.call("is"), 1)
    @doc.add_posting(@stem.call("there"), 2)
    @doc.add_posting(@stem.call("anybody"), 3)
    @doc.add_posting(@stem.call("out"), 4)
    @doc.add_posting(@stem.call("there"), 5)
    @doc.add_term("XYzzy")

    @db = Xapian::inmemory_open()
    @db.add_document(@doc)

    @enq = Xapian::Enquire.new(@db)
  end # setup

  def test_version
    # Test the version number reporting functions give plausible results.
    @v = sprintf("%d.%d.%d", Xapian::major_version(), Xapian::minor_version(),
                 Xapian::revision())
    @v2 = Xapian::version_string()
    assert_equal(@v2, @v)
  end # test_version

  def test_stem
    assert_equal("Xapian::Stem(english)", @stem.description())

    assert_equal("is", @stem.call("is"))
    assert_equal("go", @stem.call("going"))
    assert_equal("want", @stem.call("wanted"))
    assert_equal("refer", @stem.call("reference"))
  end # test_stem

  # subtests are those on which some test_foo() method depends.
  def test_000_document
    assert_not_nil(@doc)

    assert_equal("is there anybody out there?", @doc.data())

    assert_equal(5, @doc.termlist_count())
    assert_equal("XYzzy", @doc.terms().first.term)

    @doc.add_term("foo")
    assert_equal(6, @doc.termlist_count())
    assert_equal(@doc.terms.size(), @doc.termlist_count())

  end # test_000_document

  def test_001_database
    assert_not_nil(@db)
    assert_equal("WritableDatabase()", @db.description())
    assert_equal(1, @db.doccount())
  end # test_001_database

  def test_002_queries
    assert_equal("Xapian::Query((smoke OR test OR terms))",
                 Xapian::Query.new(Xapian::Query::OP_OR ,["smoke", "test", "terms"]).description())

    phraseQuery = Xapian::Query.new(Xapian::Query::OP_PHRASE ,["smoke", "test", "tuple"])
    xorQuery = Xapian::Query.new(Xapian::Query::OP_XOR, [ Xapian::Query.new("smoke"), phraseQuery, "string" ])

    assert_equal("Xapian::Query((smoke PHRASE 3 test PHRASE 3 tuple))", phraseQuery.description())
    assert_equal("Xapian::Query((smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string))", xorQuery.description())

    assert_equal([Xapian::Term.new("smoke", 1),
                  Xapian::Term.new("string", 1),
                  Xapian::Term.new("test", 1),
                  Xapian::Term.new("tuple", 1)], xorQuery.terms())

    assert_equal(10, Xapian::Query::OP_ELITE_SET)
  end # test_002_queries

  def test_003_enquire
    @enq = Xapian::Enquire.new(@db)
    assert_not_nil(@enq)

    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
    mset = @enq.mset(0, 10)

    assert_equal(1, mset.size())

    # Feature test for Enquire.matching_terms()
    assert_equal(2, @enq.matching_terms(mset.hit(0)).size())
    assert_equal([Xapian::Term.new("is", 1), Xapian::Term.new("there", 1)],
                 @enq.matching_terms(mset.hit(0)))
  end # test_003_enquire

  def test_004_mset_iterator
    @enq = Xapian::Enquire.new(@db)
    assert_not_nil(@enq)

    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")
    mset = @enq.mset(0, 10)

    assert_equal(mset.matches().size(), mset.size())
  end # test_004_mset_iterator


  def test_005_eset_iterator
    rset = Xapian::RSet.new

    rset.add_document(1)

    @enq = Xapian::Enquire.new(@db)
    @enq.query = Xapian::Query.new(Xapian::Query::OP_OR, "there", "is")

    eset = @enq.eset(10, rset)
    assert_not_nil(eset)

    assert_equal(3, eset.terms.size())
  end # test_005_eset_iterator

  # Feature test for Database.allterms
  def test_006_database_allterms
    assert_equal(5, @db.allterms.size())
  end

  # Feature test for Database.postlist
  def test_007_database_postlist
    assert_equal(1, @db.postlist("there").size())
  end

  # Feature test for Database.termlist
  def test_008_database_termlist
    assert_equal(5, @db.termlist(1).size())
  end

  # Feature test for Database.positionlist
  def test_009_database_positionlist
    assert_equal(2, @db.positionlist(1, "there").size())
  end

  # Feature test for Document.values
  def test_010_document_values
    assert_equal(0, @doc.values().size())
  end

  # Adds a second document whose value 0 is "yes" and checks that running
  # the query with TestMatchDecider leaves exactly that document (docid 2).
  def test_011_matchdecider
    @doc = Xapian::Document.new()
    @doc.data = "Two"
    @doc.add_posting(@stem.call("out"), 1)
    @doc.add_posting(@stem.call("source"), 2)
    @doc.add_value(0, "yes")
    @db.add_document(@doc)

    @query = Xapian::Query.new(@stem.call("out"))
    enquire = Xapian::Enquire.new(@db)
    enquire.query = @query
    mset = enquire.mset(0, 10, nil, TestMatchDecider.new)
    assert_equal(1, mset.size())
    assert_equal(2, mset.docid(0))
  end # test_011_matchdecider

  # An unset metadata key reads back as the empty string; after set_metadata
  # the stored value is returned.
  def test_012_metadata
    assert_equal('', @db.get_metadata('Foo'))
    @db.set_metadata('Foo', 'Foo')
    assert_equal('Foo', @db.get_metadata('Foo'))
  end # test_012_metadata

  def test_013_scaleweight
    query = Xapian::Query.new("foo")
    query2 = Xapian::Query.new(Xapian::Query::OP_SCALE_WEIGHT, query, 5)
    assert_equal("Xapian::Query(5 * foo)", query2.description())
  end # test_013_scaleweight

end # class XapianSmoketest