estraier 1.4.10
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +0 -0
- data/History.txt +0 -0
- data/README.txt +107 -0
- data/Rakefile +100 -0
- data/bin/estcmd +695 -0
- data/examples/Makefile +31 -0
- data/examples/example001.rb +32 -0
- data/examples/example002.rb +42 -0
- data/ext/estraier.c +1258 -0
- data/ext/extconf.rb +16 -0
- data/lib/estraier.rb +1 -0
- data/lib/estraier/estraier-doc.rb +612 -0
- data/lib/estraier/version.rb +9 -0
- data/setup.rb +1585 -0
- data/test/test_estraier.rb +11 -0
- data/test/test_helper.rb +2 -0
- metadata +73 -0
data/ext/extconf.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
#:nodoc:
|
3
|
+
|
4
|
+
dir_config('estraier')
|
5
|
+
|
6
|
+
# Extend the command search path so that the `estconfig' helper run just below
# can be found in common install prefixes or in this/parent build directories.
# PATH may be unset in a scrubbed environment, so a nil value is dropped rather
# than raising NoMethodError, and the platform's own separator is used instead
# of a hard-coded ":".
# NOTE(review): "." and the relative entries allow a local `estconfig' to shadow
# the installed one; kept as-is for backward compatibility.
ENV["PATH"] = [ENV["PATH"], "/opt/local/bin", "/usr/local/bin", ".", "..", "../.."].compact.join(File::PATH_SEPARATOR)
|
7
|
+
estcflags = `estconfig --cflags`.chomp
|
8
|
+
estldflags = `estconfig --ldflags`.chomp
|
9
|
+
estlibs = `estconfig --libs`.chomp
|
10
|
+
$CFLAGS = "-I. -I.. -I../.. #{estcflags} -Wall #{$CFLAGS} -O3 -fomit-frame-pointer -fforce-addr"
|
11
|
+
$LDFLAGS = "#{$LDFLAGS} -L. -L.. -L../.. #{estldflags}"
|
12
|
+
$libs = "#{$libs} #{estlibs}"
|
13
|
+
|
14
|
+
if have_header('estraier.h') and have_library('estraier')
|
15
|
+
create_makefile('estraier')
|
16
|
+
end
|
data/lib/estraier.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Load every Ruby file found beneath the sibling `estraier' directory, in sorted path order.
# NOTE(review): the glob also matches estraier-doc.rb, whose method bodies are empty
# placeholders -- confirm that loading it here is intended.
Dir.glob(File.join(File.dirname(__FILE__), 'estraier', '**', '*.rb')).sort.each do |path|
  require path
end
|
@@ -0,0 +1,612 @@
|
|
1
|
+
#--
|
2
|
+
# Ruby binding of Hyper Estraier
|
3
|
+
# Copyright (C) 2004-2007 Mikio Hirabayashi
|
4
|
+
# This file is part of Hyper Estraier.
|
5
|
+
# Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
|
6
|
+
# the GNU Lesser General Public License as published by the Free Software Foundation; either
|
7
|
+
# version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
|
8
|
+
# that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
10
|
+
# License for more details.
|
11
|
+
# You should have received a copy of the GNU Lesser General Public License along with Hyper
|
12
|
+
# Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
13
|
+
# Boston, MA 02111-1307 USA.
|
14
|
+
#++
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
#
|
19
|
+
# Module for the namespace of Hyper Estraier
|
20
|
+
#
|
21
|
+
module Estraier
|
22
|
+
#----------------------------------------------------------------
|
23
|
+
#++ Abstraction of document.
|
24
|
+
#----------------------------------------------------------------
|
25
|
+
class Document
|
26
|
+
#--------------------------------
|
27
|
+
# public methods
|
28
|
+
#--------------------------------
|
29
|
+
public
|
30
|
+
# Add an attribute.
|
31
|
+
# `name' specifies the name of an attribute.
|
32
|
+
# `value' specifies the value of the attribute. If it is `nil', the attribute is removed.
|
33
|
+
# The return value is always `nil'.
|
34
|
+
def add_attr(name, value)
|
35
|
+
# native code ...
|
36
|
+
end
|
37
|
+
# Add a sentence of text.
|
38
|
+
# `text' specifies a sentence of text.
|
39
|
+
# The return value is always `nil'.
|
40
|
+
def add_text(text)
|
41
|
+
# native code ...
|
42
|
+
end
|
43
|
+
# Add a hidden sentence.
|
44
|
+
# `text' specifies a hidden sentence.
|
45
|
+
# The return value is always `nil'.
|
46
|
+
def add_hidden_text(text)
|
47
|
+
# native code ...
|
48
|
+
end
|
49
|
+
# Attach keywords.
|
50
|
+
# `kwords' specifies a hash object of keywords. Keys of the hash should be keywords of the
|
51
|
+
# document and values should be their scores in decimal string.
|
52
|
+
# The return value is always `nil'.
|
53
|
+
def set_keywords(kwords)
|
54
|
+
# native code ...
|
55
|
+
end
|
56
|
+
# Set the substitute score.
|
57
|
+
# `score' specifies the substitute score. If it is negative, the substitute score setting is
|
58
|
+
# nullified.
|
59
|
+
# The return value is always `nil'.
|
60
|
+
def set_score(score)
|
61
|
+
# native code ...
|
62
|
+
end
|
63
|
+
# Get the ID number.
|
64
|
+
# The return value is the ID number of the document object. If the object has never been
|
65
|
+
# registered, -1 is returned.
|
66
|
+
def id()
|
67
|
+
# native code ...
|
68
|
+
end
|
69
|
+
# Get an array of attribute names of a document object.
|
70
|
+
# The return value is an array object of attribute names.
|
71
|
+
def attr_names()
|
72
|
+
# native code ...
|
73
|
+
end
|
74
|
+
# Get the value of an attribute.
|
75
|
+
# `name' specifies the name of an attribute.
|
76
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
77
|
+
def attr(name)
|
78
|
+
# native code ...
|
79
|
+
end
|
80
|
+
# Get an array of sentences of the text.
|
81
|
+
# The return value is an array object of sentences of the text.
|
82
|
+
def texts()
|
83
|
+
# native code ...
|
84
|
+
end
|
85
|
+
# Concatenate sentences of the text of a document object.
|
86
|
+
# The return value is concatenated sentences.
|
87
|
+
def cat_texts()
|
88
|
+
# native code ...
|
89
|
+
end
|
90
|
+
# Get attached keywords.
|
91
|
+
# The return value is a hash object of keywords and their scores in decimal string. If no
|
92
|
+
# keyword is attached, `nil' is returned.
|
93
|
+
def keywords()
|
94
|
+
# native code ...
|
95
|
+
end
|
96
|
+
# Get the substitute score.
|
97
|
+
# The return value is the substitute score or -1 if it is not set.
|
98
|
+
def score()
|
99
|
+
# native code ...
|
100
|
+
end
|
101
|
+
# Dump draft data of a document object.
|
102
|
+
# The return value is draft data.
|
103
|
+
def dump_draft()
|
104
|
+
# native code ...
|
105
|
+
end
|
106
|
+
# Make a snippet of the body text.
|
107
|
+
# `words' specifies an array object of words to be highlighted.
|
108
|
+
# `wwidth' specifies whole width of the result.
|
109
|
+
# `hwidth' specifies width of strings picked up from the beginning of the text.
|
110
|
+
# `awidth' specifies width of strings picked up around each highlighted word.
|
111
|
+
# The return value is a snippet string of the body text. It consists of tab separated values.
|
112
|
+
# Each line is a string to be shown. Though most lines have only one field, some lines have
|
113
|
+
# two fields. If the second field exists, the first field is to be shown highlighted,
|
114
|
+
# and the second field means its normalized form.
|
115
|
+
def make_snippet(words, wwidth, hwidth, awidth)
|
116
|
+
# native code ...
|
117
|
+
end
|
118
|
+
#--------------------------------
|
119
|
+
# private methods
|
120
|
+
#--------------------------------
|
121
|
+
private
|
122
|
+
# Create a document object.
|
123
|
+
# `draft' specifies a string of draft data.
|
124
|
+
def initialize(draft = "")
|
125
|
+
# native code ...
|
126
|
+
end
|
127
|
+
end
|
128
|
+
#----------------------------------------------------------------
|
129
|
+
#++ Abstraction of search condition.
|
130
|
+
#----------------------------------------------------------------
|
131
|
+
class Condition
|
132
|
+
#--------------------------------
|
133
|
+
# public constants
|
134
|
+
#--------------------------------
|
135
|
+
public
|
136
|
+
# option: check every N-gram key
|
137
|
+
SURE = 1 << 0
|
138
|
+
# option: check N-gram keys skipping by one
|
139
|
+
USUAL = 1 << 1
|
140
|
+
# option: check N-gram keys skipping by two
|
141
|
+
FAST = 1 << 2
|
142
|
+
# option: check N-gram keys skipping by three
|
143
|
+
AGITO = 1 << 3
|
144
|
+
# option: without TF-IDF tuning
|
145
|
+
NOIDF = 1 << 4
|
146
|
+
# option: with the simplified phrase
|
147
|
+
SIMPLE = 1 << 10
|
148
|
+
# option: with the rough phrase
|
149
|
+
ROUGH = 1 << 11
|
150
|
+
# option: with the union phrase
|
151
|
+
UNION = 1 << 15
|
152
|
+
# option: with the intersection phrase
|
153
|
+
ISECT = 1 << 16
|
154
|
+
# eclipse tuning: consider URL
|
155
|
+
ECLSIMURL = 10.0
|
156
|
+
# eclipse tuning: on server basis
|
157
|
+
ECLSERV = 100.0
|
158
|
+
# eclipse tuning: on directory basis
|
159
|
+
ECLDIR = 101.0
|
160
|
+
# eclipse tuning: on file basis
|
161
|
+
ECLFILE = 102.0
|
162
|
+
#--------------------------------
|
163
|
+
# public methods
|
164
|
+
#--------------------------------
|
165
|
+
public
|
166
|
+
# Set the search phrase.
|
167
|
+
# `phrase' specifies a search phrase.
|
168
|
+
# The return value is always `nil'.
|
169
|
+
def set_phrase(phrase)
|
170
|
+
# native code ...
|
171
|
+
end
|
172
|
+
# Add an expression for an attribute.
|
173
|
+
# `expr' specifies an expression for an attribute.
|
174
|
+
# The return value is always `nil'.
|
175
|
+
def add_attr(expr)
|
176
|
+
# native code ...
|
177
|
+
end
|
178
|
+
# Set the order of a condition object.
|
179
|
+
# `expr' specifies an expression for the order. By default, the order is by score descending.
|
180
|
+
# The return value is always `nil'.
|
181
|
+
def set_order(expr)
|
182
|
+
# native code ...
|
183
|
+
end
|
184
|
+
# Set the maximum number of retrieval.
|
185
|
+
# `max' specifies the maximum number of retrieval. By default, the number of retrieval is
|
186
|
+
# not limited.
|
187
|
+
# The return value is always `nil'.
|
188
|
+
def set_max(max)
|
189
|
+
# native code ...
|
190
|
+
end
|
191
|
+
# Set the number of skipped documents.
|
192
|
+
# `skip' specifies the number of documents to be skipped in the search result.
|
193
|
+
# The return value is always `nil'.
|
194
|
+
def set_skip(skip)
|
195
|
+
# native code ...
|
196
|
+
end
|
197
|
+
# Set options of retrieval.
|
198
|
+
# `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
|
199
|
+
# key, `Condition::USUAL', which is the default, specifies that it checks N-gram keys
|
200
|
+
# with skipping one key, `Condition::FAST' skips two keys, `Condition::AGITO'
|
201
|
+
# skips three keys, `Condition::NOIDF' specifies not to perform TF-IDF tuning,
|
202
|
+
# `Condition::SIMPLE' specifies to use simplified phrase, `Condition::ROUGH' specifies to use
|
203
|
+
# rough phrase, `Condition::UNION' specifies to use union phrase, `Condition::ISECT' specifies
|
204
|
+
# to use intersection phrase. Each option can be specified at the same time by bitwise or.
|
205
|
+
# If keys are skipped, though search speed is improved, the relevance ratio grows less.
|
206
|
+
# The return value is always `nil'.
|
207
|
+
def set_options(options)
|
208
|
+
# native code ...
|
209
|
+
end
|
210
|
+
# Set permission to adopt result of the auxiliary index.
|
211
|
+
# `min' specifies the minimum hits to adopt result of the auxiliary index. If it is not more
|
212
|
+
# than 0, the auxiliary index is not used. By default, it is 32.
|
213
|
+
# The return value is always `nil'.
|
214
|
+
def set_auxiliary(min)
|
215
|
+
# native code ...
|
216
|
+
end
|
217
|
+
# Set the lower limit of similarity eclipse.
|
218
|
+
# `limit' specifies the lower limit of similarity for documents to be eclipsed. Similarity is
|
219
|
+
# between 0.0 and 1.0. If the limit is added by `Condition::ECLSIMURL', similarity is
|
220
|
+
# weighted by URL. If the limit is `Condition::ECLSERV', similarity is ignored and documents
|
221
|
+
# in the same server are eclipsed. If the limit is `Condition::ECLDIR', similarity is ignored
|
222
|
+
# and documents in the same directory are eclipsed. If the limit is `Condition::ECLFILE',
|
223
|
+
# similarity is ignored and documents of the same file are eclipsed.
|
224
|
+
# The return value is always `nil'.
|
225
|
+
def set_eclipse(limit)
|
226
|
+
# native code ...
|
227
|
+
end
|
228
|
+
# Set the attribute distinction filter.
|
229
|
+
# `name' specifies the name of an attribute to be distinct.
|
230
|
+
# The return value is always `nil'.
|
231
|
+
def set_distinct(name)
|
232
|
+
end
|
233
|
+
# Set the mask of targets of meta search.
|
234
|
+
# `mask' specifies a masking number. 1 means the first target, 2 means the second target, 4
|
235
|
+
# means the third target, and power values of 2 and their summation compose the mask.
|
236
|
+
# The return value is always `nil'.
|
237
|
+
def set_mask(mask)
|
238
|
+
# native code ...
|
239
|
+
end
|
240
|
+
#--------------------------------
|
241
|
+
# private methods
|
242
|
+
#--------------------------------
|
243
|
+
private
|
244
|
+
# Create a search condition object.
|
245
|
+
def initialize()
|
246
|
+
# native code ...
|
247
|
+
end
|
248
|
+
end
|
249
|
+
#----------------------------------------------------------------
|
250
|
+
#++ Abstraction of result set from database.
|
251
|
+
#----------------------------------------------------------------
|
252
|
+
class Result
|
253
|
+
#--------------------------------
|
254
|
+
# public methods
|
255
|
+
#--------------------------------
|
256
|
+
public
|
257
|
+
# Get the number of documents.
|
258
|
+
# The return value is the number of documents in the result.
|
259
|
+
def doc_num()
|
260
|
+
# native code ...
|
261
|
+
end
|
262
|
+
# Get the ID number of a document.
|
263
|
+
# `index' specifies the index of a document.
|
264
|
+
# The return value is the ID number of the document or -1 if the index is out of bounds.
|
265
|
+
def get_doc_id(index)
|
266
|
+
# native code ...
|
267
|
+
end
|
268
|
+
# Get the index of the container database of a document.
|
269
|
+
# `index' specifies the index of a document.
|
270
|
+
# The return value is the index of the container database of the document or -1 if the index
|
271
|
+
# is out of bounds.
|
272
|
+
def get_dbidx(index)
|
273
|
+
# native code ...
|
274
|
+
end
|
275
|
+
# Get an array of hint words.
|
276
|
+
# The return value is an array of hint words.
|
277
|
+
def hint_words()
|
278
|
+
# native code ...
|
279
|
+
end
|
280
|
+
# Get the value of a hint word.
|
281
|
+
# `word' specifies a hint word. An empty string means the number of whole result.
|
282
|
+
# The return value is the number of documents corresponding to the hint word. If the word is
|
283
|
+
# in a negative condition, the value is negative.
|
284
|
+
def hint(word)
|
285
|
+
# native code ...
|
286
|
+
end
|
287
|
+
# Get the score of a document.
|
288
|
+
# `index' specifies the index of a document.
|
289
|
+
# The return value is the score of the document or -1 if the index is out of bounds.
|
290
|
+
def get_score(index)
|
291
|
+
# native code ...
|
292
|
+
end
|
293
|
+
# Get an array of ID numbers of eclipsed documents of a document.
|
294
|
+
# `id' specifies the ID number of a parent document.
|
295
|
+
# The return value is an array whose elements express the ID numbers and their scores
|
296
|
+
# alternately.
|
297
|
+
def get_shadows(id)
|
298
|
+
# native code ...
|
299
|
+
end
|
300
|
+
#--------------------------------
|
301
|
+
# private methods
|
302
|
+
#--------------------------------
|
303
|
+
private
|
304
|
+
# Create a result set object.
|
305
|
+
def initialize()
|
306
|
+
# native code ...
|
307
|
+
end
|
308
|
+
end
|
309
|
+
#----------------------------------------------------------------
|
310
|
+
#++ Abstraction of database.
|
311
|
+
#----------------------------------------------------------------
|
312
|
+
class Database
|
313
|
+
#--------------------------------
|
314
|
+
# public constants
|
315
|
+
#--------------------------------
|
316
|
+
public
|
317
|
+
# version of Hyper Estraier
|
318
|
+
VERSION = "0.0.0"
|
319
|
+
# error code: no error
|
320
|
+
ERRNOERR = 0
|
321
|
+
# error code: invalid argument
|
322
|
+
ERRINVAL = 1
|
323
|
+
# error code: access forbidden
|
324
|
+
ERRACCES = 2
|
325
|
+
# error code: lock failure
|
326
|
+
ERRLOCK = 3
|
327
|
+
# error code: database problem
|
328
|
+
ERRDB = 4
|
329
|
+
# error code: I/O problem
|
330
|
+
ERRIO = 5
|
331
|
+
# error code: no item
|
332
|
+
ERRNOITEM = 6
|
333
|
+
# error code: miscellaneous
|
334
|
+
ERRMISC = 9999
|
335
|
+
# open mode: open as a reader
|
336
|
+
DBREADER = 1 << 0
|
337
|
+
# open mode: open as a writer
|
338
|
+
DBWRITER = 1 << 1
|
339
|
+
# open mode: a writer creating
|
340
|
+
DBCREAT = 1 << 2
|
341
|
+
# open mode: a writer truncating
|
342
|
+
DBTRUNC = 1 << 3
|
343
|
+
# open mode: open without locking
|
344
|
+
DBNOLCK = 1 << 4
|
345
|
+
# open mode: lock without blocking
|
346
|
+
DBLCKNB = 1 << 5
|
347
|
+
# open mode: use perfect N-gram analyzer
|
348
|
+
DBPERFNG = 1 << 10
|
349
|
+
# open mode: use character category analyzer
|
350
|
+
DBCHRCAT = 1 << 11
|
351
|
+
# open mode: small tuning
|
352
|
+
DBSMALL = 1 << 20
|
353
|
+
# open mode: large tuning
|
354
|
+
DBLARGE = 1 << 21
|
355
|
+
# open mode: huge tuning
|
356
|
+
DBHUGE = 1 << 22
|
357
|
+
# open mode: huge tuning second
|
358
|
+
DBHUGE2 = 1 << 23
|
359
|
+
# open mode: huge tuning third
|
360
|
+
DBHUGE3 = 1 << 24
|
361
|
+
# open mode: store scores as void
|
362
|
+
DBSCVOID = 1 << 25
|
363
|
+
# open mode: store scores as integer
|
364
|
+
DBSCINT = 1 << 26
|
365
|
+
# open mode: refrain from adjustment of scores
|
366
|
+
DBSCASIS = 1 << 27
|
367
|
+
# attribute index type: for multipurpose sequential access method
|
368
|
+
IDXATTRSEQ = 0
|
369
|
+
# attribute index type: for narrowing with attributes as strings
|
370
|
+
IDXATTRSTR = 1
|
371
|
+
# attribute index type: for narrowing with attributes as numbers
|
372
|
+
IDXATTRNUM = 2
|
373
|
+
# optimize option: omit purging dispensable region of deleted documents
|
374
|
+
OPTNOPURGE = 1 << 0
|
375
|
+
# optimize option: omit optimization of the database files
|
376
|
+
OPTNODBOPT = 1 << 1
|
377
|
+
# merge option: clean up dispensable regions
|
378
|
+
MGCLEAN = 1 << 0
|
379
|
+
# put_doc option: clean up dispensable regions
|
380
|
+
PDCLEAN = 1 << 0
|
381
|
+
# put_doc option: weight scores statically when indexing
|
382
|
+
PDWEIGHT = 1 << 1
|
383
|
+
# out_doc option: clean up dispensable regions
|
384
|
+
ODCLEAN = 1 << 0
|
385
|
+
# get_doc option: no attributes
|
386
|
+
GDNOATTR = 1 << 0
|
387
|
+
# get_doc option: no text
|
388
|
+
GDNOTEXT = 1 << 1
|
389
|
+
# get_doc option: no keywords
|
390
|
+
GDNOKWD = 1 << 2
|
391
|
+
#--------------------------------
|
392
|
+
# public class methods
|
393
|
+
#--------------------------------
|
394
|
+
public
|
395
|
+
# Search plural databases for documents corresponding to a condition.
|
396
|
+
# `dbs' specifies an array whose elements are database objects.
|
397
|
+
# `cond' specifies a condition object.
|
398
|
+
# The return value is a result object. On error, `nil' is returned.
|
399
|
+
def self.search_meta(dbs, cond)
|
400
|
+
# native code ...
|
401
|
+
end
|
402
|
+
#--------------------------------
|
403
|
+
# public methods
|
404
|
+
#--------------------------------
|
405
|
+
public
|
406
|
+
# Get the string of an error code.
|
407
|
+
# `ecode' specifies an error code.
|
408
|
+
# The return value is the string of the error code.
|
409
|
+
def err_msg(ecode)
|
410
|
+
# native code ...
|
411
|
+
end
|
412
|
+
# Open a database.
|
413
|
+
# `name' specifies the name of a database directory.
|
414
|
+
# `omode' specifies open modes: `Database::DBWRITER' as a writer, `Database::DBREADER' as a
|
415
|
+
# reader. If the mode is `Database::DBWRITER', the following may be added by bitwise or:
|
416
|
+
# `Database::DBCREAT', which means it creates a new database if it does not exist,
|
417
|
+
# `Database::DBTRUNC', which means it creates a new database regardless if one exists. Both
|
418
|
+
# of `Database::DBREADER' and `Database::DBWRITER' can be added to by bitwise or:
|
419
|
+
# `Database::DBNOLCK', which means it opens a database file without file locking, or
|
420
|
+
# `Database::DBLCKNB', which means locking is performed without blocking. If
|
421
|
+
# `Database::DBNOLCK' is used, the application is responsible for exclusion control.
|
422
|
+
# `Database::DBCREAT' can be added to by bitwise or: `Database::DBPERFNG', which means N-gram
|
423
|
+
# analysis is performed against European text also, `Database::DBCHRCAT', which means
|
424
|
+
# character category analysis is performed instead of N-gram analysis, `Database::DBSMALL',
|
425
|
+
# which means the index is tuned to register less than 50000 documents, `Database::DBLARGE',
|
426
|
+
# which means the index is tuned to register more than 300000 documents, `Database::DBHUGE',
|
427
|
+
# which means the index is tuned to register more than 1000000 documents, `Database::DBHUGE2',
|
428
|
+
# which means the index is tuned to register more than 5000000 documents, `Database::DBHUGE3',
|
429
|
+
# which means the index is tuned to register more than 10000000 documents,
|
430
|
+
# `Database::DBSCVOID', which means scores are stored as void, `Database::DBSCINT', which
|
431
|
+
# means scores are stored as 32-bit integer, `Database::DBSCASIS', which means scores are
|
432
|
+
# stored as-is and marked not to be tuned when searching.
|
433
|
+
# The return value is true if success, else it is false.
|
434
|
+
def open(name, omode)
|
435
|
+
# native code ...
|
436
|
+
end
|
437
|
+
# Close the database.
|
438
|
+
# The return value is true if success, else it is false.
|
439
|
+
def close()
|
440
|
+
# native code ...
|
441
|
+
end
|
442
|
+
# Get the last happened error code.
|
443
|
+
# The return value is the last happened error code.
|
444
|
+
def error()
|
445
|
+
# native code ...
|
446
|
+
end
|
447
|
+
# Check whether the database has a fatal error.
|
448
|
+
# The return value is true if the database has a fatal error, else it is false.
|
449
|
+
def fatal()
|
450
|
+
# native code ...
|
451
|
+
end
|
452
|
+
# Add an index for narrowing or sorting with document attributes.
|
453
|
+
# `name' specifies the name of an attribute.
|
454
|
+
# `type' specifies the data type of attribute index; `Database::IDXATTRSEQ' for multipurpose
|
455
|
+
# sequential access method, `Database::IDXATTRSTR' for narrowing with attributes as strings,
|
456
|
+
# `Database::IDXATTRNUM' for narrowing with attributes as numbers.
|
457
|
+
# The return value is true if success, else it is false.
|
458
|
+
def add_attr_index(name, type)
|
459
|
+
# native code ...
|
460
|
+
end
|
461
|
+
# Flush index words in the cache.
|
462
|
+
# `max' specifies the maximum number of words to be flushed. If it is not more than zero, all
|
463
|
+
# words are flushed.
|
464
|
+
# The return value is true if success, else it is false.
|
465
|
+
def flush(max)
|
466
|
+
# native code ...
|
467
|
+
end
|
468
|
+
# Synchronize updating contents.
|
469
|
+
# The return value is true if success, else it is false.
|
470
|
+
def sync()
|
471
|
+
# native code ...
|
472
|
+
end
|
473
|
+
# Optimize the database.
|
474
|
+
# `options' specifies options: `Database::OPTNOPURGE' to omit purging dispensable region of
|
475
|
+
# deleted documents, `Database::OPTNODBOPT' to omit optimization of the database files. The
|
476
|
+
# two can be specified at the same time by bitwise or.
|
477
|
+
# The return value is true if success, else it is false.
|
478
|
+
def optimize(options)
|
479
|
+
# native code ...
|
480
|
+
end
|
481
|
+
# Merge another database.
|
482
|
+
# `name' specifies the name of another database directory.
|
483
|
+
# `options' specifies options: `Database::MGCLEAN' to clean up dispensable regions of the
|
484
|
+
# deleted document.
|
485
|
+
# The return value is true if success, else it is false.
|
486
|
+
def merge(name, options)
|
487
|
+
# native code ...
|
488
|
+
end
|
489
|
+
# Add a document.
|
490
|
+
# `doc' specifies a document object. The document object should have the URI attribute.
|
491
|
+
# `options' specifies options: `Database::PDCLEAN' to clean up dispensable regions of the
|
492
|
+
# overwritten document, `Database::PDWEIGHT' to weight scores statically when indexing.
|
493
|
+
# The return value is true if success, else it is false.
|
494
|
+
def put_doc(doc, options)
|
495
|
+
# native code ...
|
496
|
+
end
|
497
|
+
# Remove a document.
|
498
|
+
# `id' specifies the ID number of a registered document.
|
499
|
+
# `options' specifies options: `Database::ODCLEAN' to clean up dispensable regions of the
|
500
|
+
# deleted document.
|
501
|
+
# The return value is true if success, else it is false.
|
502
|
+
def out_doc(id, options)
|
503
|
+
# native code ...
|
504
|
+
end
|
505
|
+
# Edit attributes of a document.
|
506
|
+
# `doc' specifies a document object.
|
507
|
+
# The return value is true if success, else it is false.
|
508
|
+
def edit_doc(doc)
|
509
|
+
# native code ...
|
510
|
+
end
|
511
|
+
# Retrieve a document.
|
512
|
+
# `id' specifies the ID number of a registered document.
|
513
|
+
# `options' specifies options: `Database::GDNOATTR' to ignore attributes, `Database::GDNOTEXT'
|
514
|
+
# to ignore the body text, `Database::GDNOKWD' to ignore keywords. The three can be
|
515
|
+
# specified at the same time by bitwise or.
|
516
|
+
# The return value is a document object. On error, `nil' is returned.
|
517
|
+
def get_doc(id, options)
|
518
|
+
# native code ...
|
519
|
+
end
|
520
|
+
# Retrieve the value of an attribute of a document.
|
521
|
+
# `id' specifies the ID number of a registered document.
|
522
|
+
# `name' specifies the name of an attribute.
|
523
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
524
|
+
def get_doc_attr(id, name)
|
525
|
+
# native code ...
|
526
|
+
end
|
527
|
+
# Get the ID of a document specified by URI.
|
528
|
+
# `uri' specifies the URI of a registered document.
|
529
|
+
# The return value is the ID of the document. On error, -1 is returned.
|
530
|
+
def uri_to_id(uri)
|
531
|
+
# native code ...
|
532
|
+
end
|
533
|
+
# Get the name.
|
534
|
+
# The return value is the name of the database.
|
535
|
+
def name()
|
536
|
+
# native code ...
|
537
|
+
end
|
538
|
+
# Get the number of documents.
|
539
|
+
# The return value is the number of documents in the database.
|
540
|
+
def doc_num()
|
541
|
+
# native code ...
|
542
|
+
end
|
543
|
+
# Get the number of unique words.
|
544
|
+
# The return value is the number of unique words in the database.
|
545
|
+
def word_num()
|
546
|
+
# native code ...
|
547
|
+
end
|
548
|
+
# Get the size.
|
549
|
+
# The return value is the size of the database.
|
550
|
+
def size()
|
551
|
+
# native code ...
|
552
|
+
end
|
553
|
+
# Search for documents corresponding to a condition.
|
554
|
+
# `cond' specifies a condition object.
|
555
|
+
# The return value is a result object. On error, `nil' is returned.
|
556
|
+
def search(cond)
|
557
|
+
# native code ...
|
558
|
+
end
|
559
|
+
# Check whether a document object matches the phrase of a search condition object definitely.
|
560
|
+
# `doc' specifies a document object.
|
561
|
+
# `cond' specifies a search condition object.
|
562
|
+
# The return value is true if the document matches the phrase of the condition object
|
563
|
+
# definitely, else it is false.
|
564
|
+
def scan_doc(doc, cond)
|
565
|
+
# native code ...
|
566
|
+
end
|
567
|
+
# Set the maximum size of the cache memory.
|
568
|
+
# `size' specifies the maximum size of the index cache. By default, it is 64MB. If it is
|
569
|
+
# not more than 0, the current size is not changed.
|
570
|
+
# `anum' specifies the maximum number of cached records for document attributes. By default,
|
571
|
+
# it is 8192. If it is not more than 0, the current size is not changed.
|
572
|
+
# `tnum' specifies the maximum number of cached records for document texts. By default, it
|
573
|
+
# is 1024. If it is not more than 0, the current size is not changed.
|
574
|
+
# `rnum' specifies the maximum number of cached records for occurrence results. By default,
|
575
|
+
# it is 256. If it is not more than 0, the current size is not changed.
|
576
|
+
# The return value is always `nil'.
|
577
|
+
def set_cache_size(size, anum, tnum, rnum)
|
578
|
+
# native code ...
|
579
|
+
end
|
580
|
+
# Add a pseudo index directory.
|
581
|
+
# `path' specifies the path of a pseudo index directory.
|
582
|
+
# The return value is true if success, else it is false.
|
583
|
+
def add_pseudo_index(path)
|
584
|
+
# native code ...
|
585
|
+
end
|
586
|
+
# Set the maximum number of expansion of wild cards.
|
587
|
+
# `num' specifies the maximum number of expansion of wild cards.
|
588
|
+
# The return value is always `nil'.
|
589
|
+
def set_wildmax(num)
|
590
|
+
# native code ...
|
591
|
+
end
|
592
|
+
# Set the callback function to inform of database events.
|
593
|
+
# `informer' specifies an arbitrary object with a method named as `inform'. The method
|
594
|
+
# should have one parameter for a string of a message of each event.
|
595
|
+
# The return value is always `nil'.
|
596
|
+
def set_informer(informer)
|
597
|
+
# native code ...
|
598
|
+
end
|
599
|
+
#--------------------------------
|
600
|
+
# private methods
|
601
|
+
#--------------------------------
|
602
|
+
private
|
603
|
+
# Create a database object.
|
604
|
+
def initialize()
|
605
|
+
# native code ...
|
606
|
+
end
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
|
611
|
+
|
612
|
+
# END OF FILE
|