tantiny 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.toml +9 -6
- data/README.md +118 -42
- data/bin/console +2 -3
- data/lib/tantiny/errors.rb +1 -1
- data/lib/tantiny/index.rb +29 -19
- data/lib/tantiny/query.rb +21 -16
- data/lib/tantiny/schema.rb +2 -2
- data/lib/tantiny/version.rb +1 -1
- data/lib/tantiny.rb +21 -10
- data/lib/tantiny.so +0 -0
- data/src/helpers.rs +71 -191
- data/src/index.rs +310 -197
- data/src/lib.rs +12 -9
- data/src/query.rs +246 -203
- data/src/tokenizer.rs +62 -75
- metadata +44 -43
- data/lib/.rbnext/3.0/tantiny/schema.rb +0 -53
- data/sig/tantiny/errors.rbs +0 -20
- data/sig/tantiny/helpers.rbs +0 -8
- data/sig/tantiny/index.rbs +0 -103
- data/sig/tantiny/query.rbs +0 -135
- data/sig/tantiny/schema.rbs +0 -26
- data/sig/tantiny/tokenizer.rbs +0 -25
- data/sig/tantiny/version.rbs +0 -3
- data/sig/tantiny.rbs +0 -5
data/src/index.rs
CHANGED
|
@@ -1,77 +1,65 @@
|
|
|
1
|
+
use magnus::{r_hash::ForEach, Error, Module, Object, RHash, RModule, Ruby, TryConvert, Value};
|
|
2
|
+
use std::cell::RefCell;
|
|
1
3
|
use std::collections::HashMap;
|
|
2
|
-
use std::str::FromStr;
|
|
3
|
-
use rutie::{methods, Object, AnyObject, Integer, NilClass, Array, RString, Hash};
|
|
4
|
-
use tantivy::{doc, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader, DateTime};
|
|
5
|
-
use tantivy::schema::{Schema, TextOptions, TextFieldIndexing, IndexRecordOption, FacetOptions, STRING, STORED, INDEXED, FAST};
|
|
6
4
|
use tantivy::collector::TopDocs;
|
|
7
5
|
use tantivy::directory::MmapDirectory;
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
6
|
+
use tantivy::schema::{
|
|
7
|
+
FacetOptions, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, Value as TantivyValue,
|
|
8
|
+
FAST, INDEXED, STORED, STRING,
|
|
9
|
+
};
|
|
10
|
+
use tantivy::{IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term};
|
|
11
|
+
use time::OffsetDateTime;
|
|
12
|
+
|
|
13
|
+
use crate::helpers::hash_to_multivalue_map;
|
|
14
|
+
use crate::query::Query;
|
|
15
|
+
use crate::tokenizer::Tokenizer;
|
|
16
|
+
|
|
17
|
+
#[magnus::wrap(class = "Tantiny::Index", free_immediately, size)]
|
|
18
|
+
pub struct Index {
|
|
19
|
+
pub schema: Schema,
|
|
20
|
+
index: tantivy::Index,
|
|
21
|
+
index_writer: RefCell<Option<IndexWriter>>,
|
|
22
|
+
index_reader: IndexReader,
|
|
18
23
|
}
|
|
19
24
|
|
|
20
|
-
|
|
25
|
+
impl Index {
|
|
26
|
+
#[allow(clippy::too_many_arguments)]
|
|
27
|
+
fn new(
|
|
28
|
+
path: Option<String>,
|
|
29
|
+
default_tokenizer: &Tokenizer,
|
|
30
|
+
field_tokenizers: RHash,
|
|
31
|
+
text_fields: Vec<String>,
|
|
32
|
+
string_fields: Vec<String>,
|
|
33
|
+
integer_fields: Vec<String>,
|
|
34
|
+
double_fields: Vec<String>,
|
|
35
|
+
date_fields: Vec<String>,
|
|
36
|
+
facet_fields: Vec<String>,
|
|
37
|
+
) -> Result<Self, Error> {
|
|
38
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
39
|
+
let field_tokenizers_map: HashMap<String, &Tokenizer> = {
|
|
40
|
+
let mut map = HashMap::new();
|
|
41
|
+
field_tokenizers.foreach(|key: String, value: Value| {
|
|
42
|
+
let tokenizer: &Tokenizer = <&Tokenizer>::try_convert(value)?;
|
|
43
|
+
map.insert(key, tokenizer);
|
|
44
|
+
Ok(ForEach::Continue)
|
|
45
|
+
})?;
|
|
46
|
+
map
|
|
47
|
+
};
|
|
21
48
|
|
|
22
|
-
pub(crate) fn unwrap_index(index: &RTantinyIndex) -> &TantinyIndex {
|
|
23
|
-
index.get_data(&*TANTINY_INDEX_WRAPPER)
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
pub(crate) fn unwrap_index_mut(index: &mut RTantinyIndex) -> &mut TantinyIndex {
|
|
27
|
-
index.get_data_mut(&*TANTINY_INDEX_WRAPPER)
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
#[rustfmt::skip::macros(methods)]
|
|
31
|
-
methods!(
|
|
32
|
-
RTantinyIndex,
|
|
33
|
-
_itself,
|
|
34
|
-
|
|
35
|
-
fn new_index(
|
|
36
|
-
path: RString,
|
|
37
|
-
default_tokenizer: AnyObject,
|
|
38
|
-
field_tokenizers: Hash,
|
|
39
|
-
text_fields: Array,
|
|
40
|
-
string_fields: Array,
|
|
41
|
-
integer_fields: Array,
|
|
42
|
-
double_fields: Array,
|
|
43
|
-
date_fields: Array,
|
|
44
|
-
facet_fields: Array
|
|
45
|
-
) -> RTantinyIndex {
|
|
46
|
-
try_unwrap_params!(
|
|
47
|
-
path: String,
|
|
48
|
-
default_tokenizer: RTantinyTokenizer,
|
|
49
|
-
field_tokenizers: HashMap<String, RTantinyTokenizer>,
|
|
50
|
-
text_fields: Vec<String>,
|
|
51
|
-
string_fields: Vec<String>,
|
|
52
|
-
integer_fields: Vec<String>,
|
|
53
|
-
double_fields: Vec<String>,
|
|
54
|
-
date_fields: Vec<String>,
|
|
55
|
-
facet_fields: Vec<String>
|
|
56
|
-
);
|
|
57
|
-
|
|
58
|
-
let index_path = MmapDirectory::open(path).try_unwrap();
|
|
59
49
|
let mut schema_builder = Schema::builder();
|
|
60
50
|
|
|
61
51
|
schema_builder.add_text_field("id", STRING | STORED);
|
|
62
52
|
|
|
63
53
|
for field in text_fields {
|
|
64
|
-
let tokenizer_name =
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
};
|
|
54
|
+
let tokenizer_name = if field_tokenizers_map.contains_key(&field) {
|
|
55
|
+
&field
|
|
56
|
+
} else {
|
|
57
|
+
"default"
|
|
58
|
+
};
|
|
70
59
|
let indexing = TextFieldIndexing::default()
|
|
71
60
|
.set_tokenizer(tokenizer_name)
|
|
72
61
|
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
|
73
|
-
let options = TextOptions::default()
|
|
74
|
-
.set_indexing_options(indexing);
|
|
62
|
+
let options = TextOptions::default().set_indexing_options(indexing);
|
|
75
63
|
schema_builder.add_text_field(&field, options);
|
|
76
64
|
}
|
|
77
65
|
|
|
@@ -92,197 +80,322 @@ methods!(
|
|
|
92
80
|
}
|
|
93
81
|
|
|
94
82
|
for field in facet_fields {
|
|
95
|
-
|
|
96
|
-
schema_builder.add_facet_field(&field, options);
|
|
83
|
+
schema_builder.add_facet_field(&field, FacetOptions::default());
|
|
97
84
|
}
|
|
98
85
|
|
|
99
86
|
let schema = schema_builder.build();
|
|
100
|
-
|
|
87
|
+
|
|
88
|
+
// Create index based on whether path is provided
|
|
89
|
+
let index = match path {
|
|
90
|
+
Some(path_str) => {
|
|
91
|
+
let index_path = MmapDirectory::open(path_str).map_err(|e| {
|
|
92
|
+
Error::new(
|
|
93
|
+
ruby.exception_runtime_error(),
|
|
94
|
+
format!("Failed to open directory: {}", e),
|
|
95
|
+
)
|
|
96
|
+
})?;
|
|
97
|
+
tantivy::Index::open_or_create(index_path, schema.clone()).map_err(|e| {
|
|
98
|
+
Error::new(
|
|
99
|
+
ruby.exception_runtime_error(),
|
|
100
|
+
format!("Failed to create index: {}", e),
|
|
101
|
+
)
|
|
102
|
+
})?
|
|
103
|
+
}
|
|
104
|
+
None => {
|
|
105
|
+
// Create in-memory index
|
|
106
|
+
tantivy::Index::create_in_ram(schema.clone())
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
// Access the tokenizers field before moving index
|
|
101
111
|
let tokenizers = index.tokenizers();
|
|
102
112
|
|
|
103
|
-
tokenizers
|
|
113
|
+
// Register tokenizers
|
|
114
|
+
tokenizers.register("default", default_tokenizer.get_analyzer());
|
|
104
115
|
|
|
105
|
-
for (field, tokenizer) in
|
|
106
|
-
tokenizers.register(&field,
|
|
116
|
+
for (field, tokenizer) in field_tokenizers_map {
|
|
117
|
+
tokenizers.register(&field, tokenizer.get_analyzer())
|
|
107
118
|
}
|
|
108
119
|
|
|
109
|
-
let index_writer = None;
|
|
110
|
-
|
|
111
120
|
let index_reader = index
|
|
112
121
|
.reader_builder()
|
|
113
122
|
.reload_policy(ReloadPolicy::Manual)
|
|
114
123
|
.try_into()
|
|
115
|
-
.
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
124
|
+
.map_err(|e| {
|
|
125
|
+
Error::new(
|
|
126
|
+
ruby.exception_runtime_error(),
|
|
127
|
+
format!("Failed to create reader: {}", e),
|
|
128
|
+
)
|
|
129
|
+
})?;
|
|
130
|
+
|
|
131
|
+
Ok(Index {
|
|
132
|
+
schema,
|
|
133
|
+
index,
|
|
134
|
+
index_writer: RefCell::new(None),
|
|
135
|
+
index_reader,
|
|
136
|
+
})
|
|
121
137
|
}
|
|
122
138
|
|
|
139
|
+
#[allow(clippy::too_many_arguments)]
|
|
123
140
|
fn add_document(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
let
|
|
143
|
-
let
|
|
144
|
-
let
|
|
145
|
-
|
|
146
|
-
let mut doc =
|
|
147
|
-
|
|
148
|
-
let id_field = schema.get_field("id").
|
|
141
|
+
&self,
|
|
142
|
+
id: String,
|
|
143
|
+
text_fields: RHash,
|
|
144
|
+
string_fields: RHash,
|
|
145
|
+
integer_fields: RHash,
|
|
146
|
+
double_fields: RHash,
|
|
147
|
+
date_fields: RHash,
|
|
148
|
+
facet_fields: RHash,
|
|
149
|
+
) -> Result<(), Error> {
|
|
150
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
151
|
+
let index_writer = self.index_writer.borrow();
|
|
152
|
+
let index_writer = index_writer.as_ref().ok_or_else(|| {
|
|
153
|
+
Error::new(ruby.exception_runtime_error(), "No index writer available")
|
|
154
|
+
})?;
|
|
155
|
+
|
|
156
|
+
let text_map: HashMap<String, Vec<String>> = hash_to_multivalue_map(text_fields)?;
|
|
157
|
+
let string_map: HashMap<String, Vec<String>> = hash_to_multivalue_map(string_fields)?;
|
|
158
|
+
let integer_map: HashMap<String, Vec<i64>> = hash_to_multivalue_map(integer_fields)?;
|
|
159
|
+
let double_map: HashMap<String, Vec<f64>> = hash_to_multivalue_map(double_fields)?;
|
|
160
|
+
let date_map: HashMap<String, Vec<String>> = hash_to_multivalue_map(date_fields)?;
|
|
161
|
+
let facet_map: HashMap<String, Vec<String>> = hash_to_multivalue_map(facet_fields)?;
|
|
162
|
+
|
|
163
|
+
let mut doc = TantivyDocument::default();
|
|
164
|
+
|
|
165
|
+
let id_field = self.schema.get_field("id").map_err(|e| {
|
|
166
|
+
Error::new(
|
|
167
|
+
ruby.exception_runtime_error(),
|
|
168
|
+
format!("Failed to get id field: {}", e),
|
|
169
|
+
)
|
|
170
|
+
})?;
|
|
149
171
|
doc.add_text(id_field, &id);
|
|
150
172
|
|
|
151
|
-
for (key,
|
|
152
|
-
let field = schema.get_field(key).
|
|
153
|
-
|
|
173
|
+
for (key, values) in text_map.iter() {
|
|
174
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
175
|
+
Error::new(
|
|
176
|
+
ruby.exception_runtime_error(),
|
|
177
|
+
format!("Failed to get field {}: {}", key, e),
|
|
178
|
+
)
|
|
179
|
+
})?;
|
|
180
|
+
for value in values {
|
|
181
|
+
doc.add_text(field, value);
|
|
182
|
+
}
|
|
154
183
|
}
|
|
155
184
|
|
|
156
|
-
for (key,
|
|
157
|
-
let field = schema.get_field(key).
|
|
158
|
-
|
|
185
|
+
for (key, values) in string_map.iter() {
|
|
186
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
187
|
+
Error::new(
|
|
188
|
+
ruby.exception_runtime_error(),
|
|
189
|
+
format!("Failed to get field {}: {}", key, e),
|
|
190
|
+
)
|
|
191
|
+
})?;
|
|
192
|
+
for value in values {
|
|
193
|
+
doc.add_text(field, value);
|
|
194
|
+
}
|
|
159
195
|
}
|
|
160
196
|
|
|
161
|
-
for (key,
|
|
162
|
-
let field = schema.get_field(key).
|
|
163
|
-
|
|
197
|
+
for (key, values) in integer_map.iter() {
|
|
198
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
199
|
+
Error::new(
|
|
200
|
+
ruby.exception_runtime_error(),
|
|
201
|
+
format!("Failed to get field {}: {}", key, e),
|
|
202
|
+
)
|
|
203
|
+
})?;
|
|
204
|
+
for &value in values {
|
|
205
|
+
doc.add_i64(field, value);
|
|
206
|
+
}
|
|
164
207
|
}
|
|
165
208
|
|
|
166
|
-
for (key,
|
|
167
|
-
let field = schema.get_field(key).
|
|
168
|
-
|
|
209
|
+
for (key, values) in double_map.iter() {
|
|
210
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
211
|
+
Error::new(
|
|
212
|
+
ruby.exception_runtime_error(),
|
|
213
|
+
format!("Failed to get field {}: {}", key, e),
|
|
214
|
+
)
|
|
215
|
+
})?;
|
|
216
|
+
for &value in values {
|
|
217
|
+
doc.add_f64(field, value);
|
|
218
|
+
}
|
|
169
219
|
}
|
|
170
220
|
|
|
171
|
-
for (key,
|
|
172
|
-
let field = schema.get_field(key).
|
|
173
|
-
|
|
174
|
-
|
|
221
|
+
for (key, values) in date_map.iter() {
|
|
222
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
223
|
+
Error::new(
|
|
224
|
+
ruby.exception_runtime_error(),
|
|
225
|
+
format!("Failed to get field {}: {}", key, e),
|
|
226
|
+
)
|
|
227
|
+
})?;
|
|
228
|
+
for value in values {
|
|
229
|
+
let datetime =
|
|
230
|
+
OffsetDateTime::parse(value, &time::format_description::well_known::Rfc3339)
|
|
231
|
+
.map_err(|e| {
|
|
232
|
+
Error::new(
|
|
233
|
+
ruby.exception_runtime_error(),
|
|
234
|
+
format!("Invalid date format: {}", e),
|
|
235
|
+
)
|
|
236
|
+
})?;
|
|
237
|
+
doc.add_date(
|
|
238
|
+
field,
|
|
239
|
+
tantivy::DateTime::from_timestamp_nanos(datetime.unix_timestamp_nanos() as i64),
|
|
240
|
+
);
|
|
241
|
+
}
|
|
175
242
|
}
|
|
176
243
|
|
|
177
|
-
for (key,
|
|
178
|
-
let field = schema.get_field(key).
|
|
179
|
-
|
|
244
|
+
for (key, values) in facet_map.iter() {
|
|
245
|
+
let field = self.schema.get_field(key).map_err(|e| {
|
|
246
|
+
Error::new(
|
|
247
|
+
ruby.exception_runtime_error(),
|
|
248
|
+
format!("Failed to get field {}: {}", key, e),
|
|
249
|
+
)
|
|
250
|
+
})?;
|
|
251
|
+
for value in values {
|
|
252
|
+
doc.add_facet(field, value);
|
|
253
|
+
}
|
|
180
254
|
}
|
|
181
255
|
|
|
182
256
|
let doc_id = Term::from_field_text(id_field, &id);
|
|
183
257
|
index_writer.delete_term(doc_id.clone());
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
258
|
+
index_writer.add_document(doc).map_err(|e| {
|
|
259
|
+
Error::new(
|
|
260
|
+
ruby.exception_runtime_error(),
|
|
261
|
+
format!("Failed to add document: {}", e),
|
|
262
|
+
)
|
|
263
|
+
})?;
|
|
264
|
+
|
|
265
|
+
Ok(())
|
|
188
266
|
}
|
|
189
267
|
|
|
190
|
-
fn delete_document(id:
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
let
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
268
|
+
fn delete_document(&self, id: String) -> Result<(), Error> {
|
|
269
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
270
|
+
let index_writer = self.index_writer.borrow();
|
|
271
|
+
let index_writer = index_writer.as_ref().ok_or_else(|| {
|
|
272
|
+
Error::new(ruby.exception_runtime_error(), "No index writer available")
|
|
273
|
+
})?;
|
|
274
|
+
|
|
275
|
+
let id_field = self.schema.get_field("id").map_err(|e| {
|
|
276
|
+
Error::new(
|
|
277
|
+
ruby.exception_runtime_error(),
|
|
278
|
+
format!("Failed to get id field: {}", e),
|
|
279
|
+
)
|
|
280
|
+
})?;
|
|
197
281
|
let doc_id = Term::from_field_text(id_field, &id);
|
|
198
282
|
|
|
199
283
|
index_writer.delete_term(doc_id.clone());
|
|
200
|
-
|
|
201
|
-
NilClass::new()
|
|
284
|
+
Ok(())
|
|
202
285
|
}
|
|
203
286
|
|
|
204
|
-
fn acquire_index_writer(
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
internal.index_writer = Some(index_writer);
|
|
216
|
-
|
|
217
|
-
NilClass::new()
|
|
287
|
+
fn acquire_index_writer(&self, overall_memory: i64) -> Result<(), Error> {
|
|
288
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
289
|
+
let index_writer = self.index.writer(overall_memory as usize).map_err(|e| {
|
|
290
|
+
Error::new(
|
|
291
|
+
ruby.exception_runtime_error(),
|
|
292
|
+
format!("Failed to create writer: {}", e),
|
|
293
|
+
)
|
|
294
|
+
})?;
|
|
295
|
+
|
|
296
|
+
*self.index_writer.borrow_mut() = Some(index_writer);
|
|
297
|
+
Ok(())
|
|
218
298
|
}
|
|
219
299
|
|
|
220
|
-
fn release_index_writer() ->
|
|
221
|
-
let
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
300
|
+
fn release_index_writer(&self) -> Result<(), Error> {
|
|
301
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
302
|
+
let mut writer = self.index_writer.borrow_mut();
|
|
303
|
+
if writer.is_none() {
|
|
304
|
+
return Err(Error::new(
|
|
305
|
+
ruby.exception_runtime_error(),
|
|
306
|
+
"No index writer to release",
|
|
307
|
+
));
|
|
308
|
+
}
|
|
309
|
+
*writer = None;
|
|
310
|
+
Ok(())
|
|
227
311
|
}
|
|
228
312
|
|
|
229
|
-
fn commit() ->
|
|
230
|
-
let
|
|
231
|
-
let
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
313
|
+
fn commit(&self) -> Result<(), Error> {
|
|
314
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
315
|
+
let mut writer_cell = self.index_writer.borrow_mut();
|
|
316
|
+
let index_writer = writer_cell.as_mut().ok_or_else(|| {
|
|
317
|
+
Error::new(ruby.exception_runtime_error(), "No index writer available")
|
|
318
|
+
})?;
|
|
319
|
+
|
|
320
|
+
index_writer.commit().map_err(|e| {
|
|
321
|
+
Error::new(
|
|
322
|
+
ruby.exception_runtime_error(),
|
|
323
|
+
format!("Failed to commit: {}", e),
|
|
324
|
+
)
|
|
325
|
+
})?;
|
|
326
|
+
Ok(())
|
|
236
327
|
}
|
|
237
328
|
|
|
238
|
-
fn reload() ->
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
329
|
+
fn reload(&self) -> Result<(), Error> {
|
|
330
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
331
|
+
self.index_reader.reload().map_err(|e| {
|
|
332
|
+
Error::new(
|
|
333
|
+
ruby.exception_runtime_error(),
|
|
334
|
+
format!("Failed to reload: {}", e),
|
|
335
|
+
)
|
|
336
|
+
})?;
|
|
337
|
+
Ok(())
|
|
242
338
|
}
|
|
243
339
|
|
|
244
|
-
fn search(
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
)
|
|
252
|
-
|
|
253
|
-
let internal = unwrap_index(&_itself);
|
|
254
|
-
let id_field = internal.schema.get_field("id").try_unwrap();
|
|
255
|
-
let searcher = internal.index_reader.searcher();
|
|
256
|
-
let query = unwrap_query(&query);
|
|
340
|
+
fn search(&self, query: &Query, limit: i64) -> Result<Vec<String>, Error> {
|
|
341
|
+
let ruby = unsafe { Ruby::get_unchecked() };
|
|
342
|
+
let id_field = self.schema.get_field("id").map_err(|e| {
|
|
343
|
+
Error::new(
|
|
344
|
+
ruby.exception_runtime_error(),
|
|
345
|
+
format!("Failed to get id field: {}", e),
|
|
346
|
+
)
|
|
347
|
+
})?;
|
|
348
|
+
let searcher = self.index_reader.searcher();
|
|
257
349
|
|
|
258
350
|
let top_docs = searcher
|
|
259
|
-
.search(query, &TopDocs::with_limit(limit as usize))
|
|
260
|
-
.
|
|
351
|
+
.search(query.get_query(), &TopDocs::with_limit(limit as usize))
|
|
352
|
+
.map_err(|e| {
|
|
353
|
+
Error::new(
|
|
354
|
+
ruby.exception_runtime_error(),
|
|
355
|
+
format!("Search failed: {}", e),
|
|
356
|
+
)
|
|
357
|
+
})?;
|
|
261
358
|
|
|
262
|
-
let mut
|
|
359
|
+
let mut results = Vec::with_capacity(top_docs.len());
|
|
263
360
|
|
|
264
361
|
for (_score, doc_address) in top_docs {
|
|
265
|
-
let doc = searcher.doc(doc_address).
|
|
362
|
+
let doc: TantivyDocument = searcher.doc(doc_address).map_err(|e| {
|
|
363
|
+
Error::new(
|
|
364
|
+
ruby.exception_runtime_error(),
|
|
365
|
+
format!("Failed to get document: {}", e),
|
|
366
|
+
)
|
|
367
|
+
})?;
|
|
266
368
|
if let Some(value) = doc.get_first(id_field) {
|
|
267
|
-
if let Some(id) =
|
|
268
|
-
|
|
369
|
+
if let Some(id) = value.as_str() {
|
|
370
|
+
results.push(id.to_string());
|
|
269
371
|
}
|
|
270
372
|
}
|
|
271
373
|
}
|
|
272
374
|
|
|
273
|
-
|
|
375
|
+
Ok(results)
|
|
274
376
|
}
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
pub
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
pub fn init(ruby: &Ruby, module: RModule) -> Result<(), Error> {
|
|
380
|
+
let class = module.define_class("Index", ruby.class_object())?;
|
|
381
|
+
|
|
382
|
+
class.define_singleton_method("__new", magnus::function!(Index::new, 9))?;
|
|
383
|
+
class.define_method("__add_document", magnus::method!(Index::add_document, 7))?;
|
|
384
|
+
class.define_method(
|
|
385
|
+
"__delete_document",
|
|
386
|
+
magnus::method!(Index::delete_document, 1),
|
|
387
|
+
)?;
|
|
388
|
+
class.define_method(
|
|
389
|
+
"__acquire_index_writer",
|
|
390
|
+
magnus::method!(Index::acquire_index_writer, 1),
|
|
391
|
+
)?;
|
|
392
|
+
class.define_method(
|
|
393
|
+
"__release_index_writer",
|
|
394
|
+
magnus::method!(Index::release_index_writer, 0),
|
|
395
|
+
)?;
|
|
396
|
+
class.define_method("__commit", magnus::method!(Index::commit, 0))?;
|
|
397
|
+
class.define_method("__reload", magnus::method!(Index::reload, 0))?;
|
|
398
|
+
class.define_method("__search", magnus::method!(Index::search, 2))?;
|
|
399
|
+
|
|
400
|
+
Ok(())
|
|
401
|
+
}
|
data/src/lib.rs
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
mod helpers;
|
|
2
|
-
#[allow(improper_ctypes_definitions)]
|
|
3
2
|
mod index;
|
|
4
|
-
#[allow(improper_ctypes_definitions)]
|
|
5
3
|
mod query;
|
|
6
|
-
#[allow(improper_ctypes_definitions)]
|
|
7
4
|
mod tokenizer;
|
|
8
5
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
6
|
+
use magnus::{Error, Ruby};
|
|
7
|
+
|
|
8
|
+
#[magnus::init]
|
|
9
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
10
|
+
let module = ruby.define_module("Tantiny")?;
|
|
11
|
+
|
|
12
|
+
index::init(ruby, module)?;
|
|
13
|
+
query::init(ruby, module)?;
|
|
14
|
+
tokenizer::init(ruby, module)?;
|
|
15
|
+
|
|
16
|
+
Ok(())
|
|
17
|
+
}
|