tantiny 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/src/query.rs ADDED
@@ -0,0 +1,260 @@
1
+ use std::str::FromStr;
2
+ use std::ops::Bound::Included;
3
+ use rutie::{methods, Object, AnyObject, Integer, Float, Array, RString};
4
+ use tantivy::{Term, DateTime};
5
+ use tantivy::schema::{IndexRecordOption, Facet, Type, FieldType};
6
+ use tantivy::query::*;
7
+
8
+ use crate::helpers::{try_unwrap_params, scaffold, TryUnwrap};
9
+ use crate::index::{unwrap_index, RTantinyIndex};
10
+
11
+ pub struct TantinyQuery(pub(crate) Box<dyn Query>);
12
+
13
+ scaffold!(RTantinyQuery, TantinyQuery, "Query");
14
+
15
+ fn wrap_query(query: Box<dyn Query>) -> RTantinyQuery {
16
+ klass().wrap_data(
17
+ TantinyQuery(query),
18
+ &*TANTINY_QUERY_WRAPPER
19
+ )
20
+ }
21
+
22
+ pub(crate) fn unwrap_query(query: &RTantinyQuery) -> &Box<dyn Query> {
23
+ &query.get_data(&*TANTINY_QUERY_WRAPPER).0
24
+ }
25
+
26
+ #[rustfmt::skip::macros(methods)]
27
+ methods!(
28
+ RTantinyQuery,
29
+ _itself,
30
+
31
+ fn new_all_query() -> RTantinyQuery {
32
+ wrap_query(Box::new(AllQuery))
33
+ }
34
+
35
+ fn new_empty_query() -> RTantinyQuery {
36
+ wrap_query(Box::new(EmptyQuery))
37
+ }
38
+
39
+ fn new_term_query(
40
+ index: RTantinyIndex,
41
+ field: RString,
42
+ term: RString
43
+ ) -> RTantinyQuery {
44
+ try_unwrap_params!(
45
+ index,
46
+ field: String,
47
+ term: String
48
+ );
49
+
50
+ let schema = &unwrap_index(&index).schema;
51
+ let field = schema.get_field(&field).try_unwrap();
52
+ let term = Term::from_field_text(field, &term);
53
+ let query = TermQuery::new(term, IndexRecordOption::Basic);
54
+
55
+ wrap_query(Box::new(query))
56
+ }
57
+
58
+ fn new_fuzzy_term_query(
59
+ index: RTantinyIndex,
60
+ field: RString,
61
+ term: RString,
62
+ distance: Integer
63
+ ) -> RTantinyQuery {
64
+ try_unwrap_params!(
65
+ index,
66
+ field: String,
67
+ term: String,
68
+ distance: i64
69
+ );
70
+
71
+ let schema = &unwrap_index(&index).schema;
72
+ let field = schema.get_field(&field).try_unwrap();
73
+ let term = Term::from_field_text(field, &term);
74
+ let query = FuzzyTermQuery::new(term, distance as u8, true);
75
+
76
+ wrap_query(Box::new(query))
77
+ }
78
+
79
+ fn new_phrase_query(
80
+ index: RTantinyIndex,
81
+ field: RString,
82
+ terms: Array
83
+ ) -> RTantinyQuery {
84
+ try_unwrap_params!(
85
+ index,
86
+ field: String,
87
+ terms: Vec<String>
88
+ );
89
+
90
+ let schema = &unwrap_index(&index).schema;
91
+ let field = schema.get_field(&field).try_unwrap();
92
+
93
+ let terms: Vec<Term> = terms.into_iter().map(|term| {
94
+ Term::from_field_text(field, &term)
95
+ }).collect();
96
+ let query = PhraseQuery::new(terms);
97
+
98
+ wrap_query(Box::new(query))
99
+ }
100
+
101
+ fn new_regex_query(
102
+ index: RTantinyIndex,
103
+ field: RString,
104
+ regex: RString
105
+ ) -> RTantinyQuery {
106
+ try_unwrap_params!(
107
+ index,
108
+ field: String,
109
+ regex: String
110
+ );
111
+
112
+ let schema = &unwrap_index(&index).schema;
113
+ let field = schema.get_field(&field).try_unwrap();
114
+ let query = RegexQuery::from_pattern(&regex, field).try_unwrap();
115
+
116
+ wrap_query(Box::new(query))
117
+ }
118
+
119
+ fn new_range_query(
120
+ index: RTantinyIndex,
121
+ field: RString,
122
+ from: AnyObject,
123
+ to: AnyObject
124
+ ) -> RTantinyQuery {
125
+ try_unwrap_params!(index, from, to, field: String);
126
+
127
+ let schema = &unwrap_index(&index).schema;
128
+ let field = schema.get_field(&field).try_unwrap();
129
+ let field_name = schema.get_field_name(field);
130
+ let field_type = schema.get_field_entry(field).field_type();
131
+
132
+ let range = match field_type {
133
+ FieldType::Date(_) => {
134
+ let from: String = from.try_unwrap();
135
+ let to: String = to.try_unwrap();
136
+ let from = DateTime::from_str(&from).try_unwrap();
137
+ let to = DateTime::from_str(&to).try_unwrap();
138
+
139
+ Ok((
140
+ Type::Date,
141
+ Included(Term::from_field_date(field, &from)),
142
+ Included(Term::from_field_date(field, &to))
143
+ ))
144
+ },
145
+ FieldType::I64(_) => {
146
+ let from: i64 = from.try_unwrap();
147
+ let to: i64 = to.try_unwrap();
148
+
149
+ Ok((
150
+ Type::I64,
151
+ Included(Term::from_field_i64(field, from)),
152
+ Included(Term::from_field_i64(field, to))
153
+ ))
154
+ },
155
+ FieldType::F64(_) => {
156
+ let from: f64 = from.try_unwrap();
157
+ let to: f64 = to.try_unwrap();
158
+
159
+ Ok((
160
+ Type::F64,
161
+ Included(Term::from_field_f64(field, from)),
162
+ Included(Term::from_field_f64(field, to))
163
+ ))
164
+ },
165
+ _ => { Err(format!("Field '{}' is not supported by range query.", field_name)) }
166
+ };
167
+
168
+ let (value_type, left, right) = range.try_unwrap();
169
+
170
+ let query = RangeQuery::new_term_bounds(field, value_type, &left, &right);
171
+
172
+ wrap_query(Box::new(query))
173
+ }
174
+
175
+ fn new_facet_query(
176
+ index: RTantinyIndex,
177
+ field: RString,
178
+ path: RString
179
+ ) -> RTantinyQuery {
180
+ try_unwrap_params!(
181
+ index,
182
+ field: String,
183
+ path: String
184
+ );
185
+
186
+ let schema = &unwrap_index(&index).schema;
187
+ let field = schema.get_field(&field).try_unwrap();
188
+ let facet = Facet::from(&path);
189
+ let term = Term::from_facet(field, &facet);
190
+ let query = TermQuery::new(term, IndexRecordOption::Basic);
191
+
192
+ wrap_query(Box::new(query))
193
+ }
194
+
195
+ fn disjunction(queries: Array) -> RTantinyQuery {
196
+ try_unwrap_params!(queries);
197
+
198
+ let mut query_vec = Vec::new();
199
+
200
+ for query in queries {
201
+ let query: RTantinyQuery = query.try_unwrap();
202
+ query_vec.push((Occur::Should, unwrap_query(&query).box_clone()));
203
+ }
204
+
205
+ let disjunction_query = BooleanQuery::from(query_vec);
206
+
207
+ wrap_query(Box::new(disjunction_query))
208
+ }
209
+
210
+ fn conjunction(queries: Array) -> RTantinyQuery {
211
+ try_unwrap_params!(queries);
212
+
213
+ let mut query_vec = Vec::new();
214
+
215
+ for query in queries {
216
+ let query: RTantinyQuery = query.try_unwrap();
217
+ query_vec.push((Occur::Must, unwrap_query(&query).box_clone()));
218
+ }
219
+
220
+ let conjunction_query = BooleanQuery::from(query_vec);
221
+
222
+ wrap_query(Box::new(conjunction_query))
223
+ }
224
+
225
+ fn negation() -> RTantinyQuery {
226
+ // See: https://github.com/quickwit-oss/tantivy/issues/1153
227
+ let all_query: Box<dyn Query> = Box::new(AllQuery);
228
+ let negation_query = BooleanQuery::from(vec![
229
+ (Occur::Must, all_query.box_clone()),
230
+ (Occur::MustNot, unwrap_query(&_itself).box_clone()),
231
+ ]);
232
+
233
+ wrap_query(Box::new(negation_query))
234
+ }
235
+
236
+ fn boost(score: Float) -> RTantinyQuery {
237
+ try_unwrap_params!(score: f64);
238
+
239
+ let query = BoostQuery::new(unwrap_query(&_itself).box_clone(), score as f32);
240
+
241
+ wrap_query(Box::new(query))
242
+ }
243
+ );
244
+
245
+ pub(super) fn init() {
246
+ klass().define(|klass| {
247
+ klass.def_self("__new_all_query", new_all_query);
248
+ klass.def_self("__new_empty_query", new_empty_query);
249
+ klass.def_self("__new_term_query", new_term_query);
250
+ klass.def_self("__new_fuzzy_term_query", new_fuzzy_term_query);
251
+ klass.def_self("__new_regex_query", new_regex_query);
252
+ klass.def_self("__new_range_query", new_range_query);
253
+ klass.def_self("__new_phrase_query", new_phrase_query);
254
+ klass.def_self("__new_facet_query", new_facet_query);
255
+ klass.def_self("__disjunction", disjunction);
256
+ klass.def_self("__conjunction", conjunction);
257
+ klass.def("__negation", negation);
258
+ klass.def("__boost", boost);
259
+ });
260
+ }
data/src/tokenizer.rs ADDED
@@ -0,0 +1,94 @@
1
+
2
+ use rutie::{methods, Object, Array, RString, Integer, Boolean};
3
+ use tantivy::tokenizer::{TextAnalyzer, SimpleTokenizer, RemoveLongFilter, LowerCaser, Stemmer, NgramTokenizer};
4
+
5
+ use crate::helpers::{try_unwrap_params, scaffold, TryUnwrap, LanguageWrapper};
6
+
7
+ pub struct TantinyTokenizer(pub(crate) TextAnalyzer);
8
+
9
+ scaffold!(RTantinyTokenizer, TantinyTokenizer, "Tokenizer");
10
+
11
+ fn wrap_tokenizer(tokenizer: TextAnalyzer) -> RTantinyTokenizer {
12
+ klass().wrap_data(
13
+ TantinyTokenizer(tokenizer),
14
+ &*TANTINY_TOKENIZER_WRAPPER
15
+ )
16
+ }
17
+
18
+ pub(crate) fn unwrap_tokenizer(tokenizer: &RTantinyTokenizer) -> &TextAnalyzer {
19
+ &tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0
20
+ }
21
+
22
+ #[rustfmt::skip::macros(methods)]
23
+ methods!(
24
+ RTantinyTokenizer,
25
+ _itself,
26
+
27
+ fn new_simple_tokenizer() -> RTantinyTokenizer {
28
+ let tokenizer = TextAnalyzer::from(SimpleTokenizer)
29
+ .filter(RemoveLongFilter::limit(40))
30
+ .filter(LowerCaser);
31
+
32
+ wrap_tokenizer(tokenizer)
33
+ }
34
+
35
+ fn new_stemmer_tokenizer(locale_code: RString) -> RTantinyTokenizer {
36
+ try_unwrap_params!(locale_code: String);
37
+
38
+ let language: LanguageWrapper = locale_code.parse().try_unwrap();
39
+ let tokenizer = TextAnalyzer::from(SimpleTokenizer)
40
+ .filter(RemoveLongFilter::limit(40))
41
+ .filter(LowerCaser)
42
+ .filter(Stemmer::new(language.0));
43
+
44
+ wrap_tokenizer(tokenizer)
45
+ }
46
+
47
+ fn new_ngram_tokenizer(
48
+ min_gram: Integer,
49
+ max_gram: Integer,
50
+ prefix_only: Boolean
51
+ ) -> RTantinyTokenizer {
52
+ try_unwrap_params!(
53
+ min_gram: i64,
54
+ max_gram: i64,
55
+ prefix_only: bool
56
+ );
57
+
58
+ let tokenizer = NgramTokenizer::new(
59
+ min_gram as usize,
60
+ max_gram as usize,
61
+ prefix_only
62
+ );
63
+
64
+ wrap_tokenizer(TextAnalyzer::from(tokenizer))
65
+ }
66
+
67
+ fn extract_terms(text: RString) -> Array {
68
+ try_unwrap_params!(text: String);
69
+
70
+ let mut token_stream = unwrap_tokenizer(&_itself).token_stream(&text);
71
+ let mut terms = vec![];
72
+
73
+ while token_stream.advance() {
74
+ terms.push(token_stream.token().clone().text);
75
+ }
76
+
77
+ let mut array = Array::with_capacity(terms.len());
78
+
79
+ for term in terms {
80
+ array.push(RString::from(term));
81
+ }
82
+
83
+ array
84
+ }
85
+ );
86
+
87
+ pub(super) fn init() {
88
+ klass().define(|klass| {
89
+ klass.def_self("__new_simple_tokenizer", new_simple_tokenizer);
90
+ klass.def_self("__new_stemmer_tokenizer", new_stemmer_tokenizer);
91
+ klass.def_self("__new_ngram_tokenizer", new_ngram_tokenizer);
92
+ klass.def("__extract_terms", extract_terms);
93
+ });
94
+ }
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tantiny
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.2
5
+ platform: ruby
6
+ authors:
7
+ - Alexander Baygeldin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-03-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ruby-next
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.14.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.14.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rutie
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.0.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: thermite
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '13.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '13.0'
69
+ description:
70
+ email:
71
+ - a.baygeldin@gmail.com
72
+ executables: []
73
+ extensions:
74
+ - ext/Rakefile
75
+ extra_rdoc_files: []
76
+ files:
77
+ - CHANGELOG.md
78
+ - Cargo.toml
79
+ - LICENSE
80
+ - README.md
81
+ - bin/console
82
+ - bin/setup
83
+ - ext/Rakefile
84
+ - lib/.rbnext/3.0/tantiny/schema.rb
85
+ - lib/tantiny.rb
86
+ - lib/tantiny.so
87
+ - lib/tantiny/errors.rb
88
+ - lib/tantiny/helpers.rb
89
+ - lib/tantiny/index.rb
90
+ - lib/tantiny/query.rb
91
+ - lib/tantiny/schema.rb
92
+ - lib/tantiny/tokenizer.rb
93
+ - lib/tantiny/version.rb
94
+ - sig/tantiny.rbs
95
+ - sig/tantiny/errors.rbs
96
+ - sig/tantiny/helpers.rbs
97
+ - sig/tantiny/index.rbs
98
+ - sig/tantiny/query.rbs
99
+ - sig/tantiny/schema.rbs
100
+ - sig/tantiny/tokenizer.rbs
101
+ - sig/tantiny/version.rbs
102
+ - src/helpers.rs
103
+ - src/index.rs
104
+ - src/lib.rs
105
+ - src/query.rs
106
+ - src/tokenizer.rs
107
+ homepage: https://github.com/baygeldin/tantiny
108
+ licenses:
109
+ - MIT
110
+ metadata:
111
+ bug_tracker_uri: https://github.com/baygeldin/tantiny/issues
112
+ changelog_uri: https://github.com/baygeldin/tantiny/blob/master/CHANGELOG.md
113
+ documentation_uri: https://github.com/baygeldin/tantiny/blob/master/README.md
114
+ homepage_uri: https://github.com/baygeldin/tantiny
115
+ source_code_uri: https://github.com/baygeldin/tantiny
116
+ post_install_message:
117
+ rdoc_options: []
118
+ require_paths:
119
+ - lib
120
+ required_ruby_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '2.6'
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ requirements: []
131
+ rubygems_version: 3.3.7
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: Tiny full-text search for Ruby powered by Tantivy.
135
+ test_files: []