tantiny-in-memory 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: acb98be2f1ef173c3ad7ce7efb93c9846c35c28f7f16f291923356a785996e60
4
- data.tar.gz: c4c40b964ce4ac108bc608419eee38397be1cd3ae4b40c259b8c6b18eabfa7f6
3
+ metadata.gz: fd302bd9b783fbee6a64945f376814799fa98f2c53d8f474acfa477f98c4e48c
4
+ data.tar.gz: 9dd34494fc0f16a7f76e972772cd08a1c7de9770bd3601530bbb20d2088f0eb3
5
5
  SHA512:
6
- metadata.gz: f85d2c7a1d13f98a044fbb7ef9196e387cbc91268fb03271b84db9655441e31c116873e943896e977de09b44a25e77b1be14c2b7ba492c8046ba1162993b1ce7
7
- data.tar.gz: f98c87aaf9a3bf1ba9f3e45496397da8dff68962eebcb6ed5f30ffab8c9dc8d030ecfbe7b31397e972fb58aacb24b56752e0bdc365e258b1b08203e12f990bdd
6
+ metadata.gz: f63f2b7c6984f097c3b9e2b984f1854a479a8bc195548d34428c3a01b3166fa6cbb61721cac1718f31e6974fa071aebe2befee1d030edcaa42145bf828c91b1f
7
+ data.tar.gz: 58a02e43c93bb326c7337e89c3133f266251f82b3dce29328df9dfe4a92deb8ecacd6ed7700d7686ae68a4fda7ff982e5e612208bc00ff3f53b109448c853d91
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.0.8](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.7...v1.0.8) (2023-09-22)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * bad typo ([dc15ff1](https://github.com/a-chris/tantiny-in-memory/commit/dc15ff112d996bdc221c3d33218de0e0bde2086f))
9
+
3
10
  ## [1.0.7](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.6...v1.0.7) (2023-09-04)
4
11
 
5
12
 
data/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tantiny"
3
- version = "1.0.7" # {x-release-please-version}
3
+ version = "1.0.8" # {x-release-please-version}
4
4
  edition = "2021"
5
5
  authors = ["Christian Toscano"]
6
6
  repository = "https://github.com/a-chris/tantiny-in-memory"
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
12
12
  rutie = "0.8"
13
- tantivy = "0.16"
13
+ tantivy = "0.21"
14
14
  lazy_static = "1.4"
15
15
  paste = "1.0"
16
16
 
data/lib/tantiny/index.rb CHANGED
@@ -81,6 +81,12 @@ module Tantiny
81
81
  end
82
82
  end
83
83
 
84
+ def raw_query_search(query, limit: DEFAULT_LIMIT)
85
+ raise ArgumentError, "Query must be a string" unless query.is_a?(String)
86
+
87
+ __raw_query_search(query, limit)
88
+ end
89
+
84
90
  def search(query, limit: DEFAULT_LIMIT, **smart_query_options)
85
91
  unless query.is_a?(Query)
86
92
  fields = schema.text_fields
@@ -32,17 +32,29 @@ module Tantiny
32
32
 
33
33
  private
34
34
 
35
- def id(key) = @id_field = key
35
+ def id(key)
36
+ @id_field = key
37
+ end
36
38
 
37
- def string(key) = @string_fields << key
39
+ def string(key)
40
+ @string_fields << key
41
+ end
38
42
 
39
- def integer(key) = @integer_fields << key
43
+ def integer(key)
44
+ @integer_fields << key
45
+ end
40
46
 
41
- def double(key) = @double_fields << key
47
+ def double(key)
48
+ @double_fields << key
49
+ end
42
50
 
43
- def date(key) = @date_fields << key
51
+ def date(key)
52
+ @date_fields << key
53
+ end
44
54
 
45
- def facet(key) = @facet_fields << key
55
+ def facet(key)
56
+ @facet_fields << key
57
+ end
46
58
 
47
59
  def text(key, tokenizer: nil)
48
60
  @field_tokenizers[key] = tokenizer if tokenizer
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tantiny
4
- VERSION = "1.0.7" # {x-release-please-version}
4
+ VERSION = "1.0.8" # {x-release-please-version}
5
5
  end
data/src/index.rs CHANGED
@@ -1,7 +1,9 @@
1
1
  use std::collections::HashMap;
2
- use std::str::FromStr;
3
2
  use rutie::{methods, Object, AnyObject, Integer, NilClass, Array, RString, Hash};
4
- use tantivy::{doc, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader, DateTime};
3
+ use tantivy::time::OffsetDateTime;
4
+ use tantivy::time::format_description::well_known::Rfc3339;
5
+ use tantivy::{doc, DateTime, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader};
6
+ use tantivy::query::QueryParser;
5
7
  use tantivy::schema::{Schema, TextOptions, TextFieldIndexing, IndexRecordOption, FacetOptions, STRING, STORED, INDEXED, FAST};
6
8
  use tantivy::collector::TopDocs;
7
9
 
@@ -90,7 +92,7 @@ methods!(
90
92
  }
91
93
 
92
94
  for field in facet_fields {
93
- let options = FacetOptions::default().set_indexed();
95
+ let options = FacetOptions::default();
94
96
  schema_builder.add_facet_field(&field, options);
95
97
  }
96
98
 
@@ -98,10 +100,10 @@ methods!(
98
100
  let index = Index::create_in_ram(schema.clone());
99
101
  let tokenizers = index.tokenizers();
100
102
 
101
- tokenizers.register("default", unwrap_tokenizer(&default_tokenizer).clone());
103
+ tokenizers.register("default", (&unwrap_tokenizer(default_tokenizer)).clone());
102
104
 
103
105
  for (field, tokenizer) in field_tokenizers {
104
- tokenizers.register(&field, unwrap_tokenizer(&tokenizer).clone())
106
+ tokenizers.register(&field, (&unwrap_tokenizer(tokenizer)).clone())
105
107
  }
106
108
 
107
109
  let index_writer = None;
@@ -168,8 +170,8 @@ methods!(
168
170
 
169
171
  for (key, value) in date_fields.iter() {
170
172
  let field = schema.get_field(key).try_unwrap();
171
- let value = DateTime::from_str(value).try_unwrap();
172
- doc.add_date(field, &value);
173
+ let value = DateTime::from_utc(OffsetDateTime::parse(value, &Rfc3339).unwrap());
174
+ doc.add_date(field, value);
173
175
  }
174
176
 
175
177
  for (key, value) in facet_fields.iter() {
@@ -239,6 +241,44 @@ methods!(
239
241
  NilClass::new()
240
242
  }
241
243
 
244
+ fn raw_query_search(
245
+ query_string: RString,
246
+ limit: Integer
247
+ ) -> Array {
248
+ try_unwrap_params!(
249
+ query_string: String,
250
+ limit: i64
251
+ );
252
+
253
+ let internal = unwrap_index(&_itself);
254
+ let content_field = internal.schema.get_field("content").try_unwrap();
255
+ let query_parser = QueryParser::for_index(
256
+ &internal.index,
257
+ vec![content_field],
258
+ );
259
+
260
+ let query = query_parser.parse_query(&query_string).try_unwrap();
261
+ let id_field = internal.schema.get_field("id").try_unwrap();
262
+ let searcher = internal.index_reader.searcher();
263
+
264
+ let top_docs = searcher
265
+ .search(&*query, &TopDocs::with_limit(limit as usize))
266
+ .try_unwrap();
267
+
268
+ let mut array = Array::with_capacity(top_docs.len());
269
+
270
+ for (_score, doc_address) in top_docs {
271
+ let doc = searcher.doc(doc_address).try_unwrap();
272
+ if let Some(value) = doc.get_first(id_field) {
273
+ if let Some(id) = (&*value).as_text() {
274
+ array.push(RString::from(String::from(id)));
275
+ }
276
+ }
277
+ }
278
+
279
+ array
280
+ }
281
+
242
282
  fn search(
243
283
  query: AnyObject,
244
284
  limit: Integer
@@ -262,7 +302,7 @@ methods!(
262
302
  for (_score, doc_address) in top_docs {
263
303
  let doc = searcher.doc(doc_address).try_unwrap();
264
304
  if let Some(value) = doc.get_first(id_field) {
265
- if let Some(id) = (&*value).text() {
305
+ if let Some(id) = (&*value).as_text() {
266
306
  array.push(RString::from(String::from(id)));
267
307
  }
268
308
  }
@@ -282,5 +322,6 @@ pub(super) fn init() {
282
322
  klass.def("__commit", commit);
283
323
  klass.def("__reload", reload);
284
324
  klass.def("__search", search);
325
+ klass.def("__raw_query_search", raw_query_search);
285
326
  });
286
327
  }
data/src/query.rs CHANGED
@@ -1,9 +1,10 @@
1
- use std::str::FromStr;
2
1
  use std::ops::Bound::Included;
3
2
  use rutie::{methods, Object, AnyObject, Integer, Float, Array, RString};
4
3
  use tantivy::{Term, DateTime};
5
4
  use tantivy::schema::{IndexRecordOption, Facet, Type, FieldType};
6
5
  use tantivy::query::*;
6
+ use tantivy::time::format_description::well_known::Rfc3339;
7
+ use tantivy::time::OffsetDateTime;
7
8
 
8
9
  use crate::helpers::{try_unwrap_params, scaffold, TryUnwrap};
9
10
  use crate::index::{unwrap_index, RTantinyIndex};
@@ -133,13 +134,13 @@ methods!(
133
134
  FieldType::Date(_) => {
134
135
  let from: String = from.try_unwrap();
135
136
  let to: String = to.try_unwrap();
136
- let from = DateTime::from_str(&from).try_unwrap();
137
- let to = DateTime::from_str(&to).try_unwrap();
137
+ let from = DateTime::from_utc(OffsetDateTime::parse(&from, &Rfc3339).unwrap());
138
+ let to = DateTime::from_utc(OffsetDateTime::parse(&to, &Rfc3339).unwrap());
138
139
 
139
140
  Ok((
140
141
  Type::Date,
141
- Included(Term::from_field_date(field, &from)),
142
- Included(Term::from_field_date(field, &to))
142
+ Included(Term::from_field_date(field, from)),
143
+ Included(Term::from_field_date(field, to))
143
144
  ))
144
145
  },
145
146
  FieldType::I64(_) => {
@@ -167,7 +168,7 @@ methods!(
167
168
 
168
169
  let (value_type, left, right) = range.try_unwrap();
169
170
 
170
- let query = RangeQuery::new_term_bounds(field, value_type, &left, &right);
171
+ let query = RangeQuery::new_term_bounds(field_name.to_string(), value_type, &left, &right);
171
172
 
172
173
  wrap_query(Box::new(query))
173
174
  }
@@ -257,4 +258,4 @@ pub(super) fn init() {
257
258
  klass.def("__negation", negation);
258
259
  klass.def("__boost", boost);
259
260
  });
260
- }
261
+ }
data/src/tokenizer.rs CHANGED
@@ -15,8 +15,8 @@ fn wrap_tokenizer(tokenizer: TextAnalyzer) -> RTantinyTokenizer {
15
15
  )
16
16
  }
17
17
 
18
- pub(crate) fn unwrap_tokenizer(tokenizer: &RTantinyTokenizer) -> &TextAnalyzer {
19
- &tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0
18
+ pub(crate) fn unwrap_tokenizer(tokenizer: RTantinyTokenizer) -> TextAnalyzer {
19
+ tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0.clone()
20
20
  }
21
21
 
22
22
  #[rustfmt::skip::macros(methods)]
@@ -25,9 +25,10 @@ methods!(
25
25
  _itself,
26
26
 
27
27
  fn new_simple_tokenizer() -> RTantinyTokenizer {
28
- let tokenizer = TextAnalyzer::from(SimpleTokenizer)
28
+ let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
29
29
  .filter(RemoveLongFilter::limit(40))
30
- .filter(LowerCaser);
30
+ .filter(LowerCaser)
31
+ .build();
31
32
 
32
33
  wrap_tokenizer(tokenizer)
33
34
  }
@@ -36,10 +37,11 @@ methods!(
36
37
  try_unwrap_params!(locale_code: String);
37
38
 
38
39
  let language: LanguageWrapper = locale_code.parse().try_unwrap();
39
- let tokenizer = TextAnalyzer::from(SimpleTokenizer)
40
+ let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
40
41
  .filter(RemoveLongFilter::limit(40))
41
42
  .filter(LowerCaser)
42
- .filter(Stemmer::new(language.0));
43
+ .filter(Stemmer::new(language.0))
44
+ .build();
43
45
 
44
46
  wrap_tokenizer(tokenizer)
45
47
  }
@@ -61,13 +63,14 @@ methods!(
61
63
  prefix_only
62
64
  );
63
65
 
64
- wrap_tokenizer(TextAnalyzer::from(tokenizer))
66
+ wrap_tokenizer(TextAnalyzer::from(tokenizer.try_unwrap()))
65
67
  }
66
68
 
67
69
  fn extract_terms(text: RString) -> Array {
68
70
  try_unwrap_params!(text: String);
69
71
 
70
- let mut token_stream = unwrap_tokenizer(&_itself).token_stream(&text);
72
+ let mut tokenizer: TextAnalyzer = unwrap_tokenizer(_itself);
73
+ let mut token_stream = tokenizer.token_stream(&text);
71
74
  let mut terms = vec![];
72
75
 
73
76
  while token_stream.advance() {
@@ -91,4 +94,7 @@ pub(super) fn init() {
91
94
  klass.def_self("__new_ngram_tokenizer", new_ngram_tokenizer);
92
95
  klass.def("__extract_terms", extract_terms);
93
96
  });
94
- }
97
+ }
98
+
99
+
100
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tantiny-in-memory
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Toscano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-04 00:00:00.000000000 Z
11
+ date: 2023-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-next
@@ -95,7 +95,6 @@ files:
95
95
  - bin/console
96
96
  - bin/setup
97
97
  - ext/Rakefile
98
- - lib/.rbnext/3.0/tantiny/schema.rb
99
98
  - lib/tantiny-in-memory.rb
100
99
  - lib/tantiny.rb
101
100
  - lib/tantiny/errors.rb
@@ -1,53 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Tantiny
4
- class Schema
5
- attr_reader :default_tokenizer,
6
- :id_field,
7
- :text_fields,
8
- :string_fields,
9
- :integer_fields,
10
- :double_fields,
11
- :date_fields,
12
- :facet_fields,
13
- :field_tokenizers
14
-
15
- def initialize(tokenizer, &block)
16
- @default_tokenizer = tokenizer
17
- @id_field = :id
18
- @text_fields = []
19
- @string_fields = []
20
- @integer_fields = []
21
- @double_fields = []
22
- @date_fields = []
23
- @facet_fields = []
24
- @field_tokenizers = {}
25
-
26
- instance_exec(&block)
27
- end
28
-
29
- def tokenizer_for(field)
30
- field_tokenizers[field] || default_tokenizer
31
- end
32
-
33
- private
34
-
35
- def id(key) ; @id_field = key; end
36
-
37
- def string(key) ; @string_fields << key; end
38
-
39
- def integer(key) ; @integer_fields << key; end
40
-
41
- def double(key) ; @double_fields << key; end
42
-
43
- def date(key) ; @date_fields << key; end
44
-
45
- def facet(key) ; @facet_fields << key; end
46
-
47
- def text(key, tokenizer: nil)
48
- @field_tokenizers[key] = tokenizer if tokenizer
49
-
50
- @text_fields << key
51
- end
52
- end
53
- end