tantiny-in-memory 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d7093accadbcf024c32015cc56018e1223afe46dc9b48bbaa8cde1d51ca06adb
4
- data.tar.gz: ca1998f6556cfaacffde8ac61c4cf289103089678653d18c3b9dfd328099a2c4
3
+ metadata.gz: fd302bd9b783fbee6a64945f376814799fa98f2c53d8f474acfa477f98c4e48c
4
+ data.tar.gz: 9dd34494fc0f16a7f76e972772cd08a1c7de9770bd3601530bbb20d2088f0eb3
5
5
  SHA512:
6
- metadata.gz: f544440e33ffb089e3838b2dc65dc53fc88953502c9ab79efcf61af8252f1343610b311681f404cbc4105361ea598eeb86c1078ff88fdceafc6b32435bd2c2ff
7
- data.tar.gz: 94e201b144960f261d31fd8d7f69fcfc451a3e01eff10e113f9f5ede744492ba14d55e28f90ea9355885d836ca514a921da4af3314e557bf42545095b49316bc
6
+ metadata.gz: f63f2b7c6984f097c3b9e2b984f1854a479a8bc195548d34428c3a01b3166fa6cbb61721cac1718f31e6974fa071aebe2befee1d030edcaa42145bf828c91b1f
7
+ data.tar.gz: 58a02e43c93bb326c7337e89c3133f266251f82b3dce29328df9dfe4a92deb8ecacd6ed7700d7686ae68a4fda7ff982e5e612208bc00ff3f53b109448c853d91
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.0.8](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.7...v1.0.8) (2023-09-22)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * bad typo ([dc15ff1](https://github.com/a-chris/tantiny-in-memory/commit/dc15ff112d996bdc221c3d33218de0e0bde2086f))
9
+
10
+ ## [1.0.7](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.6...v1.0.7) (2023-09-04)
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * update readme ([25dd0eb](https://github.com/a-chris/tantiny-in-memory/commit/25dd0eba68c0befc0dcfe0df95d507f429e78f6f))
16
+
3
17
  ## [1.0.6](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.5...v1.0.6) (2023-09-04)
4
18
 
5
19
 
data/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tantiny"
3
- version = "1.0.6" # {x-release-please-version}
3
+ version = "1.0.8" # {x-release-please-version}
4
4
  edition = "2021"
5
5
  authors = ["Christian Toscano"]
6
6
  repository = "https://github.com/a-chris/tantiny-in-memory"
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
12
12
  rutie = "0.8"
13
- tantivy = "0.16"
13
+ tantivy = "0.21"
14
14
  lazy_static = "1.4"
15
15
  paste = "1.0"
16
16
 
data/README.md CHANGED
@@ -14,7 +14,7 @@ Tantiny is not exactly Ruby bindings to Tantivy, but it tries to be close. The m
14
14
  Take a look at the most basic example:
15
15
 
16
16
  ```ruby
17
- index = Tantiny::Index.new("/path/to/index") { text :description }
17
+ index = Tantiny::Index.new(nil) { text :description }
18
18
 
19
19
  index << { id: 1, description: "Hello World!" }
20
20
  index << { id: 2, description: "What's up?" }
@@ -30,7 +30,7 @@ index.search("world") # 1, 3
30
30
  Add this line to your application's Gemfile:
31
31
 
32
32
  ```ruby
33
- gem "tantiny"
33
+ gem "tantiny-in-memory"
34
34
  ```
35
35
 
36
36
  And then execute:
@@ -39,7 +39,7 @@ And then execute:
39
39
 
40
40
  Or install it yourself as:
41
41
 
42
- $ gem install tantiny
42
+ $ gem install tantiny-in-memory
43
43
 
44
44
  You don't **have to** have Rust installed on your system since Tantiny will try to download the pre-compiled binaries hosted on GitHub releases during the installation. However, if no pre-compiled binaries were found for your system (which is a combination of platform, architecture, and Ruby version) you will need to [install Rust](https://www.rust-lang.org/tools/install) first.
45
45
 
@@ -52,7 +52,7 @@ Please, make sure to specify the minor version when declaring dependency on `tan
52
52
  You have to specify a path to where the index would be stored and a block that defines the schema:
53
53
 
54
54
  ```ruby
55
- Tantiny::Index.new "/tmp/index" do
55
+ Tantiny::Index.new(nil) do
56
56
  id :imdb_id
57
57
  facet :category
58
58
  string :title
data/lib/tantiny/index.rb CHANGED
@@ -81,6 +81,12 @@ module Tantiny
81
81
  end
82
82
  end
83
83
 
84
+ def raw_query_search(query, limit: DEFAULT_LIMIT)
85
+ raise ArgumentError, "Query must be a string" unless query.is_a?(String)
86
+
87
+ __raw_query_search(query, limit)
88
+ end
89
+
84
90
  def search(query, limit: DEFAULT_LIMIT, **smart_query_options)
85
91
  unless query.is_a?(Query)
86
92
  fields = schema.text_fields
@@ -32,17 +32,29 @@ module Tantiny
32
32
 
33
33
  private
34
34
 
35
- def id(key) = @id_field = key
35
+ def id(key)
36
+ @id_field = key
37
+ end
36
38
 
37
- def string(key) = @string_fields << key
39
+ def string(key)
40
+ @string_fields << key
41
+ end
38
42
 
39
- def integer(key) = @integer_fields << key
43
+ def integer(key)
44
+ @integer_fields << key
45
+ end
40
46
 
41
- def double(key) = @double_fields << key
47
+ def double(key)
48
+ @double_fields << key
49
+ end
42
50
 
43
- def date(key) = @date_fields << key
51
+ def date(key)
52
+ @date_fields << key
53
+ end
44
54
 
45
- def facet(key) = @facet_fields << key
55
+ def facet(key)
56
+ @facet_fields << key
57
+ end
46
58
 
47
59
  def text(key, tokenizer: nil)
48
60
  @field_tokenizers[key] = tokenizer if tokenizer
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tantiny
4
- VERSION = "1.0.6" # {x-release-please-version}
4
+ VERSION = "1.0.8" # {x-release-please-version}
5
5
  end
data/src/index.rs CHANGED
@@ -1,7 +1,9 @@
1
1
  use std::collections::HashMap;
2
- use std::str::FromStr;
3
2
  use rutie::{methods, Object, AnyObject, Integer, NilClass, Array, RString, Hash};
4
- use tantivy::{doc, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader, DateTime};
3
+ use tantivy::time::OffsetDateTime;
4
+ use tantivy::time::format_description::well_known::Rfc3339;
5
+ use tantivy::{doc, DateTime, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader};
6
+ use tantivy::query::QueryParser;
5
7
  use tantivy::schema::{Schema, TextOptions, TextFieldIndexing, IndexRecordOption, FacetOptions, STRING, STORED, INDEXED, FAST};
6
8
  use tantivy::collector::TopDocs;
7
9
 
@@ -90,7 +92,7 @@ methods!(
90
92
  }
91
93
 
92
94
  for field in facet_fields {
93
- let options = FacetOptions::default().set_indexed();
95
+ let options = FacetOptions::default();
94
96
  schema_builder.add_facet_field(&field, options);
95
97
  }
96
98
 
@@ -98,10 +100,10 @@ methods!(
98
100
  let index = Index::create_in_ram(schema.clone());
99
101
  let tokenizers = index.tokenizers();
100
102
 
101
- tokenizers.register("default", unwrap_tokenizer(&default_tokenizer).clone());
103
+ tokenizers.register("default", (&unwrap_tokenizer(default_tokenizer)).clone());
102
104
 
103
105
  for (field, tokenizer) in field_tokenizers {
104
- tokenizers.register(&field, unwrap_tokenizer(&tokenizer).clone())
106
+ tokenizers.register(&field, (&unwrap_tokenizer(tokenizer)).clone())
105
107
  }
106
108
 
107
109
  let index_writer = None;
@@ -168,8 +170,8 @@ methods!(
168
170
 
169
171
  for (key, value) in date_fields.iter() {
170
172
  let field = schema.get_field(key).try_unwrap();
171
- let value = DateTime::from_str(value).try_unwrap();
172
- doc.add_date(field, &value);
173
+ let value = DateTime::from_utc(OffsetDateTime::parse(value, &Rfc3339).unwrap());
174
+ doc.add_date(field, value);
173
175
  }
174
176
 
175
177
  for (key, value) in facet_fields.iter() {
@@ -239,6 +241,44 @@ methods!(
239
241
  NilClass::new()
240
242
  }
241
243
 
244
+ fn raw_query_search(
245
+ query_string: RString,
246
+ limit: Integer
247
+ ) -> Array {
248
+ try_unwrap_params!(
249
+ query_string: String,
250
+ limit: i64
251
+ );
252
+
253
+ let internal = unwrap_index(&_itself);
254
+ let content_field = internal.schema.get_field("content").try_unwrap();
255
+ let query_parser = QueryParser::for_index(
256
+ &internal.index,
257
+ vec![content_field],
258
+ );
259
+
260
+ let query = query_parser.parse_query(&query_string).try_unwrap();
261
+ let id_field = internal.schema.get_field("id").try_unwrap();
262
+ let searcher = internal.index_reader.searcher();
263
+
264
+ let top_docs = searcher
265
+ .search(&*query, &TopDocs::with_limit(limit as usize))
266
+ .try_unwrap();
267
+
268
+ let mut array = Array::with_capacity(top_docs.len());
269
+
270
+ for (_score, doc_address) in top_docs {
271
+ let doc = searcher.doc(doc_address).try_unwrap();
272
+ if let Some(value) = doc.get_first(id_field) {
273
+ if let Some(id) = (&*value).as_text() {
274
+ array.push(RString::from(String::from(id)));
275
+ }
276
+ }
277
+ }
278
+
279
+ array
280
+ }
281
+
242
282
  fn search(
243
283
  query: AnyObject,
244
284
  limit: Integer
@@ -262,7 +302,7 @@ methods!(
262
302
  for (_score, doc_address) in top_docs {
263
303
  let doc = searcher.doc(doc_address).try_unwrap();
264
304
  if let Some(value) = doc.get_first(id_field) {
265
- if let Some(id) = (&*value).text() {
305
+ if let Some(id) = (&*value).as_text() {
266
306
  array.push(RString::from(String::from(id)));
267
307
  }
268
308
  }
@@ -282,5 +322,6 @@ pub(super) fn init() {
282
322
  klass.def("__commit", commit);
283
323
  klass.def("__reload", reload);
284
324
  klass.def("__search", search);
325
+ klass.def("__raw_query_search", raw_query_search);
285
326
  });
286
327
  }
data/src/query.rs CHANGED
@@ -1,9 +1,10 @@
1
- use std::str::FromStr;
2
1
  use std::ops::Bound::Included;
3
2
  use rutie::{methods, Object, AnyObject, Integer, Float, Array, RString};
4
3
  use tantivy::{Term, DateTime};
5
4
  use tantivy::schema::{IndexRecordOption, Facet, Type, FieldType};
6
5
  use tantivy::query::*;
6
+ use tantivy::time::format_description::well_known::Rfc3339;
7
+ use tantivy::time::OffsetDateTime;
7
8
 
8
9
  use crate::helpers::{try_unwrap_params, scaffold, TryUnwrap};
9
10
  use crate::index::{unwrap_index, RTantinyIndex};
@@ -133,13 +134,13 @@ methods!(
133
134
  FieldType::Date(_) => {
134
135
  let from: String = from.try_unwrap();
135
136
  let to: String = to.try_unwrap();
136
- let from = DateTime::from_str(&from).try_unwrap();
137
- let to = DateTime::from_str(&to).try_unwrap();
137
+ let from = DateTime::from_utc(OffsetDateTime::parse(&from, &Rfc3339).unwrap());
138
+ let to = DateTime::from_utc(OffsetDateTime::parse(&to, &Rfc3339).unwrap());
138
139
 
139
140
  Ok((
140
141
  Type::Date,
141
- Included(Term::from_field_date(field, &from)),
142
- Included(Term::from_field_date(field, &to))
142
+ Included(Term::from_field_date(field, from)),
143
+ Included(Term::from_field_date(field, to))
143
144
  ))
144
145
  },
145
146
  FieldType::I64(_) => {
@@ -167,7 +168,7 @@ methods!(
167
168
 
168
169
  let (value_type, left, right) = range.try_unwrap();
169
170
 
170
- let query = RangeQuery::new_term_bounds(field, value_type, &left, &right);
171
+ let query = RangeQuery::new_term_bounds(field_name.to_string(), value_type, &left, &right);
171
172
 
172
173
  wrap_query(Box::new(query))
173
174
  }
@@ -257,4 +258,4 @@ pub(super) fn init() {
257
258
  klass.def("__negation", negation);
258
259
  klass.def("__boost", boost);
259
260
  });
260
- }
261
+ }
data/src/tokenizer.rs CHANGED
@@ -15,8 +15,8 @@ fn wrap_tokenizer(tokenizer: TextAnalyzer) -> RTantinyTokenizer {
15
15
  )
16
16
  }
17
17
 
18
- pub(crate) fn unwrap_tokenizer(tokenizer: &RTantinyTokenizer) -> &TextAnalyzer {
19
- &tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0
18
+ pub(crate) fn unwrap_tokenizer(tokenizer: RTantinyTokenizer) -> TextAnalyzer {
19
+ tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0.clone()
20
20
  }
21
21
 
22
22
  #[rustfmt::skip::macros(methods)]
@@ -25,9 +25,10 @@ methods!(
25
25
  _itself,
26
26
 
27
27
  fn new_simple_tokenizer() -> RTantinyTokenizer {
28
- let tokenizer = TextAnalyzer::from(SimpleTokenizer)
28
+ let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
29
29
  .filter(RemoveLongFilter::limit(40))
30
- .filter(LowerCaser);
30
+ .filter(LowerCaser)
31
+ .build();
31
32
 
32
33
  wrap_tokenizer(tokenizer)
33
34
  }
@@ -36,10 +37,11 @@ methods!(
36
37
  try_unwrap_params!(locale_code: String);
37
38
 
38
39
  let language: LanguageWrapper = locale_code.parse().try_unwrap();
39
- let tokenizer = TextAnalyzer::from(SimpleTokenizer)
40
+ let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
40
41
  .filter(RemoveLongFilter::limit(40))
41
42
  .filter(LowerCaser)
42
- .filter(Stemmer::new(language.0));
43
+ .filter(Stemmer::new(language.0))
44
+ .build();
43
45
 
44
46
  wrap_tokenizer(tokenizer)
45
47
  }
@@ -61,13 +63,14 @@ methods!(
61
63
  prefix_only
62
64
  );
63
65
 
64
- wrap_tokenizer(TextAnalyzer::from(tokenizer))
66
+ wrap_tokenizer(TextAnalyzer::from(tokenizer.try_unwrap()))
65
67
  }
66
68
 
67
69
  fn extract_terms(text: RString) -> Array {
68
70
  try_unwrap_params!(text: String);
69
71
 
70
- let mut token_stream = unwrap_tokenizer(&_itself).token_stream(&text);
72
+ let mut tokenizer: TextAnalyzer = unwrap_tokenizer(_itself);
73
+ let mut token_stream = tokenizer.token_stream(&text);
71
74
  let mut terms = vec![];
72
75
 
73
76
  while token_stream.advance() {
@@ -91,4 +94,7 @@ pub(super) fn init() {
91
94
  klass.def_self("__new_ngram_tokenizer", new_ngram_tokenizer);
92
95
  klass.def("__extract_terms", extract_terms);
93
96
  });
94
- }
97
+ }
98
+
99
+
100
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tantiny-in-memory
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Toscano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-04 00:00:00.000000000 Z
11
+ date: 2023-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-next
@@ -95,7 +95,6 @@ files:
95
95
  - bin/console
96
96
  - bin/setup
97
97
  - ext/Rakefile
98
- - lib/.rbnext/3.0/tantiny/schema.rb
99
98
  - lib/tantiny-in-memory.rb
100
99
  - lib/tantiny.rb
101
100
  - lib/tantiny/errors.rb
@@ -1,53 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Tantiny
4
- class Schema
5
- attr_reader :default_tokenizer,
6
- :id_field,
7
- :text_fields,
8
- :string_fields,
9
- :integer_fields,
10
- :double_fields,
11
- :date_fields,
12
- :facet_fields,
13
- :field_tokenizers
14
-
15
- def initialize(tokenizer, &block)
16
- @default_tokenizer = tokenizer
17
- @id_field = :id
18
- @text_fields = []
19
- @string_fields = []
20
- @integer_fields = []
21
- @double_fields = []
22
- @date_fields = []
23
- @facet_fields = []
24
- @field_tokenizers = {}
25
-
26
- instance_exec(&block)
27
- end
28
-
29
- def tokenizer_for(field)
30
- field_tokenizers[field] || default_tokenizer
31
- end
32
-
33
- private
34
-
35
- def id(key) ; @id_field = key; end
36
-
37
- def string(key) ; @string_fields << key; end
38
-
39
- def integer(key) ; @integer_fields << key; end
40
-
41
- def double(key) ; @double_fields << key; end
42
-
43
- def date(key) ; @date_fields << key; end
44
-
45
- def facet(key) ; @facet_fields << key; end
46
-
47
- def text(key, tokenizer: nil)
48
- @field_tokenizers[key] = tokenizer if tokenizer
49
-
50
- @text_fields << key
51
- end
52
- end
53
- end