RubyGems - tantiny-in-memory - Versions diffs - 1.0.6 → 1.0.8 - Mend

tantiny-in-memory 1.0.6 → 1.0.8

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (12) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/Cargo.toml +2 -2
data/README.md +4 -4
data/lib/tantiny/index.rb +6 -0
data/lib/tantiny/schema.rb +18 -6
data/lib/tantiny/version.rb +1 -1
data/src/index.rs +49 -8
data/src/query.rs +8 -7
data/src/tokenizer.rs +15 -9
metadata +2 -3
data/lib/.rbnext/3.0/tantiny/schema.rb +0 -53

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d7093accadbcf024c32015cc56018e1223afe46dc9b48bbaa8cde1d51ca06adb
-  data.tar.gz: ca1998f6556cfaacffde8ac61c4cf289103089678653d18c3b9dfd328099a2c4
+  metadata.gz: fd302bd9b783fbee6a64945f376814799fa98f2c53d8f474acfa477f98c4e48c
+  data.tar.gz: 9dd34494fc0f16a7f76e972772cd08a1c7de9770bd3601530bbb20d2088f0eb3
 SHA512:
-  metadata.gz: f544440e33ffb089e3838b2dc65dc53fc88953502c9ab79efcf61af8252f1343610b311681f404cbc4105361ea598eeb86c1078ff88fdceafc6b32435bd2c2ff
-  data.tar.gz: 94e201b144960f261d31fd8d7f69fcfc451a3e01eff10e113f9f5ede744492ba14d55e28f90ea9355885d836ca514a921da4af3314e557bf42545095b49316bc
+  metadata.gz: f63f2b7c6984f097c3b9e2b984f1854a479a8bc195548d34428c3a01b3166fa6cbb61721cac1718f31e6974fa071aebe2befee1d030edcaa42145bf828c91b1f
+  data.tar.gz: 58a02e43c93bb326c7337e89c3133f266251f82b3dce29328df9dfe4a92deb8ecacd6ed7700d7686ae68a4fda7ff982e5e612208bc00ff3f53b109448c853d91

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,19 @@
 # Changelog
+## [1.0.8](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.7...v1.0.8) (2023-09-22)
+### Bug Fixes
+* bad typo ([dc15ff1](https://github.com/a-chris/tantiny-in-memory/commit/dc15ff112d996bdc221c3d33218de0e0bde2086f))
+## [1.0.7](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.6...v1.0.7) (2023-09-04)
+### Bug Fixes
+* update readme ([25dd0eb](https://github.com/a-chris/tantiny-in-memory/commit/25dd0eba68c0befc0dcfe0df95d507f429e78f6f))
 ## [1.0.6](https://github.com/a-chris/tantiny-in-memory/compare/v1.0.5...v1.0.6) (2023-09-04)

data/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "tantiny"
-version = "1.0.6" # {x-release-please-version}
+version = "1.0.8" # {x-release-please-version}
 edition = "2021"
 authors = ["Christian Toscano"]
 repository = "https://github.com/a-chris/tantiny-in-memory"
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]
 [dependencies]
 rutie = "0.8"
-tantivy = "0.16"
+tantivy = "0.21"
 lazy_static = "1.4"
 paste = "1.0"

data/README.md CHANGED Viewed

@@ -14,7 +14,7 @@ Tantiny is not exactly Ruby bindings to Tantivy, but it tries to be close. The m
 Take a look at the most basic example:
 ```ruby
-index = Tantiny::Index.new("/path/to/index") { text :description }
+index = Tantiny::Index.new(nil) { text :description }
 index << { id: 1, description: "Hello World!" }
 index << { id: 2, description: "What's up?" }
@@ -30,7 +30,7 @@ index.search("world") # 1, 3
 Add this line to your application's Gemfile:
 ```ruby
-gem "tantiny"
+gem "tantiny-in-memory"
 ```
 And then execute:
@@ -39,7 +39,7 @@ And then execute:
 Or install it yourself as:
-    $ gem install tantiny
+    $ gem install tantiny-in-memory
 You don't **have to** have Rust installed on your system since Tantiny will try to download the pre-compiled binaries hosted on GitHub releases during the installation. However, if no pre-compiled binaries were found for your system (which is a combination of platform, architecture, and Ruby version) you will need to [install Rust](https://www.rust-lang.org/tools/install) first.
@@ -52,7 +52,7 @@ Please, make sure to specify the minor version when declaring dependency on `tan
 You have to specify a path to where the index would be stored and a block that defines the schema:
 ```ruby
-Tantiny::Index.new "/tmp/index" do
+Tantiny::Index.new(nil) do
   id :imdb_id
   facet :category
   string :title

data/lib/tantiny/index.rb CHANGED Viewed

@@ -81,6 +81,12 @@ module Tantiny
       end
     end
+    def raw_query_search(query, limit: DEFAULT_LIMIT)
+      raise ArgumentError, "Query must be a string" unless query.is_a?(String)
+      __raw_query_search(query, limit)
+    end
     def search(query, limit: DEFAULT_LIMIT, **smart_query_options)
       unless query.is_a?(Query)
         fields = schema.text_fields

data/lib/tantiny/schema.rb CHANGED Viewed

@@ -32,17 +32,29 @@ module Tantiny
     private
-    def id(key) = @id_field = key
+    def id(key)
+      @id_field = key
+    end
-    def string(key) = @string_fields << key
+    def string(key)
+      @string_fields << key
+    end
-    def integer(key) = @integer_fields << key
+    def integer(key)
+      @integer_fields << key
+    end
-    def double(key) = @double_fields << key
+    def double(key)
+      @double_fields << key
+    end
-    def date(key) = @date_fields << key
+    def date(key)
+      @date_fields << key
+    end
-    def facet(key) = @facet_fields << key
+    def facet(key)
+      @facet_fields << key
+    end
     def text(key, tokenizer: nil)
       @field_tokenizers[key] = tokenizer if tokenizer

data/lib/tantiny/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Tantiny
-  VERSION = "1.0.6" # {x-release-please-version}
+  VERSION = "1.0.8" # {x-release-please-version}
 end

data/src/index.rs CHANGED Viewed

@@ -1,7 +1,9 @@
 use std::collections::HashMap;
-use std::str::FromStr;
 use rutie::{methods, Object, AnyObject, Integer, NilClass, Array, RString, Hash};
-use tantivy::{doc, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader, DateTime};
+use tantivy::time::OffsetDateTime;
+use tantivy::time::format_description::well_known::Rfc3339;
+use tantivy::{doc, DateTime, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader};
+use tantivy::query::QueryParser;
 use tantivy::schema::{Schema, TextOptions, TextFieldIndexing, IndexRecordOption, FacetOptions, STRING, STORED, INDEXED, FAST};
 use tantivy::collector::TopDocs;
@@ -90,7 +92,7 @@ methods!(
         }
         for field in facet_fields {
-            let options = FacetOptions::default().set_indexed();
+            let options = FacetOptions::default();
             schema_builder.add_facet_field(&field, options);
         }
@@ -98,10 +100,10 @@ methods!(
         let index = Index::create_in_ram(schema.clone());
         let tokenizers = index.tokenizers();
-        tokenizers.register("default", unwrap_tokenizer(&default_tokenizer).clone());
+        tokenizers.register("default", (&unwrap_tokenizer(default_tokenizer)).clone());
         for (field, tokenizer) in field_tokenizers {
-            tokenizers.register(&field, unwrap_tokenizer(&tokenizer).clone())
+            tokenizers.register(&field, (&unwrap_tokenizer(tokenizer)).clone())
         }
         let index_writer = None;
@@ -168,8 +170,8 @@ methods!(
         for (key, value) in date_fields.iter() {
             let field = schema.get_field(key).try_unwrap();
-            let value = DateTime::from_str(value).try_unwrap();
-            doc.add_date(field, &value);
+            let value = DateTime::from_utc(OffsetDateTime::parse(value, &Rfc3339).unwrap());
+            doc.add_date(field, value);
         }
         for (key, value) in facet_fields.iter() {
@@ -239,6 +241,44 @@ methods!(
         NilClass::new()
     }
+    fn raw_query_search(
+      query_string: RString,
+      limit: Integer
+    ) -> Array {
+      try_unwrap_params!(
+        query_string: String,
+        limit: i64
+      );
+      let internal = unwrap_index(&_itself);
+      let content_field = internal.schema.get_field("content").try_unwrap();
+      let query_parser = QueryParser::for_index(
+        &internal.index,
+        vec![content_field],
+      );
+      let query = query_parser.parse_query(&query_string).try_unwrap();
+      let id_field = internal.schema.get_field("id").try_unwrap();
+      let searcher = internal.index_reader.searcher();
+      let top_docs = searcher
+          .search(&*query, &TopDocs::with_limit(limit as usize))
+          .try_unwrap();
+      let mut array = Array::with_capacity(top_docs.len());
+      for (_score, doc_address) in top_docs {
+          let doc = searcher.doc(doc_address).try_unwrap();
+          if let Some(value) = doc.get_first(id_field) {
+              if let Some(id) = (&*value).as_text() {
+                  array.push(RString::from(String::from(id)));
+              }
+          }
+      }
+      array
+    }
     fn search(
         query: AnyObject,
         limit: Integer
@@ -262,7 +302,7 @@ methods!(
         for (_score, doc_address) in top_docs {
             let doc = searcher.doc(doc_address).try_unwrap();
             if let Some(value) = doc.get_first(id_field) {
-                if let Some(id) = (&*value).text() {
+                if let Some(id) = (&*value).as_text() {
                     array.push(RString::from(String::from(id)));
                 }
             }
@@ -282,5 +322,6 @@ pub(super) fn init() {
         klass.def("__commit", commit);
         klass.def("__reload", reload);
         klass.def("__search", search);
+        klass.def("__raw_query_search", raw_query_search);
     });
 }

data/src/query.rs CHANGED Viewed

@@ -1,9 +1,10 @@
-use std::str::FromStr;
 use std::ops::Bound::Included;
 use rutie::{methods, Object, AnyObject, Integer, Float, Array, RString};
 use tantivy::{Term, DateTime};
 use tantivy::schema::{IndexRecordOption, Facet, Type, FieldType};
 use tantivy::query::*;
+use tantivy::time::format_description::well_known::Rfc3339;
+use tantivy::time::OffsetDateTime;
 use crate::helpers::{try_unwrap_params, scaffold, TryUnwrap};
 use crate::index::{unwrap_index, RTantinyIndex};
@@ -133,13 +134,13 @@ methods!(
             FieldType::Date(_) => {
                 let from: String = from.try_unwrap();
                 let to: String = to.try_unwrap();
-                let from = DateTime::from_str(&from).try_unwrap();
-                let to = DateTime::from_str(&to).try_unwrap();
+                let from = DateTime::from_utc(OffsetDateTime::parse(&from, &Rfc3339).unwrap());
+                let to = DateTime::from_utc(OffsetDateTime::parse(&to, &Rfc3339).unwrap());
                 Ok((
                     Type::Date,
-                    Included(Term::from_field_date(field, &from)),
-                    Included(Term::from_field_date(field, &to))
+                    Included(Term::from_field_date(field, from)),
+                    Included(Term::from_field_date(field, to))
                 ))
             },
             FieldType::I64(_) => {
@@ -167,7 +168,7 @@ methods!(
         let (value_type, left, right) = range.try_unwrap();
-        let query = RangeQuery::new_term_bounds(field, value_type, &left, &right);
+        let query = RangeQuery::new_term_bounds(field_name.to_string(), value_type, &left, &right);
         wrap_query(Box::new(query))
     }
@@ -257,4 +258,4 @@ pub(super) fn init() {
         klass.def("__negation", negation);
         klass.def("__boost", boost);
     });
-}
+}

data/src/tokenizer.rs CHANGED Viewed

@@ -15,8 +15,8 @@ fn wrap_tokenizer(tokenizer: TextAnalyzer) -> RTantinyTokenizer {
     )
 }
-pub(crate) fn unwrap_tokenizer(tokenizer: &RTantinyTokenizer) -> &TextAnalyzer {
-    &tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0
+pub(crate) fn unwrap_tokenizer(tokenizer: RTantinyTokenizer) -> TextAnalyzer {
+    tokenizer.get_data(&*TANTINY_TOKENIZER_WRAPPER).0.clone()
 }
 #[rustfmt::skip::macros(methods)]
@@ -25,9 +25,10 @@ methods!(
     _itself,
     fn new_simple_tokenizer() -> RTantinyTokenizer {
-        let tokenizer = TextAnalyzer::from(SimpleTokenizer)
+        let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
             .filter(RemoveLongFilter::limit(40))
-            .filter(LowerCaser);
+            .filter(LowerCaser)
+            .build();
         wrap_tokenizer(tokenizer)
     }
@@ -36,10 +37,11 @@ methods!(
         try_unwrap_params!(locale_code: String);
         let language: LanguageWrapper = locale_code.parse().try_unwrap();
-        let tokenizer = TextAnalyzer::from(SimpleTokenizer)
+        let tokenizer = TextAnalyzer::builder(SimpleTokenizer::default())
             .filter(RemoveLongFilter::limit(40))
             .filter(LowerCaser)
-            .filter(Stemmer::new(language.0));
+            .filter(Stemmer::new(language.0))
+            .build();
         wrap_tokenizer(tokenizer)
     }
@@ -61,13 +63,14 @@ methods!(
             prefix_only
         );
-        wrap_tokenizer(TextAnalyzer::from(tokenizer))
+        wrap_tokenizer(TextAnalyzer::from(tokenizer.try_unwrap()))
     }
     fn extract_terms(text: RString) -> Array {
         try_unwrap_params!(text: String);
-        let mut token_stream = unwrap_tokenizer(&_itself).token_stream(&text);
+        let mut tokenizer: TextAnalyzer = unwrap_tokenizer(_itself);
+        let mut token_stream = tokenizer.token_stream(&text);
         let mut terms = vec![];
         while token_stream.advance() {
@@ -91,4 +94,7 @@ pub(super) fn init() {
         klass.def_self("__new_ngram_tokenizer", new_ngram_tokenizer);
         klass.def("__extract_terms", extract_terms);
     });
-}
+}

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tantiny-in-memory
 version: !ruby/object:Gem::Version
-  version: 1.0.6
+  version: 1.0.8
 platform: ruby
 authors:
 - Christian Toscano
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-09-04 00:00:00.000000000 Z
+date: 2023-09-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-next
@@ -95,7 +95,6 @@ files:
 - bin/console
 - bin/setup
 - ext/Rakefile
-- lib/.rbnext/3.0/tantiny/schema.rb
 - lib/tantiny-in-memory.rb
 - lib/tantiny.rb
 - lib/tantiny/errors.rb

data/lib/.rbnext/3.0/tantiny/schema.rb DELETED Viewed

@@ -1,53 +0,0 @@
-# frozen_string_literal: true
-module Tantiny
-  class Schema
-    attr_reader :default_tokenizer,
-      :id_field,
-      :text_fields,
-      :string_fields,
-      :integer_fields,
-      :double_fields,
-      :date_fields,
-      :facet_fields,
-      :field_tokenizers
-    def initialize(tokenizer, &block)
-      @default_tokenizer = tokenizer
-      @id_field = :id
-      @text_fields = []
-      @string_fields = []
-      @integer_fields = []
-      @double_fields = []
-      @date_fields = []
-      @facet_fields = []
-      @field_tokenizers = {}
-      instance_exec(&block)
-    end
-    def tokenizer_for(field)
-      field_tokenizers[field] || default_tokenizer
-    end
-    private
-    def id(key) ;  @id_field = key; end
-    def string(key) ;  @string_fields << key; end
-    def integer(key) ;  @integer_fields << key; end
-    def double(key) ;  @double_fields << key; end
-    def date(key) ;  @date_fields << key; end
-    def facet(key) ;  @facet_fields << key; end
-    def text(key, tokenizer: nil)
-      @field_tokenizers[key] = tokenizer if tokenizer
-      @text_fields << key
-    end
-  end
-end