tantiny-in-memory 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.toml +20 -0
- data/LICENSE +21 -0
- data/README.md +339 -0
- data/bin/console +64 -0
- data/bin/setup +6 -0
- data/ext/Rakefile +10 -0
- data/lib/tantiny/errors.rb +38 -0
- data/lib/tantiny/helpers.rb +19 -0
- data/lib/tantiny/index.rb +165 -0
- data/lib/tantiny/query.rb +165 -0
- data/lib/tantiny/schema.rb +53 -0
- data/lib/tantiny/tokenizer.rb +28 -0
- data/lib/tantiny/version.rb +5 -0
- data/lib/tantiny.rb +27 -0
- data/lib/tantiny.so +0 -0
- data/sig/tantiny/errors.rbs +20 -0
- data/sig/tantiny/helpers.rbs +8 -0
- data/sig/tantiny/index.rbs +103 -0
- data/sig/tantiny/query.rbs +135 -0
- data/sig/tantiny/schema.rbs +26 -0
- data/sig/tantiny/tokenizer.rbs +25 -0
- data/sig/tantiny/version.rbs +3 -0
- data/sig/tantiny.rbs +5 -0
- data/src/helpers.rs +202 -0
- data/src/index.rs +286 -0
- data/src/lib.rs +14 -0
- data/src/query.rs +260 -0
- data/src/tokenizer.rs +94 -0
- metadata +148 -0
| @@ -0,0 +1,135 @@ | |
| 1 | 
            +
            # Type signatures for Tantiny::Query.
            #
            # The `fields`, `numeric` and `date` type names used below are not declared
            # in this file; they are expected to be aliases declared elsewhere in the
            # gem's signatures.
            module Tantiny
              class Query
                TYPES: Array[Symbol]
                DEFAULT_BOOST: Float
                DEFAULT_FUZZY_DISTANCE: Integer

                # Combine any number of existing queries into one.
                def self.disjunction: (*Query queries) -> Query
                def self.conjunction: (*Query queries) -> Query

                # The index argument is optional for these two constructors.
                def self.all_query: (?Index _index) -> Query
                def self.empty_query: (?Index _index) -> Query

                # Query constructors taking an index, the field(s) to search and the
                # query-specific value, plus free-form keyword options.
                def self.term_query: (Index index, fields fields, String term, **untyped options) -> Query

                def self.fuzzy_term_query: (Index index, fields fields, String term, ?Integer distance, **untyped options) -> Query

                def self.phrase_query: (Index index, fields fields, String phrase, **untyped options) -> Query

                def self.regex_query: (Index index, fields fields, String regex, **untyped options) -> Query

                def self.prefix_query: (Index index, fields fields, String prefix, **untyped options) -> Query

                # Unlike the constructors above, this one takes a single Symbol field.
                def self.facet_query: (Index index, Symbol field, String path, **untyped options) -> Query

                def self.range_query: (Index index, fields fields, Range[numeric | date] range, **untyped options) -> Query

                def self.smart_query: (Index index, fields fields, String query_string, **untyped options) -> Query

                # Double-underscore constructors: each takes one String field name and
                # no keyword options.
                # NOTE(review): presumably these are the entry points provided by the
                # native extension -- confirm against src/query.rs.
                def self.__new_all_query: () -> Query
                def self.__new_empty_query: () -> Query

                def self.__new_term_query: (Index index, String field, String term) -> Query

                def self.__new_fuzzy_term_query: (Index index, String field, String term, Integer distance) -> Query

                def self.__new_regex_query: (Index index, String field, String regex) -> Query

                def self.__new_range_query: (Index index, String field, untyped from, untyped to) -> Query

                def self.__new_phrase_query: (Index index, String field, Array[String] terms) -> Query

                def self.__new_facet_query: (Index index, String field, String path) -> Query

                def self.__disjunction: (Array[Query] queries) -> Query
                def self.__conjunction: (Array[Query] queries) -> Query

                # Operators and modifiers on a query instance; each returns a Query.
                def |: (Query query) -> Query
                def &: (Query query) -> Query
                def !: () -> Query
                def boost: (numeric boost_factor) -> Query

                def __negation: () -> Query
                def __boost: (Float boost_factor) -> Query

                private

                def self.construct_query: (Index index, Symbol query_type, Array[Symbol] allowed_fields, fields fields, Array[untyped] params, **untyped options) -> Query

                def self.text_and_strings: (Index index) -> Array[Symbol]
              end
            end
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            # Type signatures for Tantiny::Schema.
            module Tantiny
              class Schema
                # Tokenizer settings exposed by the schema.
                attr_reader default_tokenizer: Tokenizer
                attr_reader field_tokenizers: Hash[Symbol, Tokenizer]

                # Name of the id field.
                attr_reader id_field: Symbol

                # Field names, one list per field kind.
                attr_reader text_fields: Array[Symbol]
                attr_reader string_fields: Array[Symbol]
                attr_reader integer_fields: Array[Symbol]
                attr_reader double_fields: Array[Symbol]
                attr_reader date_fields: Array[Symbol]
                attr_reader facet_fields: Array[Symbol]

                # Takes a tokenizer and a block.
                def initialize: (Tokenizer tokenizer) { (*untyped) -> void } -> void

                def tokenizer_for: (Symbol field) -> Tokenizer

                private

                # One private method per field kind; only `text` accepts an optional
                # `tokenizer:` keyword.
                # NOTE(review): presumably these are the DSL methods called from the
                # block given to `initialize` -- confirm against lib/tantiny/schema.rb.
                def id: (Symbol key) -> void
                def text: (Symbol key, ?tokenizer: Tokenizer) -> void
                def string: (Symbol key) -> void
                def integer: (Symbol key) -> void
                def double: (Symbol key) -> void
                def date: (Symbol key) -> void
                def facet: (Symbol key) -> void
              end
            end
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            # Type signatures for Tantiny::Tokenizer.
            module Tantiny
              class Tokenizer
                def self.default: () -> Tokenizer

                # Builds a tokenizer of the given kind; options are free-form keywords.
                def self.new: (Symbol kind, **untyped options) -> Tokenizer

                # Double-underscore constructors with fully positional arguments.
                # NOTE(review): presumably provided by the native extension -- confirm
                # against src/tokenizer.rs.
                def self.__new_ngram_tokenizer: (Integer min, Integer max, bool prefix_only) -> Tokenizer

                def self.__new_stemmer_tokenizer: (String locale_code) -> Tokenizer

                def self.__new_simple_tokenizer: () -> Tokenizer

                public

                # Both methods take a String and return an Array of Strings.
                def terms: (String string) -> Array[String]

                def __extract_terms: (String string) -> Array[String]
              end
            end
         | 
    
        data/sig/tantiny.rbs
    ADDED
    
    
    
        data/src/helpers.rs
    ADDED
    
    | @@ -0,0 +1,202 @@ | |
| 1 | 
            +
            use std::collections::HashMap;
         | 
| 2 | 
            +
            use rutie::{AnyException, Array, Exception, RString, Hash, Integer, Float, Boolean, Module};
         | 
| 3 | 
            +
            use tantivy::tokenizer::Language;
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            // Macro dependencies:
         | 
| 6 | 
            +
            pub(super) use paste::paste;
         | 
| 7 | 
            +
            pub(super) use rutie::{class, wrappable_struct, AnyObject, VerifiedObject, VM, Object, Class};
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            pub(crate) fn namespace() -> Module {
         | 
| 10 | 
            +
                Module::from_existing("Tantiny")
         | 
| 11 | 
            +
            }
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            pub(crate) struct LanguageWrapper(pub(crate) Language);
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            impl std::str::FromStr for LanguageWrapper {
         | 
| 16 | 
            +
                type Err = String;
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                fn from_str(s: &str) -> Result<Self, Self::Err> {
         | 
| 19 | 
            +
                    match s {
         | 
| 20 | 
            +
                        "en" => Ok(LanguageWrapper(Language::English)),
         | 
| 21 | 
            +
                        "ar" => Ok(LanguageWrapper(Language::Arabic)),
         | 
| 22 | 
            +
                        "da" => Ok(LanguageWrapper(Language::Danish)),
         | 
| 23 | 
            +
                        "nl" => Ok(LanguageWrapper(Language::Dutch)),
         | 
| 24 | 
            +
                        "fi" => Ok(LanguageWrapper(Language::Finnish)),
         | 
| 25 | 
            +
                        "fr" => Ok(LanguageWrapper(Language::French)),
         | 
| 26 | 
            +
                        "de" => Ok(LanguageWrapper(Language::German)),
         | 
| 27 | 
            +
                        "el" => Ok(LanguageWrapper(Language::Greek)),
         | 
| 28 | 
            +
                        "hu" => Ok(LanguageWrapper(Language::Hungarian)),
         | 
| 29 | 
            +
                        "it" => Ok(LanguageWrapper(Language::Italian)),
         | 
| 30 | 
            +
                        "no" => Ok(LanguageWrapper(Language::Norwegian)),
         | 
| 31 | 
            +
                        "pt" => Ok(LanguageWrapper(Language::Portuguese)),
         | 
| 32 | 
            +
                        "ro" => Ok(LanguageWrapper(Language::Romanian)),
         | 
| 33 | 
            +
                        "ru" => Ok(LanguageWrapper(Language::Russian)),
         | 
| 34 | 
            +
                        "es" => Ok(LanguageWrapper(Language::Spanish)),
         | 
| 35 | 
            +
                        "sv" => Ok(LanguageWrapper(Language::Swedish)),
         | 
| 36 | 
            +
                        "ta" => Ok(LanguageWrapper(Language::Tamil)),
         | 
| 37 | 
            +
                        "tr" => Ok(LanguageWrapper(Language::Turkish)),
         | 
| 38 | 
            +
                        _ => Err(format!("Language '{}' is not supported.", s)),
         | 
| 39 | 
            +
                    }
         | 
| 40 | 
            +
                }
         | 
| 41 | 
            +
            }
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            /// Conversion of `self` into a plain `T`.
            ///
            /// The method returns `T` directly rather than a `Result`. The implementations
            /// for `Result` and `Option` in this file report failure by raising a Ruby
            /// exception through `VM::raise_ex`.
            pub(crate) trait TryUnwrap<T> {
                /// Consumes `self` and produces the unwrapped value.
                fn try_unwrap(self) -> T;
            }
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            // Generates two `TryUnwrap<$type>` impls for a Ruby wrapper type:
            //   * one for `$ruby_type` itself, calling its `to_<type>()` method, where
            //     the method name is built by `paste!` from the lower-cased `$type`
            //     (e.g. `String` -> `to_string`, `i64` -> `to_i64`);
            //   * one for `AnyObject`, which first converts to `$ruby_type` and then
            //     calls the same `to_<type>()` method.
            macro_rules! primitive_try_unwrap_impl {
                ( $ruby_type:ty, $type:ty ) => {
                    paste! {
                        impl TryUnwrap<$type> for $ruby_type {
                            fn try_unwrap(self) -> $type {
                                self.[<to_ $type:lower>]()
                            }
                        }

                        impl TryUnwrap<$type> for AnyObject {
                            fn try_unwrap(self) -> $type {
                                // `try_convert_to` yields a `Result`; its own `try_unwrap`
                                // hands back the `$ruby_type` on success.
                                let converted: $ruby_type =
                                    self.try_convert_to::<$ruby_type>().try_unwrap();

                                converted.[<to_ $type:lower>]()
                            }
                        }
                    }
                };
            }
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            primitive_try_unwrap_impl!(RString, String);
         | 
| 68 | 
            +
            primitive_try_unwrap_impl!(Integer, i64);
         | 
| 69 | 
            +
            primitive_try_unwrap_impl!(Float, f64);
         | 
| 70 | 
            +
            primitive_try_unwrap_impl!(Boolean, bool);
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            impl<T> TryUnwrap<Vec<T>> for Array where
         | 
| 73 | 
            +
                AnyObject: TryUnwrap<T>
         | 
| 74 | 
            +
            {
         | 
| 75 | 
            +
                fn try_unwrap(self) -> Vec<T> {
         | 
| 76 | 
            +
                    let mut vec = Vec::new();
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    for elem in self {
         | 
| 79 | 
            +
                        vec.push(elem.try_unwrap());
         | 
| 80 | 
            +
                    }
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                    vec
         | 
| 83 | 
            +
                }
         | 
| 84 | 
            +
            }
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            impl<K, V> TryUnwrap<HashMap<K, V>> for Hash where
         | 
| 87 | 
            +
                AnyObject: TryUnwrap<K> + TryUnwrap<V>,
         | 
| 88 | 
            +
                K: Eq + std::hash::Hash
         | 
| 89 | 
            +
            {
         | 
| 90 | 
            +
                fn try_unwrap(self) -> HashMap<K, V> {
         | 
| 91 | 
            +
                    let mut hashmap = HashMap::new();
         | 
| 92 | 
            +
             | 
| 93 | 
            +
                    self.each(|key, value| {
         | 
| 94 | 
            +
                        hashmap.insert(key.try_unwrap(), value.try_unwrap());
         | 
| 95 | 
            +
                    });
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                    hashmap
         | 
| 98 | 
            +
                }
         | 
| 99 | 
            +
            }
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            impl<T, E> TryUnwrap<T> for Result<T, E>
         | 
| 102 | 
            +
            where
         | 
| 103 | 
            +
                E: ToString,
         | 
| 104 | 
            +
            {
         | 
| 105 | 
            +
                fn try_unwrap(self) -> T {
         | 
| 106 | 
            +
                    self.map_err(|e| {
         | 
| 107 | 
            +
                        VM::raise_ex(AnyException::new(
         | 
| 108 | 
            +
                            "Tantiny::TantivyError",
         | 
| 109 | 
            +
                            Some(&e.to_string()),
         | 
| 110 | 
            +
                        ))
         | 
| 111 | 
            +
                    })
         | 
| 112 | 
            +
                    .unwrap()
         | 
| 113 | 
            +
                }
         | 
| 114 | 
            +
            }
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            impl<T> TryUnwrap<T> for Option<T> {
         | 
| 117 | 
            +
                fn try_unwrap(self) -> T {
         | 
| 118 | 
            +
                    if let Some(value) = self {
         | 
| 119 | 
            +
                        value
         | 
| 120 | 
            +
                    } else {
         | 
| 121 | 
            +
                        VM::raise_ex(AnyException::new(
         | 
| 122 | 
            +
                            "Tantiny::UnexpectedNone",
         | 
| 123 | 
            +
                            Some(&*format!("{}", std::any::type_name::<T>())))
         | 
| 124 | 
            +
                        );
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                        self.unwrap()
         | 
| 127 | 
            +
                    }
         | 
| 128 | 
            +
                }
         | 
| 129 | 
            +
            }
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            // Rebinds a comma-separated list of parameters to their unwrapped values.
            //
            // Each entry is either `name: Type` or just `name`. `name` must be a
            // `Result`-like binding (the expansion calls `map_err` on it):
            //   * an `Err` is passed to `VM::raise_ex`;
            //   * for `name: Type`, the `Ok` value is then converted through
            //     `TryUnwrap<Type>` and `name` is shadowed with the result;
            //   * for a bare `name`, `name` is shadowed with the `Ok` value as-is.
            //
            // The macro recurses over the remaining tokens. The arm order matters, so
            // the arms below must not be reordered.
            macro_rules! try_unwrap_params {
                // `name: Type, <rest>`
                (
                    $param:ident: $type:ty,
                    $( $rest:tt )*
                ) => {
                    let _raw = $param.map_err(|error| $crate::helpers::VM::raise_ex(error)).unwrap();
                    let $param = <_ as $crate::helpers::TryUnwrap<$type>>::try_unwrap(_raw);

                    try_unwrap_params!($($rest)*)
                };

                // `name, <rest>`
                (
                    $param:ident,
                    $( $rest:tt )*
                ) => {
                    let $param = $param.map_err(|error| $crate::helpers::VM::raise_ex(error)).unwrap();

                    try_unwrap_params!($($rest)*)
                };

                // A final entry without a trailing comma is forwarded with the comma
                // added.
                ( $param:ident: $type:ty ) => {
                    try_unwrap_params!($param: $type,)
                };
                ( $param:ident ) => {
                    try_unwrap_params!($param,)
                };

                // End of recursion: nothing left to unwrap.
                () => {}
            }
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            pub(crate) use try_unwrap_params;
         | 
| 162 | 
            +
             | 
| 163 | 
            +
            // Generates the boilerplate that ties a Rust struct to a Ruby class.
            //
            // Arguments:
            //   $ruby_type - identifier of the Ruby-facing wrapper type declared via
            //                rutie's `class!`;
            //   $type      - the Rust struct to wrap;
            //   $klass     - string literal naming the class nested inside the module
            //                returned by `namespace()`.
            //
            // The expansion defines, in the invoking module:
            //   * the `$ruby_type` class type;
            //   * a `<$type>Wrapper` and a `<$TYPE>_WRAPPER` item via `wrappable_struct!`
            //     (names built by `paste!`);
            //   * `klass()`, returning the nested Ruby class;
            //   * `TryUnwrap<$ruby_type>` for `AnyObject`;
            //   * `VerifiedObject` for `$ruby_type`.
            macro_rules! scaffold {
                ( $ruby_type:ident, $type:ty, $klass:literal ) => {
                    $crate::helpers::class!($ruby_type);

                    // There is a bug in Rutie which prevents using this macro
                    // by resolving it by a full path, so the only workaround is:
                    use crate::helpers::wrappable_struct;

                    $crate::helpers::paste! {
                        wrappable_struct!(
                            $type,
                            [<$type Wrapper>],
                            [<$type:snake:upper _WRAPPER>]
                        );
                    }

                    // Ruby class `$klass` nested under the crate's namespace module.
                    pub(crate) fn klass() -> $crate::helpers::Class {
                        $crate::helpers::namespace().get_nested_class($klass)
                    }

                    impl $crate::helpers::TryUnwrap<$ruby_type> for $crate::helpers::AnyObject {
                        fn try_unwrap(self) -> $ruby_type {
                            // The conversion result is unwrapped through the `TryUnwrap`
                            // impl for `Result`.
                            <_ as $crate::helpers::TryUnwrap<$ruby_type>>::try_unwrap(
                                self.try_convert_to::<$ruby_type>(),
                            )
                        }
                    }

                    impl $crate::helpers::VerifiedObject for $ruby_type {
                        // Accepts only objects whose class is exactly `klass()`.
                        fn is_correct_type<T: $crate::helpers::Object>(object: &T) -> bool {
                            let actual = object.class();

                            actual == klass()
                        }

                        fn error_message() -> &'static str {
                            concat!("Error converting to ", stringify!($ruby_type), ".")
                        }
                    }
                }
            }
         | 
| 201 | 
            +
             | 
| 202 | 
            +
            pub(crate) use scaffold;
         | 
    
        data/src/index.rs
    ADDED
    
    | @@ -0,0 +1,286 @@ | |
| 1 | 
            +
            use std::collections::HashMap;
         | 
| 2 | 
            +
            use std::str::FromStr;
         | 
| 3 | 
            +
            use rutie::{methods, Object, AnyObject, Integer, NilClass, Array, RString, Hash};
         | 
| 4 | 
            +
            use tantivy::{doc, Document, Term, ReloadPolicy, Index, IndexWriter, IndexReader, DateTime};
         | 
| 5 | 
            +
            use tantivy::schema::{Schema, TextOptions, TextFieldIndexing, IndexRecordOption, FacetOptions, STRING, STORED, INDEXED, FAST};
         | 
| 6 | 
            +
            use tantivy::collector::TopDocs;
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            use crate::helpers::{scaffold, try_unwrap_params, TryUnwrap};
         | 
| 9 | 
            +
            use crate::query::{unwrap_query, RTantinyQuery};
         | 
| 10 | 
            +
            use crate::tokenizer::{unwrap_tokenizer, RTantinyTokenizer};
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            pub struct TantinyIndex {
         | 
| 13 | 
            +
                pub(crate) schema: Schema,
         | 
| 14 | 
            +
                pub(crate) index: Index,
         | 
| 15 | 
            +
                pub(crate) index_writer: Option<IndexWriter>,
         | 
| 16 | 
            +
                pub(crate) index_reader: IndexReader,
         | 
| 17 | 
            +
            }
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            scaffold!(RTantinyIndex, TantinyIndex, "Index");
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            pub(crate) fn unwrap_index(index: &RTantinyIndex) -> &TantinyIndex {
         | 
| 22 | 
            +
                index.get_data(&*TANTINY_INDEX_WRAPPER)
         | 
| 23 | 
            +
            }
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            pub(crate) fn unwrap_index_mut(index: &mut RTantinyIndex) -> &mut TantinyIndex {
         | 
| 26 | 
            +
                index.get_data_mut(&*TANTINY_INDEX_WRAPPER)
         | 
| 27 | 
            +
            }
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            #[rustfmt::skip::macros(methods)]
         | 
| 30 | 
            +
            methods!(
         | 
| 31 | 
            +
                RTantinyIndex,
         | 
| 32 | 
            +
                _itself,
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                fn new_index(
         | 
| 35 | 
            +
                    _path: RString,
         | 
| 36 | 
            +
                    default_tokenizer: AnyObject,
         | 
| 37 | 
            +
                    field_tokenizers: Hash,
         | 
| 38 | 
            +
                    text_fields: Array,
         | 
| 39 | 
            +
                    string_fields: Array,
         | 
| 40 | 
            +
                    integer_fields: Array,
         | 
| 41 | 
            +
                    double_fields: Array,
         | 
| 42 | 
            +
                    date_fields: Array,
         | 
| 43 | 
            +
                    facet_fields: Array
         | 
| 44 | 
            +
                ) -> RTantinyIndex {
         | 
| 45 | 
            +
                    try_unwrap_params!(
         | 
| 46 | 
            +
                        _path: String,
         | 
| 47 | 
            +
                        default_tokenizer: RTantinyTokenizer,
         | 
| 48 | 
            +
                        field_tokenizers: HashMap<String, RTantinyTokenizer>,
         | 
| 49 | 
            +
                        text_fields: Vec<String>,
         | 
| 50 | 
            +
                        string_fields: Vec<String>,
         | 
| 51 | 
            +
                        integer_fields: Vec<String>,
         | 
| 52 | 
            +
                        double_fields: Vec<String>,
         | 
| 53 | 
            +
                        date_fields: Vec<String>,
         | 
| 54 | 
            +
                        facet_fields: Vec<String>
         | 
| 55 | 
            +
                    );
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                    let mut schema_builder = Schema::builder();
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    schema_builder.add_text_field("id", STRING | STORED);
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                    for field in text_fields {
         | 
| 62 | 
            +
                        let tokenizer_name =
         | 
| 63 | 
            +
                            if field_tokenizers.contains_key(&field) {
         | 
| 64 | 
            +
                                &*field
         | 
| 65 | 
            +
                            } else {
         | 
| 66 | 
            +
                                "default"
         | 
| 67 | 
            +
                            };
         | 
| 68 | 
            +
                        let indexing = TextFieldIndexing::default()
         | 
| 69 | 
            +
                            .set_tokenizer(tokenizer_name)
         | 
| 70 | 
            +
                            .set_index_option(IndexRecordOption::WithFreqsAndPositions);
         | 
| 71 | 
            +
                        let options = TextOptions::default()
         | 
| 72 | 
            +
                            .set_indexing_options(indexing);
         | 
| 73 | 
            +
                        schema_builder.add_text_field(&field, options);
         | 
| 74 | 
            +
                    }
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                    for field in string_fields {
         | 
| 77 | 
            +
                        schema_builder.add_text_field(&field, STRING);
         | 
| 78 | 
            +
                    }
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    for field in integer_fields {
         | 
| 81 | 
            +
                        schema_builder.add_i64_field(&field, FAST | INDEXED);
         | 
| 82 | 
            +
                    }
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                    for field in double_fields {
         | 
| 85 | 
            +
                        schema_builder.add_f64_field(&field, FAST | INDEXED);
         | 
| 86 | 
            +
                    }
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                    for field in date_fields {
         | 
| 89 | 
            +
                        schema_builder.add_date_field(&field, FAST | INDEXED);
         | 
| 90 | 
            +
                    }
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                    for field in facet_fields {
         | 
| 93 | 
            +
                        let options = FacetOptions::default().set_indexed();
         | 
| 94 | 
            +
                        schema_builder.add_facet_field(&field, options);
         | 
| 95 | 
            +
                    }
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                    let schema = schema_builder.build();
         | 
| 98 | 
            +
                    let index = Index::create_in_ram(schema.clone());
         | 
| 99 | 
            +
                    let tokenizers = index.tokenizers();
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                    tokenizers.register("default", unwrap_tokenizer(&default_tokenizer).clone());
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                    for (field, tokenizer) in field_tokenizers {
         | 
| 104 | 
            +
                        tokenizers.register(&field, unwrap_tokenizer(&tokenizer).clone())
         | 
| 105 | 
            +
                    }
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                    let index_writer = None;
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                    let index_reader = index
         | 
| 110 | 
            +
                        .reader_builder()
         | 
| 111 | 
            +
                        .reload_policy(ReloadPolicy::Manual)
         | 
| 112 | 
            +
                        .try_into()
         | 
| 113 | 
            +
                        .try_unwrap();
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                    klass().wrap_data(
         | 
| 116 | 
            +
                        TantinyIndex { index, index_writer, index_reader, schema },
         | 
| 117 | 
            +
                        &*TANTINY_INDEX_WRAPPER
         | 
| 118 | 
            +
                    )
         | 
| 119 | 
            +
                }
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                fn add_document(
         | 
| 122 | 
            +
                    id: RString,
         | 
| 123 | 
            +
                    text_fields: Hash,
         | 
| 124 | 
            +
                    string_fields: Hash,
         | 
| 125 | 
            +
                    integer_fields: Hash,
         | 
| 126 | 
            +
                    double_fields: Hash,
         | 
| 127 | 
            +
                    date_fields: Hash,
         | 
| 128 | 
            +
                    facet_fields: Hash
         | 
| 129 | 
            +
                ) -> NilClass {
         | 
| 130 | 
            +
                    try_unwrap_params!(
         | 
| 131 | 
            +
                        id: String,
         | 
| 132 | 
            +
                        text_fields: HashMap<String, String>,
         | 
| 133 | 
            +
                        string_fields: HashMap<String, String>,
         | 
| 134 | 
            +
                        integer_fields: HashMap<String, i64>,
         | 
| 135 | 
            +
                        double_fields: HashMap<String, f64>,
         | 
| 136 | 
            +
                        date_fields: HashMap<String, String>,
         | 
| 137 | 
            +
                        facet_fields: HashMap<String, String>
         | 
| 138 | 
            +
                    );
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                    let internal = unwrap_index(&_itself);
         | 
| 141 | 
            +
                    let index_writer = internal.index_writer.as_ref().try_unwrap();
         | 
| 142 | 
            +
                    let schema = &internal.schema;
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                    let mut doc = Document::default();
         | 
| 145 | 
            +
             | 
| 146 | 
            +
                    let id_field = schema.get_field("id").try_unwrap();
         | 
| 147 | 
            +
                    doc.add_text(id_field, &id);
         | 
| 148 | 
            +
             | 
| 149 | 
            +
                    for (key, value) in text_fields.iter() {
         | 
| 150 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 151 | 
            +
                        doc.add_text(field, value);
         | 
| 152 | 
            +
                    }
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                    for (key, value) in string_fields.iter() {
         | 
| 155 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 156 | 
            +
                        doc.add_text(field, value);
         | 
| 157 | 
            +
                    }
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                    for (key, &value) in integer_fields.iter() {
         | 
| 160 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 161 | 
            +
                        doc.add_i64(field, value);
         | 
| 162 | 
            +
                    }
         | 
| 163 | 
            +
             | 
| 164 | 
            +
                    for (key, &value) in double_fields.iter() {
         | 
| 165 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 166 | 
            +
                        doc.add_f64(field, value);
         | 
| 167 | 
            +
                    }
         | 
| 168 | 
            +
             | 
| 169 | 
            +
                    for (key, value) in date_fields.iter() {
         | 
| 170 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 171 | 
            +
                        let value = DateTime::from_str(value).try_unwrap();
         | 
| 172 | 
            +
                        doc.add_date(field, &value);
         | 
| 173 | 
            +
                    }
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                    for (key, value) in facet_fields.iter() {
         | 
| 176 | 
            +
                        let field = schema.get_field(key).try_unwrap();
         | 
| 177 | 
            +
                        doc.add_facet(field, &value);
         | 
| 178 | 
            +
                    }
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                    let doc_id = Term::from_field_text(id_field, &id);
         | 
| 181 | 
            +
                    index_writer.delete_term(doc_id.clone());
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                    index_writer.add_document(doc);
         | 
| 184 | 
            +
             | 
| 185 | 
            +
                    NilClass::new()
         | 
| 186 | 
            +
                }
         | 
| 187 | 
            +
             | 
| 188 | 
            +
                fn delete_document(id: RString) -> NilClass {
         | 
| 189 | 
            +
                    try_unwrap_params!(id: String);
         | 
| 190 | 
            +
             | 
| 191 | 
            +
                    let internal = unwrap_index(&_itself);
         | 
| 192 | 
            +
                    let index_writer = internal.index_writer.as_ref().unwrap();
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                    let id_field = internal.schema.get_field("id").try_unwrap();
         | 
| 195 | 
            +
                    let doc_id = Term::from_field_text(id_field, &id);
         | 
| 196 | 
            +
             | 
| 197 | 
            +
                    index_writer.delete_term(doc_id.clone());
         | 
| 198 | 
            +
             | 
| 199 | 
            +
                    NilClass::new()
         | 
| 200 | 
            +
                }
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                fn acquire_index_writer(
         | 
| 203 | 
            +
                    overall_memory: Integer
         | 
| 204 | 
            +
                ) -> NilClass {
         | 
| 205 | 
            +
                    try_unwrap_params!(overall_memory: i64);
         | 
| 206 | 
            +
             | 
| 207 | 
            +
                    let internal = unwrap_index_mut(&mut _itself);
         | 
| 208 | 
            +
             | 
| 209 | 
            +
                    let mut index_writer = internal.index
         | 
| 210 | 
            +
                        .writer(overall_memory as usize)
         | 
| 211 | 
            +
                        .try_unwrap();
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                    internal.index_writer = Some(index_writer);
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                    NilClass::new()
         | 
| 216 | 
            +
                }
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                fn release_index_writer() -> NilClass {
         | 
| 219 | 
            +
                    let internal = unwrap_index_mut(&mut _itself);
         | 
| 220 | 
            +
             | 
| 221 | 
            +
                    drop(internal.index_writer.as_ref().try_unwrap());
         | 
| 222 | 
            +
                    internal.index_writer = None;
         | 
| 223 | 
            +
             | 
| 224 | 
            +
                    NilClass::new()
         | 
| 225 | 
            +
                }
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                fn commit() -> NilClass {
         | 
| 228 | 
            +
                    let internal = unwrap_index_mut(&mut _itself);
         | 
| 229 | 
            +
                    let index_writer = internal.index_writer.as_mut().try_unwrap();
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                    index_writer.commit().try_unwrap();
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                    NilClass::new()
         | 
| 234 | 
            +
                }
         | 
| 235 | 
            +
             | 
| 236 | 
            +
                fn reload() -> NilClass {
         | 
| 237 | 
            +
                    unwrap_index(&_itself).index_reader.reload().try_unwrap();
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                    NilClass::new()
         | 
| 240 | 
            +
                }
         | 
| 241 | 
            +
             | 
| 242 | 
            +
                fn search(
         | 
| 243 | 
            +
                    query: AnyObject,
         | 
| 244 | 
            +
                    limit: Integer
         | 
| 245 | 
            +
                ) -> Array {
         | 
| 246 | 
            +
                    try_unwrap_params!(
         | 
| 247 | 
            +
                        query: RTantinyQuery,
         | 
| 248 | 
            +
                        limit: i64
         | 
| 249 | 
            +
                    );
         | 
| 250 | 
            +
             | 
| 251 | 
            +
                    let internal = unwrap_index(&_itself);
         | 
| 252 | 
            +
                    let id_field = internal.schema.get_field("id").try_unwrap();
         | 
| 253 | 
            +
                    let searcher = internal.index_reader.searcher();
         | 
| 254 | 
            +
                    let query = unwrap_query(&query);
         | 
| 255 | 
            +
             | 
| 256 | 
            +
                    let top_docs = searcher
         | 
| 257 | 
            +
                        .search(query, &TopDocs::with_limit(limit as usize))
         | 
| 258 | 
            +
                        .try_unwrap();
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                    let mut array = Array::with_capacity(top_docs.len());
         | 
| 261 | 
            +
             | 
| 262 | 
            +
                    for (_score, doc_address) in top_docs {
         | 
| 263 | 
            +
                        let doc = searcher.doc(doc_address).try_unwrap();
         | 
| 264 | 
            +
                        if let Some(value) = doc.get_first(id_field) {
         | 
| 265 | 
            +
                            if let Some(id) = (&*value).text() {
         | 
| 266 | 
            +
                                array.push(RString::from(String::from(id)));
         | 
| 267 | 
            +
                            }
         | 
| 268 | 
            +
                        }
         | 
| 269 | 
            +
                    }
         | 
| 270 | 
            +
             | 
| 271 | 
            +
                    array
         | 
| 272 | 
            +
                }
         | 
| 273 | 
            +
            );
         | 
| 274 | 
            +
             | 
| 275 | 
            +
            pub(super) fn init() {
         | 
| 276 | 
            +
                klass().define(|klass| {
         | 
| 277 | 
            +
                    klass.def_self("__new", new_index);
         | 
| 278 | 
            +
                    klass.def("__add_document", add_document);
         | 
| 279 | 
            +
                    klass.def("__delete_document", delete_document);
         | 
| 280 | 
            +
                    klass.def("__acquire_index_writer", acquire_index_writer);
         | 
| 281 | 
            +
                    klass.def("__release_index_writer", release_index_writer);
         | 
| 282 | 
            +
                    klass.def("__commit", commit);
         | 
| 283 | 
            +
                    klass.def("__reload", reload);
         | 
| 284 | 
            +
                    klass.def("__search", search);
         | 
| 285 | 
            +
                });
         | 
| 286 | 
            +
            }
         | 
    
        data/src/lib.rs
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
| 1 | 
            +
            mod helpers;
         | 
| 2 | 
            +
            #[allow(improper_ctypes_definitions)]
         | 
| 3 | 
            +
            mod index;
         | 
| 4 | 
            +
            #[allow(improper_ctypes_definitions)]
         | 
| 5 | 
            +
            mod query;
         | 
| 6 | 
            +
            #[allow(improper_ctypes_definitions)]
         | 
| 7 | 
            +
            mod tokenizer;
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            /// Unmangled C-ABI entry point that runs the `init` function of the `index`,
            /// `query` and `tokenizer` modules, in that order.
            ///
            /// NOTE(review): presumably invoked by the Ruby VM when the `tantiny` shared
            /// library is loaded (the `Init_<name>` convention) - confirm against the gem's
            /// loading code.
            #[no_mangle]
         | 
| 10 | 
            +
            pub extern "C" fn Init_tantiny() {
         | 
| 11 | 
            +
                index::init();
         | 
| 12 | 
            +
                query::init();
         | 
| 13 | 
            +
                tokenizer::init();
         | 
| 14 | 
            +
            }
         |