tantiny-in-memory 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +8 -0
- data/Cargo.toml +20 -0
- data/LICENSE +21 -0
- data/README.md +339 -0
- data/bin/console +64 -0
- data/bin/setup +6 -0
- data/ext/Rakefile +10 -0
- data/lib/tantiny/errors.rb +38 -0
- data/lib/tantiny/helpers.rb +19 -0
- data/lib/tantiny/index.rb +165 -0
- data/lib/tantiny/query.rb +165 -0
- data/lib/tantiny/schema.rb +53 -0
- data/lib/tantiny/tokenizer.rb +28 -0
- data/lib/tantiny/version.rb +5 -0
- data/lib/tantiny.rb +27 -0
- data/lib/tantiny.so +0 -0
- data/sig/tantiny/errors.rbs +20 -0
- data/sig/tantiny/helpers.rbs +8 -0
- data/sig/tantiny/index.rbs +103 -0
- data/sig/tantiny/query.rbs +135 -0
- data/sig/tantiny/schema.rbs +26 -0
- data/sig/tantiny/tokenizer.rbs +25 -0
- data/sig/tantiny/version.rbs +3 -0
- data/sig/tantiny.rbs +5 -0
- data/src/helpers.rs +202 -0
- data/src/index.rs +286 -0
- data/src/lib.rs +14 -0
- data/src/query.rs +260 -0
- data/src/tokenizer.rs +94 -0
- metadata +148 -0
| @@ -0,0 +1,165 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Tantiny
              # Ruby-side handle for a search index.
              #
              # Methods prefixed with a double underscore (`__new`, `__add_document`,
              # `__search`, ...) are not defined in this file; presumably they are
              # supplied by the gem's native extension.
              class Index
                LOCKFILE = ".tantiny.lock"
                DEFAULT_WRITER_MEMORY = 5_000_000 # 5MB
                DEFAULT_LIMIT = 10

                # Builds a Schema from the given block and creates the index object.
                #
                # The object comes from `__new`, so `initialize` is not run automatically
                # and is invoked explicitly afterwards.
                #
                # @param path [#to_s] index location, passed to `__new` as a String
                # @param options [Hash] recognised keys: :tokenizer, :writer_memory,
                #   :exclusive_writer
                # @yield schema definition, evaluated by Schema
                # @return [Index]
                def self.new(path, **options, &block)
                  default_tokenizer = options[:tokenizer] || Tokenizer.default
                  schema = Schema.new(default_tokenizer, &block)

                  object = __new(
                    path.to_s,
                    schema.default_tokenizer,
                    schema.field_tokenizers.transform_keys(&:to_s),
                    schema.text_fields.map(&:to_s),
                    schema.string_fields.map(&:to_s),
                    schema.integer_fields.map(&:to_s),
                    schema.double_fields.map(&:to_s),
                    schema.date_fields.map(&:to_s),
                    schema.facet_fields.map(&:to_s)
                  )

                  object.send(:initialize, path, schema, **options)

                  object
                end

                # @param path [Object] index location as given to Index.new
                # @param schema [Schema]
                # @param options [Hash] :writer_memory (defaults to DEFAULT_WRITER_MEMORY)
                #   and :exclusive_writer (defaults to false)
                def initialize(path, schema, **options)
                  @path = path
                  @schema = schema

                  @indexer_memory = options[:writer_memory] || DEFAULT_WRITER_MEMORY
                  @exclusive_writer = options[:exclusive_writer] || false

                  # Per-thread flag: lets nested `transaction` calls on one thread detect
                  # that they are already inside a transaction.
                  @active_transaction = Concurrent::ThreadLocalVar.new(false)
                  @transaction_semaphore = Mutex.new

                  # In exclusive mode the writer is acquired once, up front, and kept.
                  acquire_index_writer if exclusive_writer?
                end

                attr_reader :schema

                # Runs the block as a single unit of index changes.
                #
                # When the current thread is already inside a transaction the block is
                # simply yielded to. Otherwise the call is serialised through
                # `synchronize`, a writer is acquired (unless held exclusively), the
                # block runs and the changes are committed.
                #
                # The writer release and the reset of the thread-local flag happen in an
                # `ensure`, so an exception or a non-local exit (`break`, `throw`) from
                # the block no longer leaves this thread permanently flagged as "inside a
                # transaction". The commit itself only happens when the block completes
                # normally; any exception propagates to the caller.
                #
                # @yield the changes to apply
                # @return [nil]
                def transaction
                  if inside_transaction?
                    yield
                  else
                    synchronize do
                      open_transaction!

                      begin
                        yield
                        commit
                      ensure
                        close_transaction!
                      end
                    end
                  end

                  nil
                end

                # Delegates to `__reload`.
                def reload
                  __reload
                end

                # Adds a document inside a transaction.
                #
                # Field values are read from the document (Hash key lookup or method
                # call, see #resolve), grouped by schema field type and coerced: text,
                # string and facet values with `to_s`, integers with `to_i`, doubles with
                # `to_f`, dates with `Helpers.timestamp`. Fields whose value is nil are
                # left out.
                #
                # @param document [Hash, Object]
                # @return [nil]
                def <<(document)
                  transaction do
                    __add_document(
                      resolve(document, schema.id_field).to_s,
                      slice_document(document, schema.text_fields) { |v| v.to_s },
                      slice_document(document, schema.string_fields) { |v| v.to_s },
                      slice_document(document, schema.integer_fields) { |v| v.to_i },
                      slice_document(document, schema.double_fields) { |v| v.to_f },
                      slice_document(document, schema.date_fields) { |v| Helpers.timestamp(v) },
                      slice_document(document, schema.facet_fields) { |v| v.to_s }
                    )
                  end
                end

                # Deletes the document with the given id inside a transaction.
                #
                # @param id [#to_s]
                # @return [nil]
                def delete(id)
                  transaction do
                    __delete_document(id.to_s)
                  end
                end

                # Runs a search.
                #
                # Anything that is not a Query is converted with `to_s` and turned into a
                # smart query over all text fields of the schema.
                #
                # @param query [Query, #to_s]
                # @param limit [Integer] passed to `__search`
                # @param smart_query_options [Hash] forwarded to Query.smart_query
                # @return whatever `__search` returns
                def search(query, limit: DEFAULT_LIMIT, **smart_query_options)
                  unless query.is_a?(Query)
                    fields = schema.text_fields
                    query = Query.smart_query(self, fields, query.to_s, **smart_query_options)
                  end

                  __search(query, limit)
                end

                # Shortcuts for creating queries: for every entry in Query::TYPES define
                # `<type>_query`, which calls the Query class method of the same name
                # with this index as the first argument.
                Query::TYPES.each do |query_type|
                  method_name = "#{query_type}_query"
                  define_method(method_name) do |*args, **kwargs|
                    Query.send(method_name, self, *args, **kwargs)
                  end
                end

                private

                # Collects the non-nil values of `fields` from the document, keyed by the
                # stringified field name and converted by the given block.
                def slice_document(document, fields, &block)
                  fields.each_with_object({}) do |field, slice|
                    value = resolve(document, field)
                    slice[field.to_s] = block.call(value) unless value.nil?
                  end
                end

                # Reads a field from a Hash (by key) or from any other object (by
                # calling the method named after the field).
                def resolve(document, field)
                  document.is_a?(Hash) ? document[field] : document.send(field)
                end

                # Acquires the index writer, translating the "Failed to acquire Lockfile"
                # TantivyError into IndexWriterBusyError. Other errors are re-raised.
                def acquire_index_writer
                  __acquire_index_writer(@indexer_memory)
                rescue TantivyError => e
                  case e.message
                  when /Failed to acquire Lockfile/
                    raise IndexWriterBusyError.new
                  else
                    raise
                  end
                end

                def release_index_writer
                  __release_index_writer
                end

                def commit
                  __commit
                end

                # Acquires a writer (unless one is held exclusively) and marks the
                # current thread as being inside a transaction.
                def open_transaction!
                  acquire_index_writer unless exclusive_writer?

                  @active_transaction.value = true
                end

                # Releases the writer (unless it is held exclusively) and clears the
                # thread-local transaction flag. The flag is cleared even when releasing
                # the writer raises. Committing is the caller's responsibility.
                def close_transaction!
                  release_index_writer unless exclusive_writer?
                ensure
                  @active_transaction.value = false
                end

                def inside_transaction?
                  @active_transaction.value
                end

                def exclusive_writer?
                  @exclusive_writer
                end

                # Serialises the block through the per-index mutex and then through
                # `Helpers.with_lock` on the lockfile path (Helpers is defined elsewhere;
                # presumably a file-based lock).
                def synchronize(&block)
                  @transaction_semaphore.synchronize do
                    Helpers.with_lock(lockfile_path, &block)
                  end
                end

                def lockfile_path
                  @lockfile_path ||= File.join(@path, LOCKFILE)
                end
              end
            end
         | 
| @@ -0,0 +1,165 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require "date"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Tantiny
              # Query builder.
              #
              # Methods prefixed with a double underscore (`__conjunction`,
              # `__new_term_query`, `__boost`, ...) are not defined in this file;
              # presumably they are supplied by the gem's native extension.
              class Query
                TYPES = %i[
                  all empty term fuzzy_term
                  phrase regex range facet
                  smart prefix
                ].freeze

                DEFAULT_BOOST = 1.0
                DEFAULT_FUZZY_DISTANCE = 1

                class << self
                  # Combines queries so that all of them must match. A single query is
                  # returned as is.
                  def conjunction(*queries)
                    # @type var queries: Array[untyped]
                    queries.one? ? queries.first : __conjunction(queries)
                  end

                  # Combines queries so that any of them may match. A single query is
                  # returned as is.
                  def disjunction(*queries)
                    # @type var queries: Array[untyped]
                    queries.one? ? queries.first : __disjunction(queries)
                  end

                  # The index argument is accepted only so that every `*_query` builder
                  # can be called uniformly with the index first; it is ignored.
                  def all_query(_index = nil)
                    __new_all_query
                  end

                  # See #all_query regarding the ignored argument.
                  def empty_query(_index = nil)
                    __new_empty_query
                  end

                  # Exact term match on text and string fields.
                  #
                  # @param fields [Symbol, Array<Symbol>]
                  # @param term [#to_s]
                  # @param options [Hash] :boost
                  # @raise [UnsupportedField] for a field that is neither text nor string
                  def term_query(index, fields, term, **options)
                    allowed_fields = text_and_strings(index)
                    construct_query(index, :term, allowed_fields, fields, [term.to_s], **options)
                  end

                  # Term match on text and string fields with the given fuzzy distance.
                  #
                  # @param distance [#to_i] defaults to DEFAULT_FUZZY_DISTANCE
                  # @raise [UnsupportedField] for a field that is neither text nor string
                  def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **options)
                    params = [term.to_s, distance.to_i]
                    allowed_fields = text_and_strings(index)
                    construct_query(index, :fuzzy_term, allowed_fields, fields, params, **options)
                  end

                  # Phrase match on text fields. The phrase is split into terms with each
                  # field's own tokenizer, so every field gets its own query; the
                  # per-field queries are then combined with a disjunction.
                  #
                  # @raise [UnsupportedField] for a non-text field
                  def phrase_query(index, fields, phrase, **options)
                    queries = [*fields].map do |f|
                      terms = index.schema.tokenizer_for(f).terms(phrase)
                      allowed_fields = index.schema.text_fields
                      construct_query(index, :phrase, allowed_fields, f, [terms], **options)
                    end

                    queries.empty? ? empty_query : disjunction(*queries)
                  end

                  # Regular-expression match on text and string fields.
                  #
                  # @param regex [#to_s] pattern source
                  # @raise [UnsupportedField] for a field that is neither text nor string
                  def regex_query(index, fields, regex, **options)
                    allowed_fields = text_and_strings(index)
                    construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **options)
                  end

                  # Prefix match, expressed as a regex query: the escaped prefix followed
                  # by `.*`.
                  def prefix_query(index, fields, prefix, **options)
                    regex_query(index, fields, "#{Regexp.escape(prefix)}.*", **options)
                  end

                  # Range match. The class of `range.first` decides which schema fields
                  # are allowed and how the bounds are derived:
                  #
                  # * Integer        -> integer fields, bounds are `range.min` / `range.max`
                  # * Float          -> double fields, bounds are `range.first` / `range.last`
                  # * Date, DateTime -> date fields, bounds converted via Helpers.timestamp
                  #
                  # @raise [UnsupportedRange] for any other bound type
                  # @raise [UnsupportedField] for a field of the wrong type
                  def range_query(index, fields, range, **options)
                    schema = index.schema

                    case range.first
                    when Integer
                      allowed_fields = schema.integer_fields
                      from, to = range.min, range.max
                    when Float
                      allowed_fields = schema.double_fields
                      from, to = range.first, range.last
                    when Date, DateTime
                      # @type var range: Range[Date | DateTime]
                      allowed_fields = schema.date_fields
                      from, to = Helpers.timestamp(range.first), Helpers.timestamp(range.last)
                    else
                      raise UnsupportedRange.new(range.first.class)
                    end

                    # @type var allowed_fields: Array[Symbol]
                    construct_query(index, :range, allowed_fields, fields, [from, to], **options)
                  end

                  # Facet match on facet fields.
                  #
                  # @raise [UnsupportedField] for a non-facet field
                  def facet_query(index, field, path, **options)
                    allowed_fields = index.schema.facet_fields
                    construct_query(index, :facet, allowed_fields, field, [path], **options)
                  end

                  # Builds a query from free-form text.
                  #
                  # For every field the string is tokenized with that field's tokenizer;
                  # fields producing no terms are skipped. Each term becomes a term query
                  # (or a fuzzy term query when :fuzzy_distance is given). The last term
                  # additionally matches as a prefix. All terms of a field must match;
                  # matching any one field is enough.
                  #
                  # @param options [Hash] :fuzzy_distance, :boost
                  def smart_query(index, fields, query_string, **options)
                    fuzzy_distance = options[:fuzzy_distance]
                    boost_factor = options.fetch(:boost, DEFAULT_BOOST)

                    field_queries = [*fields].map do |field|
                      terms = index.schema.tokenizer_for(field).terms(query_string)

                      # See: https://github.com/soutaro/steep/issues/272
                      # @type block: nil | Query
                      next if terms.empty?

                      term_queries = terms.map do |term|
                        if fuzzy_distance.nil?
                          term_query(index, field, term)
                        else
                          fuzzy_term_query(index, field, term, fuzzy_distance)
                        end
                      end

                      # @type var terms: untyped
                      # @type var term_queries: untyped
                      last_term_query = prefix_query(index, field, terms.last) | term_queries.last

                      conjunction(last_term_query, *term_queries[0...-1])
                    end.compact

                    disjunction(*field_queries).boost(boost_factor)
                  end

                  private

                  # Builds one `__new_<query_type>_query` per field and combines them with
                  # a disjunction, applying the optional :boost.
                  #
                  # `fields` may be a single field, a list of fields or nil. The emptiness
                  # check is done on the normalised list, so nil yields the empty query
                  # instead of failing on `nil.empty?`.
                  #
                  # Can't use variadic argument `params` here due to:
                  # https://github.com/soutaro/steep/issues/480
                  #
                  # @raise [UnsupportedField] when a field is not in `allowed_fields`
                  def construct_query(index, query_type, allowed_fields, fields, params, **options)
                    queries = [*fields].map do |field|
                      supported = allowed_fields.include?(field)
                      raise UnsupportedField.new(field) unless supported

                      send("__new_#{query_type}_query", index, field.to_s, *params)
                    end

                    return empty_query if queries.empty?

                    disjunction(*queries).boost(options.fetch(:boost, DEFAULT_BOOST))
                  end

                  # Union of the schema's text and string fields.
                  def text_and_strings(index)
                    index.schema.text_fields | index.schema.string_fields
                  end
                end

                # Disjunction of this query and another one.
                #
                # @raise [ArgumentError] when `other` is not a query
                def |(other)
                  raise ArgumentError, "Not a #{self.class}." unless other.is_a?(self.class)

                  self.class.disjunction(self, other)
                end

                # Conjunction of this query and another one.
                #
                # @raise [ArgumentError] when `other` is not a query
                def &(other)
                  raise ArgumentError, "Not a #{self.class}." unless other.is_a?(self.class)

                  self.class.conjunction(self, other)
                end

                # Negation of this query.
                def !
                  __negation
                end

                # Applies a boost factor. The default factor is a no-op and returns the
                # receiver unchanged.
                #
                # @param boost_factor [#to_f]
                def boost(boost_factor)
                  return self if boost_factor == DEFAULT_BOOST

                  __boost(boost_factor.to_f)
                end
              end
            end
         | 
| @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Tantiny
              # Collects the field definitions of an index.
              #
              # The definition block is evaluated in the context of the schema
              # instance, so the private DSL methods below (`id`, `text`, `string`,
              # ...) can be called without a receiver inside it.
              class Schema
                attr_reader :default_tokenizer,
                  :id_field,
                  :text_fields,
                  :string_fields,
                  :integer_fields,
                  :double_fields,
                  :date_fields,
                  :facet_fields,
                  :field_tokenizers

                # @param tokenizer [Object] tokenizer used for fields without their own
                # @yield optional field definitions; without a block the schema keeps
                #   its defaults (id field `:id`, no other fields)
                def initialize(tokenizer, &block)
                  @default_tokenizer = tokenizer
                  @id_field = :id
                  @text_fields = []
                  @string_fields = []
                  @integer_fields = []
                  @double_fields = []
                  @date_fields = []
                  @facet_fields = []
                  @field_tokenizers = {}

                  # instance_exec raises when handed no block, so only evaluate a
                  # definition that was actually given.
                  instance_exec(&block) if block
                end

                # @param field [Symbol]
                # @return the tokenizer registered for the field, or the default one
                def tokenizer_for(field)
                  field_tokenizers[field] || default_tokenizer
                end

                private

                # DSL: overrides the name of the id field.
                def id(key) = @id_field = key

                # DSL: registers a string field.
                def string(key) = @string_fields << key

                # DSL: registers an integer field.
                def integer(key) = @integer_fields << key

                # DSL: registers a double field.
                def double(key) = @double_fields << key

                # DSL: registers a date field.
                def date(key) = @date_fields << key

                # DSL: registers a facet field.
                def facet(key) = @facet_fields << key

                # DSL: registers a text field, optionally with its own tokenizer.
                def text(key, tokenizer: nil)
                  @field_tokenizers[key] = tokenizer if tokenizer

                  @text_fields << key
                end
              end
            end
         | 
| @@ -0,0 +1,28 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Tantiny
              # Factory for tokenizers. The `__new_*_tokenizer` constructors and
              # `__extract_terms` are not defined in this file; presumably they come
              # from the gem's native extension.
              class Tokenizer
                # The tokenizer used when none is configured: the simple one.
                def self.default = new(:simple)

                # Builds a tokenizer of the requested kind.
                #
                # @param kind [Symbol] :simple, :stemmer or :ngram
                # @param options [Hash] :language for :stemmer (defaults to :en);
                #   :min, :max and :prefix_only (defaults to false) for :ngram
                # @raise [UnknownTokenizer] for any other kind
                def self.new(kind, **options)
                  case kind
                  when :simple then __new_simple_tokenizer
                  when :stemmer then __new_stemmer_tokenizer((options[:language] || :en).to_s)
                  when :ngram
                    min_gram, max_gram = options.values_at(:min, :max)
                    __new_ngram_tokenizer(min_gram, max_gram, options.fetch(:prefix_only, false))
                  else
                    raise UnknownTokenizer.new(kind)
                  end
                end

                # Splits the string into terms via `__extract_terms`.
                def terms(string) = __extract_terms(string)
              end
            end
         | 
    
        data/lib/tantiny.rb
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require "ruby-next/language/setup"
         | 
| 4 | 
            +
            RubyNext::Language.setup_gem_load_path
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            require "rutie"
         | 
| 7 | 
            +
            require "thermite/fiddle"
         | 
| 8 | 
            +
            require "concurrent"
         | 
| 9 | 
            +
            require "fileutils"
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            require "tantiny/version"
         | 
| 12 | 
            +
            require "tantiny/errors"
         | 
| 13 | 
            +
            require "tantiny/helpers"
         | 
| 14 | 
            +
            require "tantiny/schema"
         | 
| 15 | 
            +
            require "tantiny/tokenizer"
         | 
| 16 | 
            +
            require "tantiny/query"
         | 
| 17 | 
            +
            require "tantiny/index"
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            module Tantiny
              # Resolves to the parent of the directory that contains this file; it is
              # used as both the Cargo and the Ruby project path.
              gem_root = File.expand_path("../..", __FILE__)

              # Load the native module through its "Init_tantiny" entry point.
              Thermite::Fiddle.load_module("Init_tantiny", cargo_project_path: gem_root, ruby_project_path: gem_root)
            end
         | 
    
        data/lib/tantiny.so
    ADDED
    
    | Binary file | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            # Type signatures for the error classes defined under Tantiny.
            # Every class below inherits directly from StandardError.
            module Tantiny
              # Declares no members of its own in this signature.
              class TantivyError < StandardError
              end

              # Constructed without arguments.
              class UnknownField < StandardError
                def initialize: () -> void
              end

              # Constructed with the tokenizer type, given as a Symbol.
              class UnknownTokenizer < StandardError
                def initialize: (Symbol tokenizer_type) -> void
              end

              # Constructed with the class of the offending range.
              class UnsupportedRange < StandardError
                def initialize: (Class range_type) -> void
              end

              # Constructed with the field name, given as a Symbol.
              class UnsupportedField < StandardError
                def initialize: (Symbol field) -> void
              end
            end
         | 
| @@ -0,0 +1,103 @@ | |
| 1 | 
            +
            # Type signatures for Tantiny::Index.
            module Tantiny
              class Index
                # --- Constants ---------------------------------------------------
                LOCKFILE: String
                DEFAULT_WRITER_MEMORY: Integer
                DEFAULT_LIMIT: Integer

                # --- Construction ------------------------------------------------
                # Public constructor: a path, free-form options and a block.
                def self.new: (String path, **untyped options) { (*untyped) -> void } -> Index

                # Low-level constructor taking the tokenizers and the field names
                # grouped by field type.
                # NOTE(review): the double-underscore methods in this class are
                # presumably implemented by the native extension — confirm.
                def self.__new: (
                  String path,
                  Tokenizer default_tokenizer,
                  Hash[String, Tokenizer] field_tokenizers,
                  Array[String] text_fields,
                  Array[String] string_fields,
                  Array[String] integer_fields,
                  Array[String] double_fields,
                  Array[String] date_fields,
                  Array[String] facet_fields
                ) -> Index

                def initialize: (String path, Schema schema, **untyped options) -> void

                attr_reader schema: Schema

                # --- Writing -----------------------------------------------------
                def transaction: () { (*untyped) -> void } -> void

                def reload: () -> void
                def <<: (untyped document) -> void
                def delete: (String id) -> void

                # --- Searching ---------------------------------------------------
                # Accepts either a Query or a String and returns an Array of Strings.
                def search: ((Query | String) query, ?limit: Integer, **untyped smart_query_options) -> Array[String]

                # --- Query builders (each returns a Query) -------------------------
                def all_query: () -> Query
                def empty_query: () -> Query

                def term_query: (
                  fields fields,
                  String term,
                  **untyped options
                ) -> Query

                def fuzzy_term_query: (
                  fields fields,
                  String term,
                  ?Integer distance,
                  **untyped options
                ) -> Query

                def phrase_query: (
                  fields fields,
                  String phrase,
                  **untyped options
                ) -> Query

                def regex_query: (
                  fields fields,
                  String regex,
                  **untyped options
                ) -> Query

                def prefix_query: (
                  fields fields,
                  String prefix,
                  **untyped options
                ) -> Query

                def facet_query: (
                  Symbol field,
                  String path,
                  **untyped options
                ) -> Query

                def range_query: (
                  fields fields,
                  Range[numeric | date] range,
                  **untyped options
                ) -> Query

                def smart_query: (
                  fields fields,
                  String query_string,
                  **untyped options
                ) -> Query

                # --- Double-underscore low-level methods ---------------------------
                def __commit: () -> void
                def __reload: () -> void

                # Takes the document id followed by one Hash per field type.
                def __add_document: (
                  String id,
                  Hash[String, String] text_fields,
                  Hash[String, String] string_fields,
                  Hash[String, Integer] integer_fields,
                  Hash[String, Float] double_fields,
                  Hash[String, String] date_fields,
                  Hash[String, String] facet_fields
                ) -> void

                def __delete_document: (String id) -> void

                def __search: (Query query, Integer limit) -> Array[String]

                def __acquire_index_writer: (Integer overall_memory) -> void
                def __release_index_writer: () -> void

                private

                # --- Private helpers ----------------------------------------------
                def commit: () -> void

                def slice_document: (untyped document, Array[Symbol] fields) { (untyped v) -> untyped } -> Hash[String, untyped]

                def default_search: (String query_string, Integer limit, ?fuzzy_distance: Integer) -> Array[String]

                def resolve: (untyped document, Symbol field) -> untyped

                def synchronize: () { (*untyped) -> void } -> void
                def lockfile_path: () -> String

                def exclusive_writer?: () -> bool
                def acquire_index_writer: () -> void
                def release_index_writer: () -> void

                def open_transaction!: () -> void
                def close_transaction!: () -> void
                def inside_transaction?: () -> bool
              end
            end
         |