RubyGems - tantiny - Versions diffs - 0.3.3 → 0.4.0 - Mend

tantiny 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +13 -0
data/Cargo.toml +9 -6
data/README.md +118 -42
data/bin/console +2 -3
data/lib/tantiny/errors.rb +1 -1
data/lib/tantiny/index.rb +29 -19
data/lib/tantiny/query.rb +21 -16
data/lib/tantiny/schema.rb +2 -2
data/lib/tantiny/version.rb +1 -1
data/lib/tantiny.rb +21 -10
data/lib/tantiny.so +0 -0
data/src/helpers.rs +71 -191
data/src/index.rs +310 -197
data/src/lib.rs +12 -9
data/src/query.rs +246 -203
data/src/tokenizer.rs +62 -75
metadata +44 -43
data/lib/.rbnext/3.0/tantiny/schema.rb +0 -53
data/sig/tantiny/errors.rbs +0 -20
data/sig/tantiny/helpers.rbs +0 -8
data/sig/tantiny/index.rbs +0 -103
data/sig/tantiny/query.rbs +0 -135
data/sig/tantiny/schema.rbs +0 -26
data/sig/tantiny/tokenizer.rbs +0 -25
data/sig/tantiny/version.rbs +0 -3
data/sig/tantiny.rbs +0 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8d30d965599efcd16b86f22128dc7d1e9312b01aedd7e3aa60fcbb7607e256b4
-  data.tar.gz: 4110a57eb436469b941870420c0ef6143646d02cd4899da8700ecf286c513cf3
+  metadata.gz: 32334d17636719a204b09795443ba26989c1511e515965649af0e92aa0ee5d5a
+  data.tar.gz: 2c596b09325d57012e7987e5c8eba5eb8e9e81f93f5fa3d99859567407f10c9f
 SHA512:
-  metadata.gz: 5fb8942ae18f37ff5d884d583259bc39693fdc687c803773d1c47becfc35adda474764c82356eadb4b67501e05c5c8cc7034bbb0f34d695b6866be2312f0521c
-  data.tar.gz: 4cb076852f8399e4bfcd1bf8515353462cc4416b7adba7e84f82a790d319814c0553dadd11749b7824ea33cbb75c84657457b97617ea603fec789a4fb16782e1
+  metadata.gz: 797d85d76769bf0165f8ecc81d652890d0603806b005de9cece8a3bb6b8b0f6866b4b53fd42caee0738cc43cc9b2e383b1f08ab28f1e706c6231c908bd7334dc
+  data.tar.gz: c683bcb69c47af11da1020cffaa40a9aad40eef358e8c87674b39a9678600f987606db0945632b0d604db4c66a7d634c980fd9446f2674a53c5d42692e4e5913

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Changelog
+## [0.4.0](https://github.com/altertable-ai/tantiny/compare/tantiny-v0.3.3...tantiny/v0.4.0) (2025-11-01)
+[Resume development & transfer ownership](https://github.com/altertable-ai/tantiny/pull/1) following https://github.com/baygeldin/tantiny/pull/24
+### Features
+* Add simple highlighting feature ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
+* Add in-memory indexing capabilities ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
+### Bug Fixes
+* Support Ruby 3.4 moving from Rutie to Magnus ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
 ### [0.3.3](https://github.com/baygeldin/tantiny/compare/v0.3.2...v0.3.3) (2022-04-29)

data/Cargo.toml CHANGED Viewed

@@ -1,20 +1,23 @@
 [package]
 name = "tantiny"
-version = "0.3.3" # {x-release-please-version}
+version = "0.4.0" # {x-release-please-version}
 edition = "2021"
-authors = ["Alexander Baygeldin"]
-repository = "https://github.com/baygeldin/tantiny"
+authors = ["Sylvain Utard", "Alexander Baygeldin"]
+repository = "https://github.com/altertable-ai/tantiny"
 [lib]
+name = "tantiny"
 crate-type = ["cdylib"]
 [dependencies]
-rutie = "0.8"
-tantivy = "0.16"
+magnus = { version = "0.8", features = ["rb-sys"] }
+tantivy = "0.25"
 lazy_static = "1.4"
 paste = "1.0"
+time = { version = "0.3", features = ["parsing", "formatting"] }
+levenshtein_automata = "0.2.1"
 [package.metadata.thermite]
 github_releases = true
 github_release_type = "latest"
-git_tag_regex = "^v(\\d+\\.\\d+\\.\\d+)$"
+git_tag_regex = "^v(\\d+\\.\\d+\\.\\d+)$"

data/README.md CHANGED Viewed

@@ -1,21 +1,24 @@
-[![Build workflow](https://github.com/baygeldin/tantiny/actions/workflows/build.yml/badge.svg)](https://github.com/baygeldin/tantiny/actions/workflows/build.yml)
-[![Tantiny](https://img.shields.io/gem/v/tantiny?color=31c553)](https://rubygems.org/gems/tantiny)
-[![Maintainability](https://api.codeclimate.com/v1/badges/1b466b52d2ba71ab9d80/maintainability)](https://codeclimate.com/github/baygeldin/tantiny/maintainability)
-[![Test Coverage](https://api.codeclimate.com/v1/badges/1b466b52d2ba71ab9d80/test_coverage)](https://codeclimate.com/github/baygeldin/tantiny/test_coverage)
+[![Build workflow](https://github.com/altertable-ai/tantiny/actions/workflows/build.yml/badge.svg)](https://github.com/altertable-ai/tantiny/actions/workflows/build.yml) [![Tantiny](https://img.shields.io/gem/v/tantiny?color=31c553)](https://rubygems.org/gems/tantiny) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+> This is a fork of the [original Tantiny](https://github.com/baygeldin/tantiny) gem by [Alexander Baygeldin](https://github.com/baygeldin). Following https://github.com/baygeldin/tantiny/pull/24, we agreed to transfer ownership of the gem to [Altertable](https://github.com/altertable-ai) so we can keep it up to date with the latest versions of Tantivy and Ruby.
 # Tantiny
-Need a fast full-text search for your Ruby script, but Solr and Elasticsearch are an overkill? 😏
+Need a fast full-text search for your Ruby script, but don't want to host/operate a full-blown search engine yet?
-You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tanti**v**y](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr and Elasticsearch it doesn't need *anything* to work (no separate server or process or whatever), it's purely embeddable. So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvenient or would require additional setup you can always revert back to a quick and dirty solution that is nonetheless flexible and fast.
+You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tanti**v**y](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr, Elasticsearch, or any hosted search engine it doesn't need _anything_ to work (no separate server, process, API or whatever), it's purely embeddable. So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvenient or would require additional setup you can always revert back to a quick and dirty solution that is nonetheless flexible and fast.
 Tantiny is not exactly Ruby bindings to Tantivy, but it tries to be close. The main philosophy is to provide low-level access to Tantivy's inverted index, but with a nice Ruby-esque API, sensible defaults, and additional functionality sprinkled on top.
 Take a look at the most basic example:
 ```ruby
+# Persisted index
 index = Tantiny::Index.new("/path/to/index") { text :description }
+# Or in-memory (no persistence)
+index = Tantiny::Index.new { text :description }
 index << { id: 1, description: "Hello World!" }
 index << { id: 2, description: "What's up?" }
 index << { id: 3, description: "Goodbye World!" }
@@ -43,37 +46,51 @@ Or install it yourself as:
 You don't **have to** have Rust installed on your system since Tantiny will try to download the pre-compiled binaries hosted on GitHub releases during the installation. However, if no pre-compiled binaries were found for your system (which is a combination of platform, architecture, and Ruby version) you will need to [install Rust](https://www.rust-lang.org/tools/install) first.
-⚠️ **IMPORTANT** ⚠️
-Please, make sure to specify the minor version when declaring dependency on `tantiny`. The API is a subject to change, and until it reaches `1.0.0` a bump in the minor version will most likely signify a breaking change.
+## Defining the index schema
-## Defining the index
-You have to specify a path to where the index would be stored and a block that defines the schema:
+Whether you want to use a persisted index or an in-memory index, you need to define the schema first:
 ```ruby
-Tantiny::Index.new "/tmp/index" do
+Tantiny::Index.new(path_or_memory) do
   id :imdb_id
-  facet :category
   string :title
   text :description
   integer :duration
   double :rating
   date :release_date
+  facet :category
 end
 ```
+## In-memory indexes
+For small to medium datasets or temporary search needs (or tests!), you can create an in-memory index by omitting the path parameter:
+```ruby
+index = Tantiny::Index.new do
+  text :title
+  text :description
+  double :price
+end
+```
+In-memory indexes are perfect when you don't need persistence between runs, or when you're building a search index from data that already exists in a database. They offer the same full-text search capabilities without any file I/O overhead.
+## Field types
 Here are the descriptions for every field type:
-| Type | Description |
-| --- | --- |
-| id | Specifies where documents' ids are stored (defaults to `:id`). |
-| facet | Fields with values like `/animals/birds` (i.e. hierarchial categories). |
-| string | Fields with text that are **not** tokenized. |
-| text | Fields with text that are tokenized by the specified tokenizer. |
-| integer | Fields with integer values. |
-| double  | Fields with float values. |
-| date | Fields with either `DateTime` type or something that converts to it. |
+| Type    | Description                                                             |
+| ------- | ----------------------------------------------------------------------- |
+| id      | Specifies where documents' ids are stored (defaults to `:id`).          |
+| facet   | Fields with values like `/animals/birds` (i.e. hierarchial categories). |
+| string  | Fields with text that are **not** tokenized.                            |
+| text    | Fields with text that are tokenized by the specified tokenizer.         |
+| integer | Fields with integer values.                                             |
+| double  | Fields with float values.                                               |
+| date    | Fields with either `DateTime` type or something that converts to it.    |
+Each field can either be a single value or an array of values.
 ## Managing documents
@@ -142,7 +159,7 @@ index.transaction do
 end
 ```
-Transactions group changes and [commit](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html#method.commit) them to the index in one go. This is *dramatically* more efficient than performing these changes one by one. In fact, all writing operations (i.e. `<<` and `delete`) are wrapped in a transaction implicitly when you call them outside of a transaction, so calling `<<` 10 times outside of a transaction is the same thing as performing 10 separate transactions.
+Transactions group changes and [commit](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html#method.commit) them to the index in one go. This is _dramatically_ more efficient than performing these changes one by one. In fact, all writing operations (i.e. `<<` and `delete`) are wrapped in a transaction implicitly when you call them outside of a transaction, so calling `<<` 10 times outside of a transaction is the same thing as performing 10 separate transactions.
 ### Concurrency and thread-safety
@@ -182,7 +199,7 @@ You may wonder, how exactly does it conduct the search? Well, the default behavi
 index.search("a dlunk, a kib, and an olt mab", fuzzy_distance: 1)
 ```
-However, you can customize it by composing your own query out of basic building blocks:
+However, you can customize it by composing your own query out of basic building blocks:
 ```ruby
 popular_movies = index.range_query(:rating, 8.0..10.0)
@@ -198,20 +215,18 @@ I know, weird taste! But pretty cool, huh? Take a look at all the available quer
 ### Supported queries
-| Query | Behavior |
-| --- | --- |
-| all_query | Returns all indexed documents. |
-| empty_query | Returns exactly nothing (used internally). |
-| term_query | Documents that contain the specified term. |
-| fuzzy_term_query | Documents that contain the specified term within a Levenshtein distance. |
-| phrase_query | Documents that contain the specified sequence of terms. |
-| regex_query | Documents that contain a term that matches the specified regex. |
-| prefix_query | Documents that contain a term with the specified prefix. |
-| range_query | Documents that with an `integer`, `double` or `date` field within the specified range. |
-| facet_query | Documents that belong to the specified category. |
-| smart_query | A combination of `term_query`, `fuzzy_term_query` and `prefix_query`. |
-Take a look at the [signatures file](https://github.com/baygeldin/tantiny/blob/main/sig/tantiny/query.rbs) to see what parameters do queries accept.
+| Query            | Behavior                                                                               |
+| ---------------- | -------------------------------------------------------------------------------------- |
+| all_query        | Returns all indexed documents.                                                         |
+| empty_query      | Returns exactly nothing (used internally).                                             |
+| term_query       | Documents that contain the specified term.                                             |
+| fuzzy_term_query | Documents that contain the specified term within a Levenshtein distance.               |
+| phrase_query     | Documents that contain the specified sequence of terms.                                |
+| regex_query      | Documents that contain a term that matches the specified regex.                        |
+| prefix_query     | Documents that contain a term with the specified prefix.                               |
+| range_query      | Documents that with an `integer`, `double` or `date` field within the specified range. |
+| facet_query      | Documents that belong to the specified category.                                       |
+| smart_query      | A combination of `term_query`, `fuzzy_term_query` and `prefix_query`.                  |
 ### Searching on multiple fields
@@ -276,7 +291,7 @@ The `regex_query` accepts the regex pattern, but it has to be a [Rust regex](htt
 So, we've mentioned tokenizers more than once already. What are they?
-Tokenizers is what Tantivy uses to chop your text onto terms to build an inverted index. Then you can search the index by these terms. It's an important concept to understand so that you don't get confused when `index.term_query(:description, "Hello")` returns nothing because `Hello` isn't a term, but `hello` is. You have to extract the terms from the query before searching the index. Currently, only `smart_query` does that for you. Also, the only field type that is tokenized is `text`, so for `string` fields you should use the exact match (i.e. `index.term_query(:title, "Hello")`).
+Tokenizers is what Tantivy uses to chop your text onto terms to build an inverted index. Then you can search the index by these terms. It's an important concept to understand so that you don't get confused when `index.term_query(:description, "Hello")` returns nothing because `Hello` isn't a term, but `hello` is. You have to extract the terms from the query before searching the index. Currently, only `smart_query` does that for you. Also, the only field type that is tokenized is `text`, so for `string` fields you should use the exact match (i.e. `index.term_query(:title, "Hello")`).
 ### Specifying the tokenizer
@@ -320,19 +335,80 @@ Ngram tokenizer chops your text onto ngrams of specified size.
 tokenizer = Tantiny::Tokenizer.new(:ngram, min: 5, max: 10, prefix_only: true)
 tokenizer.terms("Morrowind") # ["Morro", "Morrow", "Morrowi", "Morrowin", "Morrowind"]
 ```
 ## Retrieving documents
 You may have noticed that the `search` method returns only document ids. This is by design. The documents themselves are **not** stored in the index. Tantiny is a minimalistic library, so it tries to keep things simple. If you need to retrieve a full document, use a key-value store like Redis alongside.
+## Highlighting
+Tantiny supports highlighting of search results. This is useful when you want to display the search results in a more readable format.
+```ruby
+Tantiny::Query.highlight(field_text, query_string)
+```
+It supports fuzzy highlighting by specifying the fuzzy distance.
+```ruby
+Tantiny::Query.highlight(field_text, query_string, fuzzy_distance: 2)
+```
+It also supports custom tokenizers, but make sure to use the same tokenizer that was used to index the field.
+```ruby
+tokenizer = Tantiny::Tokenizer.new(:stemmer, language: :fr)
+Tantiny::Query.highlight(field_text, query_string, tokenizer: tokenizer)
+```
+This will return the text with the terms highlighted:
+```ruby
+Tantiny::Query.highlight("hellow world. you are welcome.", "hello you")
+# "<b>hellow</b> world. <b>you</b> are welcome."
+```
+## Examples
+The [examples directory](examples/) contains practical demonstrations of Tantiny's capabilities. These examples are great starting points for understanding how to use Tantiny in real-world scenarios.
+### Simple Ranking Example
+[`examples/simple_ranking.rb`](examples/simple_ranking.rb)
+A minimal demonstration of field-based ranking showing:
+- Creating an in-memory index
+- Using boost values to rank title matches higher than description matches
+- Side-by-side comparison of equal weights vs boosted fields
+This is perfect for understanding the core concept of ranking in just a few lines of code.
+### Ecommerce Example
+[`examples/ecommerce.rb`](examples/ecommerce.rb)
+A comprehensive example demonstrating in-memory search for a product catalog:
+- **In-memory indexing** - Perfect for small to medium datasets without persistent storage
+- **Product search** - Indexing products with various attributes (title, description, category, price, stock)
+- **Fuzzy search** - Handling typos and misspellings (e.g., "loptop" → "laptop")
+- **Field-based ranking** - Boosting title matches to rank higher than description matches
+- **Complex queries** - Combining multiple conditions with AND/OR operators
+- **Category filtering** - Filtering products by exact category match
+- **Price range queries** - Finding products within a specific price range
+See the [examples README](examples/README.md) for more details.
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake build` to build native extensions, and then `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
-We use [conventional commits](https://www.conventionalcommits.org) to automatically generate the CHANGELOG, bump the semantic version, and to publish and release the gem. All you need to do is stick to the convention and [CI will take care of everything else](https://github.com/baygeldin/tantiny/blob/main/.github/workflows/release.yml) for you.
+We use [conventional commits](https://www.conventionalcommits.org) to automatically generate the CHANGELOG, bump the semantic version, and to publish and release the gem. All you need to do is stick to the convention and [CI will take care of everything else](https://github.com/altertable-ai/tantiny/blob/main/.github/workflows/release.yml) for you.
 ## Contributing
-Bug reports and pull requests are welcome on GitHub at https://github.com/baygeldin/tantiny.
+Bug reports and pull requests are welcome on GitHub at https://github.com/altertable-ai/tantiny.
 ## License

data/bin/console CHANGED Viewed

@@ -3,17 +3,16 @@
 require "bundler/setup"
 require "pry"
+require "ostruct"
 require "tantiny"
-path = File.join(__dir__, "../tmp")
 options = {
   tokenizer: Tantiny::Tokenizer.new(:stemmer, language: :en),
   exclusive_writer: true,
 }
-index = Tantiny::Index.new(path, **options) do
+index = Tantiny::Index.new(nil, **options) do
   id :imdb_id
   facet :category
   string :title

data/lib/tantiny/errors.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Tantiny
   class IndexWriterBusyError < StandardError
     def initialize
-      msg = "Failed to acquire an index writer. "\
+      msg = "Failed to acquire an index writer. " \
         "Is there an active index with an exclusive writer already?"
       super(msg)

data/lib/tantiny/index.rb CHANGED Viewed

@@ -3,17 +3,18 @@
 module Tantiny
   class Index
     LOCKFILE = ".tantiny.lock"
-    DEFAULT_WRITER_MEMORY = 5_000_000 # 5MB
+    DEFAULT_WRITER_MEMORY = 15_000_000 # 15MB
     DEFAULT_LIMIT = 10
-    def self.new(path, **options, &block)
-      FileUtils.mkdir_p(path)
+    def self.new(path = nil, **options, &)
+      # Only create directory if path is provided
+      FileUtils.mkdir_p(path) if path
       default_tokenizer = options[:tokenizer] || Tokenizer.default
-      schema = Schema.new(default_tokenizer, &block)
+      schema = Schema.new(default_tokenizer, &)
       object = __new(
-        path.to_s,
+        path&.to_s,
         schema.default_tokenizer,
         schema.field_tokenizers.transform_keys(&:to_s),
         schema.text_fields.map(&:to_s),
@@ -44,6 +45,10 @@ module Tantiny
     attr_reader :schema
+    def in_memory?
+      @path.nil?
+    end
     def transaction
       if inside_transaction?
         yield
@@ -68,12 +73,12 @@ module Tantiny
       transaction do
         __add_document(
           resolve(document, schema.id_field).to_s,
-          slice_document(document, schema.text_fields) { |v| v.to_s },
-          slice_document(document, schema.string_fields) { |v| v.to_s },
-          slice_document(document, schema.integer_fields) { |v| v.to_i },
-          slice_document(document, schema.double_fields) { |v| v.to_f },
-          slice_document(document, schema.date_fields) { |v| Helpers.timestamp(v) },
-          slice_document(document, schema.facet_fields) { |v| v.to_s }
+          slice_document(document, schema.text_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s },
+          slice_document(document, schema.string_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s },
+          slice_document(document, schema.integer_fields) { |v| v.is_a?(Array) ? v.map(&:to_i) : v.to_i },
+          slice_document(document, schema.double_fields) { |v| v.is_a?(Array) ? v.map(&:to_f) : v.to_f },
+          slice_document(document, schema.date_fields) { |v| v.is_a?(Array) ? v.map { |d| Helpers.timestamp(d) } : Helpers.timestamp(v) },
+          slice_document(document, schema.facet_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s }
         )
       end
     end
@@ -103,10 +108,10 @@ module Tantiny
     private
-    def slice_document(document, fields, &block)
+    def slice_document(document, fields, &)
       fields.inject({}) do |hash, field|
         hash.tap { |h| h[field.to_s] = resolve(document, field) }
-      end.compact.transform_values(&block)
+      end.compact.transform_values(&)
     end
     def resolve(document, field)
@@ -115,9 +120,9 @@ module Tantiny
     def acquire_index_writer
       __acquire_index_writer(@indexer_memory)
-    rescue TantivyError => e
+    rescue RuntimeError => e
       case e.message
-      when /Failed to acquire Lockfile/
+      when /Failed to acquire Lockfile/, /LockBusy/
         raise IndexWriterBusyError.new
       else
         raise
@@ -154,14 +159,19 @@ module Tantiny
       @exclusive_writer
     end
-    def synchronize(&block)
-      @transaction_semaphore.synchronize do
-        Helpers.with_lock(lockfile_path, &block)
+    def synchronize(&)
+      # In-memory indexes don't need file locking
+      if in_memory?
+        @transaction_semaphore.synchronize(&)
+      else
+        @transaction_semaphore.synchronize do
+          Helpers.with_lock(lockfile_path, &)
+        end
       end
     end
     def lockfile_path
-      @lockfile_path ||= File.join(@path, LOCKFILE)
+      @lockfile_path ||= @path && File.join(@path, LOCKFILE)
     end
   end
 end

data/lib/tantiny/query.rb CHANGED Viewed

@@ -32,37 +32,37 @@ module Tantiny
         __new_empty_query
       end
-      def term_query(index, fields, term, **options)
+      def term_query(index, fields, term, **)
         allowed_fields = text_and_strings(index)
-        construct_query(index, :term, allowed_fields, fields, [term.to_s], **options)
+        construct_query(index, :term, allowed_fields, fields, [term.to_s], **)
       end
-      def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **options)
+      def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **)
         params = [term.to_s, distance.to_i]
         allowed_fields = text_and_strings(index)
-        construct_query(index, :fuzzy_term, allowed_fields, fields, params, **options)
+        construct_query(index, :fuzzy_term, allowed_fields, fields, params, **)
       end
-      def phrase_query(index, fields, phrase, **options)
+      def phrase_query(index, fields, phrase, **)
         queries = [*fields].map do |f|
           terms = index.schema.tokenizer_for(f).terms(phrase)
           allowed_fields = index.schema.text_fields
-          construct_query(index, :phrase, allowed_fields, f, [terms], **options)
+          construct_query(index, :phrase, allowed_fields, f, [terms], **)
         end
         queries.empty? ? empty_query : disjunction(*queries)
       end
-      def regex_query(index, fields, regex, **options)
+      def regex_query(index, fields, regex, **)
         allowed_fields = text_and_strings(index)
-        construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **options)
+        construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **)
       end
-      def prefix_query(index, fields, prefix, **options)
-        regex_query(index, fields, Regexp.escape(prefix) + ".*", **options)
+      def prefix_query(index, fields, prefix, **)
+        regex_query(index, fields, Regexp.escape(prefix) + ".*", **)
       end
-      def range_query(index, fields, range, **options)
+      def range_query(index, fields, range, **)
         schema = index.schema
         case range.first
@@ -81,19 +81,19 @@ module Tantiny
         end
         # @type var allowed_fields: Array[Symbol]
-        construct_query(index, :range, allowed_fields, fields, [from, to], **options)
+        construct_query(index, :range, allowed_fields, fields, [from, to], **)
       end
-      def facet_query(index, field, path, **options)
+      def facet_query(index, field, path, **)
         allowed_fields = index.schema.facet_fields
-        construct_query(index, :facet, allowed_fields, field, [path], **options)
+        construct_query(index, :facet, allowed_fields, field, [path], **)
       end
       def smart_query(index, fields, query_string, **options)
         fuzzy_distance = options[:fuzzy_distance]
         boost_factor = options.fetch(:boost, DEFAULT_BOOST)
-        field_queries = [*fields].map do |field|
+        field_queries = [*fields].filter_map do |field|
           terms = index.schema.tokenizer_for(field).terms(query_string)
           # See: https://github.com/soutaro/steep/issues/272
@@ -113,11 +113,16 @@ module Tantiny
           last_term_query = prefix_query(index, field, terms.last) | term_queries.last
           conjunction(last_term_query, *term_queries[0...-1])
-        end.compact
+        end
         disjunction(*field_queries).boost(boost_factor)
       end
+      def highlight(text, query_string, fuzzy_distance: 0, tokenizer: Tantiny::Tokenizer.new(:simple))
+        terms = tokenizer.terms(query_string).map(&:to_s)
+        __highlight(text.to_s, terms, fuzzy_distance)
+      end
       private
       # Can't use variadic argument `params` here due to:

data/lib/tantiny/schema.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module Tantiny
       :facet_fields,
       :field_tokenizers
-    def initialize(tokenizer, &block)
+    def initialize(tokenizer, &)
       @default_tokenizer = tokenizer
       @id_field = :id
       @text_fields = []
@@ -23,7 +23,7 @@ module Tantiny
       @facet_fields = []
       @field_tokenizers = {}
-      instance_exec(&block)
+      instance_exec(&)
     end
     def tokenizer_for(field)

data/lib/tantiny/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Tantiny
-  VERSION = "0.3.3" # {x-release-please-version}
+  VERSION = "0.4.0" # {x-release-please-version}
 end

data/lib/tantiny.rb CHANGED Viewed

@@ -1,10 +1,6 @@
 # frozen_string_literal: true
-require "ruby-next/language/setup"
-RubyNext::Language.setup_gem_load_path
-require "rutie"
-require "thermite/fiddle"
+require "fiddle/import"
 require "concurrent"
 require "fileutils"
@@ -19,9 +15,24 @@ require "tantiny/index"
 module Tantiny
   project_dir = File.expand_path("../..", __FILE__)
-  Thermite::Fiddle.load_module(
-    "Init_tantiny",
-    cargo_project_path: project_dir,
-    ruby_project_path: project_dir
-  )
+  # Try multiple possible locations for the library
+  lib_paths = [
+    File.join(project_dir, "target", "release", "libtantiny.dylib"),
+    File.join(project_dir, "target", "debug", "libtantiny.dylib"),
+    File.join(project_dir, "target", "release", "libtantiny.so"),
+    File.join(project_dir, "target", "debug", "libtantiny.so"),
+    File.join(project_dir, "lib", "tantiny.bundle"),
+    File.join(project_dir, "lib", "tantiny.so"),
+    File.join(project_dir, "lib", "tantiny.dylib")
+  ]
+  lib_path = lib_paths.find { |path| File.exist?(path) }
+  if lib_path.nil?
+    raise LoadError, "Could not find tantiny library in any of: #{lib_paths.join(", ")}"
+  end
+  # Load the library using Fiddle and call the init function
+  handle = Fiddle.dlopen(lib_path)
+  Fiddle::Function.new(handle["Init_tantiny"], [], Fiddle::TYPE_VOIDP).call
 end

data/lib/tantiny.so ADDED Viewed

Binary file