tantiny 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8d30d965599efcd16b86f22128dc7d1e9312b01aedd7e3aa60fcbb7607e256b4
4
- data.tar.gz: 4110a57eb436469b941870420c0ef6143646d02cd4899da8700ecf286c513cf3
3
+ metadata.gz: 32334d17636719a204b09795443ba26989c1511e515965649af0e92aa0ee5d5a
4
+ data.tar.gz: 2c596b09325d57012e7987e5c8eba5eb8e9e81f93f5fa3d99859567407f10c9f
5
5
  SHA512:
6
- metadata.gz: 5fb8942ae18f37ff5d884d583259bc39693fdc687c803773d1c47becfc35adda474764c82356eadb4b67501e05c5c8cc7034bbb0f34d695b6866be2312f0521c
7
- data.tar.gz: 4cb076852f8399e4bfcd1bf8515353462cc4416b7adba7e84f82a790d319814c0553dadd11749b7824ea33cbb75c84657457b97617ea603fec789a4fb16782e1
6
+ metadata.gz: 797d85d76769bf0165f8ecc81d652890d0603806b005de9cece8a3bb6b8b0f6866b4b53fd42caee0738cc43cc9b2e383b1f08ab28f1e706c6231c908bd7334dc
7
+ data.tar.gz: c683bcb69c47af11da1020cffaa40a9aad40eef358e8c87674b39a9678600f987606db0945632b0d604db4c66a7d634c980fd9446f2674a53c5d42692e4e5913
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.4.0](https://github.com/altertable-ai/tantiny/compare/tantiny-v0.3.3...tantiny/v0.4.0) (2025-11-01)
4
+
5
+ [Resume development & transfer ownership](https://github.com/altertable-ai/tantiny/pull/1) following https://github.com/baygeldin/tantiny/pull/24
6
+
7
+ ### Features
8
+
9
+ * Add simple highlighting feature ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
10
+ * Add in-memory indexing capabilities ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
11
+
12
+ ### Bug Fixes
13
+
14
+ * Support Ruby 3.4 moving from Rutie to Magnus ([7d4273e](https://github.com/altertable-ai/tantiny/commit/7d4273e657b658a670fc882714bb319d0b3b374c))
15
+
3
16
  ### [0.3.3](https://github.com/baygeldin/tantiny/compare/v0.3.2...v0.3.3) (2022-04-29)
4
17
 
5
18
 
data/Cargo.toml CHANGED
@@ -1,20 +1,23 @@
1
1
  [package]
2
2
  name = "tantiny"
3
- version = "0.3.3" # {x-release-please-version}
3
+ version = "0.4.0" # {x-release-please-version}
4
4
  edition = "2021"
5
- authors = ["Alexander Baygeldin"]
6
- repository = "https://github.com/baygeldin/tantiny"
5
+ authors = ["Sylvain Utard", "Alexander Baygeldin"]
6
+ repository = "https://github.com/altertable-ai/tantiny"
7
7
 
8
8
  [lib]
9
+ name = "tantiny"
9
10
  crate-type = ["cdylib"]
10
11
 
11
12
  [dependencies]
12
- rutie = "0.8"
13
- tantivy = "0.16"
13
+ magnus = { version = "0.8", features = ["rb-sys"] }
14
+ tantivy = "0.25"
14
15
  lazy_static = "1.4"
15
16
  paste = "1.0"
17
+ time = { version = "0.3", features = ["parsing", "formatting"] }
18
+ levenshtein_automata = "0.2.1"
16
19
 
17
20
  [package.metadata.thermite]
18
21
  github_releases = true
19
22
  github_release_type = "latest"
20
- git_tag_regex = "^v(\\d+\\.\\d+\\.\\d+)$"
23
+ git_tag_regex = "^v(\\d+\\.\\d+\\.\\d+)$"
data/README.md CHANGED
@@ -1,21 +1,24 @@
1
- [![Build workflow](https://github.com/baygeldin/tantiny/actions/workflows/build.yml/badge.svg)](https://github.com/baygeldin/tantiny/actions/workflows/build.yml)
2
- [![Tantiny](https://img.shields.io/gem/v/tantiny?color=31c553)](https://rubygems.org/gems/tantiny)
3
- [![Maintainability](https://api.codeclimate.com/v1/badges/1b466b52d2ba71ab9d80/maintainability)](https://codeclimate.com/github/baygeldin/tantiny/maintainability)
4
- [![Test Coverage](https://api.codeclimate.com/v1/badges/1b466b52d2ba71ab9d80/test_coverage)](https://codeclimate.com/github/baygeldin/tantiny/test_coverage)
1
+ [![Build workflow](https://github.com/altertable-ai/tantiny/actions/workflows/build.yml/badge.svg)](https://github.com/altertable-ai/tantiny/actions/workflows/build.yml) [![Tantiny](https://img.shields.io/gem/v/tantiny?color=31c553)](https://rubygems.org/gems/tantiny) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
2
+
3
+ > This is a fork of the [original Tantiny](https://github.com/baygeldin/tantiny) gem by [Alexander Baygeldin](https://github.com/baygeldin). Following https://github.com/baygeldin/tantiny/pull/24 we agreed to transfer ownership of the gem to [Altertable](https://github.com/altertable-ai) so we can keep it up to date with the latest versions of Tantivy and Ruby.
5
4
 
6
5
  # Tantiny
7
6
 
8
- Need a fast full-text search for your Ruby script, but Solr and Elasticsearch are an overkill? 😏
7
+ Need a fast full-text search for your Ruby script, but don't want to host/operate a full-blown search engine yet?
9
8
 
10
- You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tanti**v**y](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr and Elasticsearch it doesn't need *anything* to work (no separate server or process or whatever), it's purely embeddable. So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvinient or would require additional setup you can always revert back to a quick and dirty solution that is nontheless flexible and fast.
9
+ You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tanti**v**y](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr, Elasticsearch, or any hosted search engine it doesn't need _anything_ to work (no separate server, process, API or whatever), it's purely embeddable. So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvenient or would require additional setup you can always revert back to a quick and dirty solution that is nonetheless flexible and fast.
11
10
 
12
11
  Tantiny is not exactly Ruby bindings to Tantivy, but it tries to be close. The main philosophy is to provide low-level access to Tantivy's inverted index, but with a nice Ruby-esque API, sensible defaults, and additional functionality sprinkled on top.
13
12
 
14
13
  Take a look at the most basic example:
15
14
 
16
15
  ```ruby
16
+ # Persisted index
17
17
  index = Tantiny::Index.new("/path/to/index") { text :description }
18
18
 
19
+ # Or in-memory (no persistence)
20
+ index = Tantiny::Index.new { text :description }
21
+
19
22
  index << { id: 1, description: "Hello World!" }
20
23
  index << { id: 2, description: "What's up?" }
21
24
  index << { id: 3, description: "Goodbye World!" }
@@ -43,37 +46,51 @@ Or install it yourself as:
43
46
 
44
47
  You don't **have to** have Rust installed on your system since Tantiny will try to download the pre-compiled binaries hosted on GitHub releases during the installation. However, if no pre-compiled binaries were found for your system (which is a combination of platform, architecture, and Ruby version) you will need to [install Rust](https://www.rust-lang.org/tools/install) first.
45
48
 
46
- ⚠️ **IMPORTANT** ⚠️
47
-
48
- Please, make sure to specify the minor version when declaring dependency on `tantiny`. The API is a subject to change, and until it reaches `1.0.0` a bump in the minor version will most likely signify a breaking change.
49
+ ## Defining the index schema
49
50
 
50
- ## Defining the index
51
-
52
- You have to specify a path to where the index would be stored and a block that defines the schema:
51
+ Whether you want to use a persisted index or an in-memory index, you need to define the schema first:
53
52
 
54
53
  ```ruby
55
- Tantiny::Index.new "/tmp/index" do
54
+ Tantiny::Index.new(path_or_memory) do
56
55
  id :imdb_id
57
- facet :category
58
56
  string :title
59
57
  text :description
60
58
  integer :duration
61
59
  double :rating
62
60
  date :release_date
61
+ facet :category
63
62
  end
64
63
  ```
65
64
 
65
+ ## In-memory indexes
66
+
67
+ For small to medium datasets or temporary search needs (or tests!), you can create an in-memory index by omitting the path parameter:
68
+
69
+ ```ruby
70
+ index = Tantiny::Index.new do
71
+ text :title
72
+ text :description
73
+ double :price
74
+ end
75
+ ```
76
+
77
+ In-memory indexes are perfect when you don't need persistence between runs, or when you're building a search index from data that already exists in a database. They offer the same full-text search capabilities without any file I/O overhead.
78
+
79
+ ## Field types
80
+
66
81
  Here are the descriptions for every field type:
67
82
 
68
- | Type | Description |
69
- | --- | --- |
70
- | id | Specifies where documents' ids are stored (defaults to `:id`). |
71
- | facet | Fields with values like `/animals/birds` (i.e. hierarchial categories). |
72
- | string | Fields with text that are **not** tokenized. |
73
- | text | Fields with text that are tokenized by the specified tokenizer. |
74
- | integer | Fields with integer values. |
75
- | double | Fields with float values. |
76
- | date | Fields with either `DateTime` type or something that converts to it. |
83
+ | Type | Description |
84
+ | ------- | ----------------------------------------------------------------------- |
85
+ | id | Specifies where documents' ids are stored (defaults to `:id`). |
86
+ | facet   | Fields with values like `/animals/birds` (i.e. hierarchical categories). |
87
+ | string | Fields with text that are **not** tokenized. |
88
+ | text | Fields with text that are tokenized by the specified tokenizer. |
89
+ | integer | Fields with integer values. |
90
+ | double | Fields with float values. |
91
+ | date | Fields with either `DateTime` type or something that converts to it. |
92
+
93
+ Each field can either be a single value or an array of values.
77
94
 
78
95
  ## Managing documents
79
96
 
@@ -142,7 +159,7 @@ index.transaction do
142
159
  end
143
160
  ```
144
161
 
145
- Transactions group changes and [commit](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html#method.commit) them to the index in one go. This is *dramatically* more efficient than performing these changes one by one. In fact, all writing operations (i.e. `<<` and `delete`) are wrapped in a transaction implicitly when you call them outside of a transaction, so calling `<<` 10 times outside of a transaction is the same thing as performing 10 separate transactions.
162
+ Transactions group changes and [commit](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html#method.commit) them to the index in one go. This is _dramatically_ more efficient than performing these changes one by one. In fact, all writing operations (i.e. `<<` and `delete`) are wrapped in a transaction implicitly when you call them outside of a transaction, so calling `<<` 10 times outside of a transaction is the same thing as performing 10 separate transactions.
146
163
 
147
164
  ### Concurrency and thread-safety
148
165
 
@@ -182,7 +199,7 @@ You may wonder, how exactly does it conduct the search? Well, the default behavi
182
199
  index.search("a dlunk, a kib, and an olt mab", fuzzy_distance: 1)
183
200
  ```
184
201
 
185
- However, you can customize it by composing your own query out of basic building blocks:
202
+ However, you can customize it by composing your own query out of basic building blocks:
186
203
 
187
204
  ```ruby
188
205
  popular_movies = index.range_query(:rating, 8.0..10.0)
@@ -198,20 +215,18 @@ I know, weird taste! But pretty cool, huh? Take a look at all the available quer
198
215
 
199
216
  ### Supported queries
200
217
 
201
- | Query | Behavior |
202
- | --- | --- |
203
- | all_query | Returns all indexed documents. |
204
- | empty_query | Returns exactly nothing (used internally). |
205
- | term_query | Documents that contain the specified term. |
206
- | fuzzy_term_query | Documents that contain the specified term within a Levenshtein distance. |
207
- | phrase_query | Documents that contain the specified sequence of terms. |
208
- | regex_query | Documents that contain a term that matches the specified regex. |
209
- | prefix_query | Documents that contain a term with the specified prefix. |
210
- | range_query | Documents that with an `integer`, `double` or `date` field within the specified range. |
211
- | facet_query | Documents that belong to the specified category. |
212
- | smart_query | A combination of `term_query`, `fuzzy_term_query` and `prefix_query`. |
213
-
214
- Take a look at the [signatures file](https://github.com/baygeldin/tantiny/blob/main/sig/tantiny/query.rbs) to see what parameters do queries accept.
218
+ | Query | Behavior |
219
+ | ---------------- | -------------------------------------------------------------------------------------- |
220
+ | all_query | Returns all indexed documents. |
221
+ | empty_query | Returns exactly nothing (used internally). |
222
+ | term_query | Documents that contain the specified term. |
223
+ | fuzzy_term_query | Documents that contain the specified term within a Levenshtein distance. |
224
+ | phrase_query | Documents that contain the specified sequence of terms. |
225
+ | regex_query | Documents that contain a term that matches the specified regex. |
226
+ | prefix_query | Documents that contain a term with the specified prefix. |
227
+ | range_query      | Documents with an `integer`, `double` or `date` field within the specified range.       |
228
+ | facet_query | Documents that belong to the specified category. |
229
+ | smart_query | A combination of `term_query`, `fuzzy_term_query` and `prefix_query`. |
215
230
 
216
231
  ### Searching on multiple fields
217
232
 
@@ -276,7 +291,7 @@ The `regex_query` accepts the regex pattern, but it has to be a [Rust regex](htt
276
291
 
277
292
  So, we've mentioned tokenizers more than once already. What are they?
278
293
 
279
- Tokenizers is what Tantivy uses to chop your text onto terms to build an inverted index. Then you can search the index by these terms. It's an important concept to understand so that you don't get confused when `index.term_query(:description, "Hello")` returns nothing because `Hello` isn't a term, but `hello` is. You have to extract the terms from the query before searching the index. Currently, only `smart_query` does that for you. Also, the only field type that is tokenized is `text`, so for `string` fields you should use the exact match (i.e. `index.term_query(:title, "Hello")`).
294
+ Tokenizers are what Tantivy uses to chop your text into terms to build an inverted index. Then you can search the index by these terms. It's an important concept to understand so that you don't get confused when `index.term_query(:description, "Hello")` returns nothing because `Hello` isn't a term, but `hello` is. You have to extract the terms from the query before searching the index. Currently, only `smart_query` does that for you. Also, the only field type that is tokenized is `text`, so for `string` fields you should use the exact match (i.e. `index.term_query(:title, "Hello")`).
280
295
 
281
296
  ### Specifying the tokenizer
282
297
 
@@ -320,19 +335,80 @@ Ngram tokenizer chops your text onto ngrams of specified size.
320
335
  tokenizer = Tantiny::Tokenizer.new(:ngram, min: 5, max: 10, prefix_only: true)
321
336
  tokenizer.terms("Morrowind") # ["Morro", "Morrow", "Morrowi", "Morrowin", "Morrowind"]
322
337
  ```
338
+
323
339
  ## Retrieving documents
324
340
 
325
341
  You may have noticed that `search` method returns only documents ids. This is by design. The documents themselves are **not** stored in the index. Tantiny is a minimalistic library, so it tries to keep things simple. If you need to retrieve a full document, use a key-value store like Redis alongside.
326
342
 
343
+ ## Highlighting
344
+
345
+ Tantiny supports highlighting of search results. This is useful when you want to display the search results in a more readable format.
346
+
347
+ ```ruby
348
+ Tantiny::Query.highlight(field_text, query_string)
349
+ ```
350
+
351
+ It supports fuzzy highlighting by specifying the fuzzy distance.
352
+
353
+ ```ruby
354
+ Tantiny::Query.highlight(field_text, query_string, fuzzy_distance: 2)
355
+ ```
356
+
357
+ It also supports custom tokenizers, but make sure to use the same tokenizer that was used to index the field.
358
+
359
+ ```ruby
360
+ tokenizer = Tantiny::Tokenizer.new(:stemmer, language: :fr)
361
+ Tantiny::Query.highlight(field_text, query_string, tokenizer: tokenizer)
362
+ ```
363
+
364
+ This will return the text with the terms highlighted:
365
+
366
+ ```ruby
367
+ Tantiny::Query.highlight("hellow world. you are welcome.", "hello you")
368
+ # "<b>hellow</b> world. <b>you</b> are welcome."
369
+ ```
370
+
371
+ ## Examples
372
+
373
+ The [examples directory](examples/) contains practical demonstrations of Tantiny's capabilities. These examples are great starting points for understanding how to use Tantiny in real-world scenarios.
374
+
375
+ ### Simple Ranking Example
376
+
377
+ [`examples/simple_ranking.rb`](examples/simple_ranking.rb)
378
+
379
+ A minimal demonstration of field-based ranking showing:
380
+
381
+ - Creating an in-memory index
382
+ - Using boost values to rank title matches higher than description matches
383
+ - Side-by-side comparison of equal weights vs boosted fields
384
+
385
+ This is perfect for understanding the core concept of ranking in just a few lines of code.
386
+
387
+ ### Ecommerce Example
388
+
389
+ [`examples/ecommerce.rb`](examples/ecommerce.rb)
390
+
391
+ A comprehensive example demonstrating in-memory search for a product catalog:
392
+
393
+ - **In-memory indexing** - Perfect for small to medium datasets without persistent storage
394
+ - **Product search** - Indexing products with various attributes (title, description, category, price, stock)
395
+ - **Fuzzy search** - Handling typos and misspellings (e.g., "loptop" → "laptop")
396
+ - **Field-based ranking** - Boosting title matches to rank higher than description matches
397
+ - **Complex queries** - Combining multiple conditions with AND/OR operators
398
+ - **Category filtering** - Filtering products by exact category match
399
+ - **Price range queries** - Finding products within a specific price range
400
+
401
+ See the [examples README](examples/README.md) for more details.
402
+
327
403
  ## Development
328
404
 
329
405
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake build` to build native extensions, and then `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
330
406
 
331
- We use [conventional commits](https://www.conventionalcommits.org) to automatically generate the CHANGELOG, bump the semantic version, and to publish and release the gem. All you need to do is stick to the convention and [CI will take care of everything else](https://github.com/baygeldin/tantiny/blob/main/.github/workflows/release.yml) for you.
407
+ We use [conventional commits](https://www.conventionalcommits.org) to automatically generate the CHANGELOG, bump the semantic version, and to publish and release the gem. All you need to do is stick to the convention and [CI will take care of everything else](https://github.com/altertable-ai/tantiny/blob/main/.github/workflows/release.yml) for you.
332
408
 
333
409
  ## Contributing
334
410
 
335
- Bug reports and pull requests are welcome on GitHub at https://github.com/baygeldin/tantiny.
411
+ Bug reports and pull requests are welcome on GitHub at https://github.com/altertable-ai/tantiny.
336
412
 
337
413
  ## License
338
414
 
data/bin/console CHANGED
@@ -3,17 +3,16 @@
3
3
 
4
4
  require "bundler/setup"
5
5
  require "pry"
6
+ require "ostruct"
6
7
 
7
8
  require "tantiny"
8
9
 
9
- path = File.join(__dir__, "../tmp")
10
-
11
10
  options = {
12
11
  tokenizer: Tantiny::Tokenizer.new(:stemmer, language: :en),
13
12
  exclusive_writer: true,
14
13
  }
15
14
 
16
- index = Tantiny::Index.new(path, **options) do
15
+ index = Tantiny::Index.new(nil, **options) do
17
16
  id :imdb_id
18
17
  facet :category
19
18
  string :title
@@ -5,7 +5,7 @@ module Tantiny
5
5
 
6
6
  class IndexWriterBusyError < StandardError
7
7
  def initialize
8
- msg = "Failed to acquire an index writer. "\
8
+ msg = "Failed to acquire an index writer. " \
9
9
  "Is there an active index with an exclusive writer already?"
10
10
 
11
11
  super(msg)
data/lib/tantiny/index.rb CHANGED
@@ -3,17 +3,18 @@
3
3
  module Tantiny
4
4
  class Index
5
5
  LOCKFILE = ".tantiny.lock"
6
- DEFAULT_WRITER_MEMORY = 5_000_000 # 5MB
6
+ DEFAULT_WRITER_MEMORY = 15_000_000 # 15MB
7
7
  DEFAULT_LIMIT = 10
8
8
 
9
- def self.new(path, **options, &block)
10
- FileUtils.mkdir_p(path)
9
+ def self.new(path = nil, **options, &)
10
+ # Only create directory if path is provided
11
+ FileUtils.mkdir_p(path) if path
11
12
 
12
13
  default_tokenizer = options[:tokenizer] || Tokenizer.default
13
- schema = Schema.new(default_tokenizer, &block)
14
+ schema = Schema.new(default_tokenizer, &)
14
15
 
15
16
  object = __new(
16
- path.to_s,
17
+ path&.to_s,
17
18
  schema.default_tokenizer,
18
19
  schema.field_tokenizers.transform_keys(&:to_s),
19
20
  schema.text_fields.map(&:to_s),
@@ -44,6 +45,10 @@ module Tantiny
44
45
 
45
46
  attr_reader :schema
46
47
 
48
+ def in_memory?
49
+ @path.nil?
50
+ end
51
+
47
52
  def transaction
48
53
  if inside_transaction?
49
54
  yield
@@ -68,12 +73,12 @@ module Tantiny
68
73
  transaction do
69
74
  __add_document(
70
75
  resolve(document, schema.id_field).to_s,
71
- slice_document(document, schema.text_fields) { |v| v.to_s },
72
- slice_document(document, schema.string_fields) { |v| v.to_s },
73
- slice_document(document, schema.integer_fields) { |v| v.to_i },
74
- slice_document(document, schema.double_fields) { |v| v.to_f },
75
- slice_document(document, schema.date_fields) { |v| Helpers.timestamp(v) },
76
- slice_document(document, schema.facet_fields) { |v| v.to_s }
76
+ slice_document(document, schema.text_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s },
77
+ slice_document(document, schema.string_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s },
78
+ slice_document(document, schema.integer_fields) { |v| v.is_a?(Array) ? v.map(&:to_i) : v.to_i },
79
+ slice_document(document, schema.double_fields) { |v| v.is_a?(Array) ? v.map(&:to_f) : v.to_f },
80
+ slice_document(document, schema.date_fields) { |v| v.is_a?(Array) ? v.map { |d| Helpers.timestamp(d) } : Helpers.timestamp(v) },
81
+ slice_document(document, schema.facet_fields) { |v| v.is_a?(Array) ? v.map(&:to_s) : v.to_s }
77
82
  )
78
83
  end
79
84
  end
@@ -103,10 +108,10 @@ module Tantiny
103
108
 
104
109
  private
105
110
 
106
- def slice_document(document, fields, &block)
111
+ def slice_document(document, fields, &)
107
112
  fields.inject({}) do |hash, field|
108
113
  hash.tap { |h| h[field.to_s] = resolve(document, field) }
109
- end.compact.transform_values(&block)
114
+ end.compact.transform_values(&)
110
115
  end
111
116
 
112
117
  def resolve(document, field)
@@ -115,9 +120,9 @@ module Tantiny
115
120
 
116
121
  def acquire_index_writer
117
122
  __acquire_index_writer(@indexer_memory)
118
- rescue TantivyError => e
123
+ rescue RuntimeError => e
119
124
  case e.message
120
- when /Failed to acquire Lockfile/
125
+ when /Failed to acquire Lockfile/, /LockBusy/
121
126
  raise IndexWriterBusyError.new
122
127
  else
123
128
  raise
@@ -154,14 +159,19 @@ module Tantiny
154
159
  @exclusive_writer
155
160
  end
156
161
 
157
- def synchronize(&block)
158
- @transaction_semaphore.synchronize do
159
- Helpers.with_lock(lockfile_path, &block)
162
+ def synchronize(&)
163
+ # In-memory indexes don't need file locking
164
+ if in_memory?
165
+ @transaction_semaphore.synchronize(&)
166
+ else
167
+ @transaction_semaphore.synchronize do
168
+ Helpers.with_lock(lockfile_path, &)
169
+ end
160
170
  end
161
171
  end
162
172
 
163
173
  def lockfile_path
164
- @lockfile_path ||= File.join(@path, LOCKFILE)
174
+ @lockfile_path ||= @path && File.join(@path, LOCKFILE)
165
175
  end
166
176
  end
167
177
  end
data/lib/tantiny/query.rb CHANGED
@@ -32,37 +32,37 @@ module Tantiny
32
32
  __new_empty_query
33
33
  end
34
34
 
35
- def term_query(index, fields, term, **options)
35
+ def term_query(index, fields, term, **)
36
36
  allowed_fields = text_and_strings(index)
37
- construct_query(index, :term, allowed_fields, fields, [term.to_s], **options)
37
+ construct_query(index, :term, allowed_fields, fields, [term.to_s], **)
38
38
  end
39
39
 
40
- def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **options)
40
+ def fuzzy_term_query(index, fields, term, distance = DEFAULT_FUZZY_DISTANCE, **)
41
41
  params = [term.to_s, distance.to_i]
42
42
  allowed_fields = text_and_strings(index)
43
- construct_query(index, :fuzzy_term, allowed_fields, fields, params, **options)
43
+ construct_query(index, :fuzzy_term, allowed_fields, fields, params, **)
44
44
  end
45
45
 
46
- def phrase_query(index, fields, phrase, **options)
46
+ def phrase_query(index, fields, phrase, **)
47
47
  queries = [*fields].map do |f|
48
48
  terms = index.schema.tokenizer_for(f).terms(phrase)
49
49
  allowed_fields = index.schema.text_fields
50
- construct_query(index, :phrase, allowed_fields, f, [terms], **options)
50
+ construct_query(index, :phrase, allowed_fields, f, [terms], **)
51
51
  end
52
52
 
53
53
  queries.empty? ? empty_query : disjunction(*queries)
54
54
  end
55
55
 
56
- def regex_query(index, fields, regex, **options)
56
+ def regex_query(index, fields, regex, **)
57
57
  allowed_fields = text_and_strings(index)
58
- construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **options)
58
+ construct_query(index, :regex, allowed_fields, fields, [regex.to_s], **)
59
59
  end
60
60
 
61
- def prefix_query(index, fields, prefix, **options)
62
- regex_query(index, fields, Regexp.escape(prefix) + ".*", **options)
61
+ def prefix_query(index, fields, prefix, **)
62
+ regex_query(index, fields, Regexp.escape(prefix) + ".*", **)
63
63
  end
64
64
 
65
- def range_query(index, fields, range, **options)
65
+ def range_query(index, fields, range, **)
66
66
  schema = index.schema
67
67
 
68
68
  case range.first
@@ -81,19 +81,19 @@ module Tantiny
81
81
  end
82
82
 
83
83
  # @type var allowed_fields: Array[Symbol]
84
- construct_query(index, :range, allowed_fields, fields, [from, to], **options)
84
+ construct_query(index, :range, allowed_fields, fields, [from, to], **)
85
85
  end
86
86
 
87
- def facet_query(index, field, path, **options)
87
+ def facet_query(index, field, path, **)
88
88
  allowed_fields = index.schema.facet_fields
89
- construct_query(index, :facet, allowed_fields, field, [path], **options)
89
+ construct_query(index, :facet, allowed_fields, field, [path], **)
90
90
  end
91
91
 
92
92
  def smart_query(index, fields, query_string, **options)
93
93
  fuzzy_distance = options[:fuzzy_distance]
94
94
  boost_factor = options.fetch(:boost, DEFAULT_BOOST)
95
95
 
96
- field_queries = [*fields].map do |field|
96
+ field_queries = [*fields].filter_map do |field|
97
97
  terms = index.schema.tokenizer_for(field).terms(query_string)
98
98
 
99
99
  # See: https://github.com/soutaro/steep/issues/272
@@ -113,11 +113,16 @@ module Tantiny
113
113
  last_term_query = prefix_query(index, field, terms.last) | term_queries.last
114
114
 
115
115
  conjunction(last_term_query, *term_queries[0...-1])
116
- end.compact
116
+ end
117
117
 
118
118
  disjunction(*field_queries).boost(boost_factor)
119
119
  end
120
120
 
121
+ def highlight(text, query_string, fuzzy_distance: 0, tokenizer: Tantiny::Tokenizer.new(:simple))
122
+ terms = tokenizer.terms(query_string).map(&:to_s)
123
+ __highlight(text.to_s, terms, fuzzy_distance)
124
+ end
125
+
121
126
  private
122
127
 
123
128
  # Can't use variadic argument `params` here due to:
@@ -12,7 +12,7 @@ module Tantiny
12
12
  :facet_fields,
13
13
  :field_tokenizers
14
14
 
15
- def initialize(tokenizer, &block)
15
+ def initialize(tokenizer, &)
16
16
  @default_tokenizer = tokenizer
17
17
  @id_field = :id
18
18
  @text_fields = []
@@ -23,7 +23,7 @@ module Tantiny
23
23
  @facet_fields = []
24
24
  @field_tokenizers = {}
25
25
 
26
- instance_exec(&block)
26
+ instance_exec(&)
27
27
  end
28
28
 
29
29
  def tokenizer_for(field)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tantiny
4
- VERSION = "0.3.3" # {x-release-please-version}
4
+ VERSION = "0.4.0" # {x-release-please-version}
5
5
  end
data/lib/tantiny.rb CHANGED
@@ -1,10 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "ruby-next/language/setup"
4
- RubyNext::Language.setup_gem_load_path
5
-
6
- require "rutie"
7
- require "thermite/fiddle"
3
+ require "fiddle/import"
8
4
  require "concurrent"
9
5
  require "fileutils"
10
6
 
@@ -19,9 +15,24 @@ require "tantiny/index"
19
15
  module Tantiny
20
16
  project_dir = File.expand_path("../..", __FILE__)
21
17
 
22
- Thermite::Fiddle.load_module(
23
- "Init_tantiny",
24
- cargo_project_path: project_dir,
25
- ruby_project_path: project_dir
26
- )
18
+ # Try multiple possible locations for the library
19
+ lib_paths = [
20
+ File.join(project_dir, "target", "release", "libtantiny.dylib"),
21
+ File.join(project_dir, "target", "debug", "libtantiny.dylib"),
22
+ File.join(project_dir, "target", "release", "libtantiny.so"),
23
+ File.join(project_dir, "target", "debug", "libtantiny.so"),
24
+ File.join(project_dir, "lib", "tantiny.bundle"),
25
+ File.join(project_dir, "lib", "tantiny.so"),
26
+ File.join(project_dir, "lib", "tantiny.dylib")
27
+ ]
28
+
29
+ lib_path = lib_paths.find { |path| File.exist?(path) }
30
+
31
+ if lib_path.nil?
32
+ raise LoadError, "Could not find tantiny library in any of: #{lib_paths.join(", ")}"
33
+ end
34
+
35
+ # Load the library using Fiddle and call the init function
36
+ handle = Fiddle.dlopen(lib_path)
37
+ Fiddle::Function.new(handle["Init_tantiny"], [], Fiddle::TYPE_VOIDP).call
27
38
  end
data/lib/tantiny.so ADDED
Binary file