search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 40c59f9aee799bdf68e236563e8c6f77c6fb20a698416685685f27adbce81657
|
|
4
|
+
data.tar.gz: 8c6e21a6c33287344fe0d9a336c2a9830ab8cd3c298b95a2a4d957b28b3b269a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5f1c329eb77d31ccac5fc595909b8b853fb021b48179d33504e37b1ed65408ddff1051cb4aa16f14e7160c0c752828bfc10249b9b88f2ea9ec6bef644ad4120b
|
|
7
|
+
data.tar.gz: 54c259f69e49fdf98345fc87af72f3f84a718f08e5e76e5ffe2d78f841a541bc830c1b21f71398a003adaa2fbf72ff52ef7d82a465c57bb34f8ab563342f9527
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Nikita Shkoda
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Search Engine for Typesense [![CI][ci-badge]][ci-url] [![Gem][gem-badge]][gem-url] [![Docs][docs-badge]][docs-url]
|
|
2
|
+
[Typesense](https://typesense.org) [typesense-ruby](https://github.com/typesense/typesense-ruby)
|
|
3
|
+
|
|
4
|
+
> [!WARNING]
|
|
5
|
+
> **⚠️ This project is under maintenance – work in progress. APIs and docs may change. ⚠️**
|
|
6
|
+
|
|
7
|
+
Mountless Rails::Engine for [Typesense](https://typesense.org). Expressive Relation/DSL with JOINs, grouping, presets/curation — with strong DX and observability.
|
|
8
|
+
|
|
9
|
+
> [!NOTE]
|
|
10
|
+
> This project is not affiliated with [Typesense](https://typesense.org) and is a wrapper for the [`typesense` gem](https://github.com/typesense/typesense-ruby).
|
|
11
|
+
|
|
12
|
+
## Quickstart
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
# Gemfile
|
|
16
|
+
gem "search-engine-for-typesense"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
```ruby
|
|
20
|
+
# config/initializers/search_engine.rb
|
|
21
|
+
SearchEngine.configure do |c|
|
|
22
|
+
c.host = ENV.fetch("TYPESENSE_HOST", "localhost")
|
|
23
|
+
c.port = 8108
|
|
24
|
+
c.protocol = "http"
|
|
25
|
+
c.api_key = ENV.fetch("TYPESENSE_API_KEY")
|
|
26
|
+
end
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
class SearchEngine::Product < SearchEngine::Base
|
|
31
|
+
collection :products
|
|
32
|
+
|
|
33
|
+
attribute :id, :integer
|
|
34
|
+
attribute :name, :string
|
|
35
|
+
|
|
36
|
+
query_by %i[name brand description]
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
SearchEngine::Product.where(name: "milk").select(:id, :name).limit(5).to_a
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
See [Quickstart](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/quickstart).
|
|
43
|
+
|
|
44
|
+
### Host app SearchEngine models
|
|
45
|
+
|
|
46
|
+
By default, the gem manages a dedicated Zeitwerk loader for your SearchEngine models under `app/search_engine/`. The loader is initialized after Rails so that application models/constants are available, auto-reloads in development, and is eager-loaded in production/test.
|
|
47
|
+
|
|
48
|
+
Customize or disable via configuration:
|
|
49
|
+
|
|
50
|
+
```ruby
|
|
51
|
+
# config/initializers/search_engine.rb
|
|
52
|
+
SearchEngine.configure do |c|
|
|
53
|
+
# Relative to Rails.root or absolute; set to nil/false to disable
|
|
54
|
+
c.search_engine_models = 'app/search_engine'
|
|
55
|
+
end
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Usage examples
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
# Model
|
|
62
|
+
class SearchEngine::Product < SearchEngine::Base
|
|
63
|
+
collection "products"
|
|
64
|
+
|
|
65
|
+
attribute :id, :integer
|
|
66
|
+
attribute :name, :string
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Basic query
|
|
70
|
+
SearchEngine::Product
|
|
71
|
+
.where(name: "milk")
|
|
72
|
+
# Explicit query_by always wins over model/global defaults
|
|
73
|
+
.options(query_by: 'name,brand')
|
|
74
|
+
.select(:id, :name)
|
|
75
|
+
.order(price_cents: :asc)
|
|
76
|
+
.limit(5)
|
|
77
|
+
.to_a
|
|
78
|
+
|
|
79
|
+
# JOIN + nested selection
|
|
80
|
+
SearchEngine::Product
|
|
81
|
+
.joins(:brands)
|
|
82
|
+
.select(:id, :name, brands: %i[id name])
|
|
83
|
+
.where(brands: { name: "Acme" })
|
|
84
|
+
.per(10)
|
|
85
|
+
.to_a
|
|
86
|
+
|
|
87
|
+
# Faceting + grouping
|
|
88
|
+
rel = SearchEngine::Product
|
|
89
|
+
.facet_by(:brand_id, max_values: 5)
|
|
90
|
+
.facet_by(:category)
|
|
91
|
+
.group_by(:brand_id, limit: 3)
|
|
92
|
+
params = rel.to_h # compiled Typesense params
|
|
93
|
+
|
|
94
|
+
# Multi-search
|
|
95
|
+
result_set = SearchEngine.multi_search(common: { query_by: SearchEngine.config.default_query_by }) do |m|
|
|
96
|
+
m.add :products, SearchEngine::Product.where("name:~rud").per(10)
|
|
97
|
+
m.add :brands, SearchEngine::Brand.all.per(5)
|
|
98
|
+
end
|
|
99
|
+
result_set[:products].found
|
|
100
|
+
|
|
101
|
+
# Upserting documents
|
|
102
|
+
product_record = Product.first
|
|
103
|
+
mapped = SearchEngine::Product.mapped_data_for(product_record)
|
|
104
|
+
|
|
105
|
+
# Map + upsert a single record
|
|
106
|
+
SearchEngine::Product.upsert(record: product_record)
|
|
107
|
+
|
|
108
|
+
# Upsert already-mapped data
|
|
109
|
+
SearchEngine::Product.upsert(data: mapped)
|
|
110
|
+
|
|
111
|
+
# Bulk upsert records (mapper runs internally)
|
|
112
|
+
SearchEngine::Product.upsert_bulk(records: Product.limit(2))
|
|
113
|
+
|
|
114
|
+
# Bulk upsert mapped payloads
|
|
115
|
+
SearchEngine::Product.upsert_bulk(data: [mapped])
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Documentation
|
|
119
|
+
|
|
120
|
+
See the [Docs](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/index)
|
|
121
|
+
|
|
122
|
+
## Test/offline mode
|
|
123
|
+
|
|
124
|
+
In test environments (`Rails.env.test?` or `RACK_ENV=test`), SearchEngine defaults to an offline client
|
|
125
|
+
(`SearchEngine::Test::OfflineClient`) so no Typesense HTTP calls are made.
|
|
126
|
+
|
|
127
|
+
You can control this explicitly with:
|
|
128
|
+
- `SEARCH_ENGINE_TEST_MODE=1` to force offline mode
|
|
129
|
+
- `SEARCH_ENGINE_TEST_MODE=0` to disable offline mode
|
|
130
|
+
- `SEARCH_ENGINE_OFFLINE=1` (legacy alias)
|
|
131
|
+
|
|
132
|
+
If you set `SearchEngine.configure { |c| c.client = ... }`, the custom client is always used.
|
|
133
|
+
|
|
134
|
+
## Example app
|
|
135
|
+
|
|
136
|
+
See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/testing)).
|
|
137
|
+
|
|
138
|
+
## Contributing
|
|
139
|
+
|
|
140
|
+
See [Docs Style Guide](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/docs-style-guide). Follow YARDoc for public APIs, add backlinks on docs landing pages, and redact secrets in examples.
|
|
141
|
+
|
|
142
|
+
<!-- Badge references (placeholders) -->
|
|
143
|
+
[ci-badge]: https://img.shields.io/github/actions/workflow/status/lstpsche/search-engine-for-typesense/ci.yml?branch=main
|
|
144
|
+
[ci-url]: #
|
|
145
|
+
[gem-badge]: https://img.shields.io/gem/v/search-engine-for-typesense.svg?label=gem
|
|
146
|
+
[gem-url]: https://rubygems.org/gems/search-engine-for-typesense
|
|
147
|
+
[docs-badge]: https://img.shields.io/badge/docs-index-blue
|
|
148
|
+
[docs-url]: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/index
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Smoke-test helper exposing a fixed identifier for the engine.
  class AppInfo
    class << self
      # @return [String] a short identifier proving autoload worked
      def identifier
        'search_engine/app_info'
      end
    end
  end
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # ActiveJob to rebuild a single partition using the same orchestration as inline
  # (it delegates to SearchEngine::Indexer.rebuild_partition!).
  #
  # Arguments:
  # - collection_class_name [String]
  # - partition [Object] (JSON-serializable)
  # - into [String, nil]
  # - metadata [Hash]
  #
  # Timeouts, connection errors and API errors with a transient HTTP status
  # (429 or 5xx) are re-enqueued with exponential backoff until the configured
  # number of attempts is reached; every other failure is instrumented as
  # `search_engine.dispatcher.job_error` and re-raised.
  class IndexPartitionJob < ::ActiveJob::Base
    queue_as do
      cfg = SearchEngine.config.indexer
      (cfg&.queue_name || 'search_index').to_s
    end

    # Handle transient errors with exponential backoff based on Indexer config.
    rescue_from(SearchEngine::Errors::Timeout) { |error| retry_if_possible(error) }
    rescue_from(SearchEngine::Errors::Connection) { |error| retry_if_possible(error) }
    rescue_from(SearchEngine::Errors::Api) do |error|
      if transient_status?(error.status.to_i)
        retry_if_possible(error)
      else
        instrument_error(error)
        # Re-raise the handled exception explicitly instead of relying on `$!`.
        raise error
      end
    end

    # Perform a single-partition rebuild.
    #
    # Emits `search_engine.dispatcher.job_started` before the rebuild and
    # `search_engine.dispatcher.job_finished` (with duration and summary status)
    # after it. Any StandardError is instrumented and re-raised.
    #
    # @param collection_class_name [String]
    # @param partition [Object]
    # @param into [String, nil]
    # @param metadata [Hash]
    # @return [void]
    # @raise [ArgumentError] when the class name is not a String, cannot be
    #   resolved, or does not name a SearchEngine::Base subclass
    def perform(collection_class_name, partition, into: nil, metadata: {})
      # Seed the payload before anything can raise: the rescue below needs a Hash
      # even when class resolution fails, otherwise the original error would be
      # masked by a NoMethodError on nil.
      payload = { collection: collection_class_name.to_s, partition: partition, into: into }

      klass = constantize_collection!(collection_class_name)
      payload = base_payload(klass, partition: partition, into: into)
      instrument(
        'search_engine.dispatcher.job_started',
        payload.merge(queue: queue_name, job_id: job_id, metadata: metadata)
      )

      started = monotonic_ms
      summary = nil
      SearchEngine::Instrumentation.with_context(dispatch_mode: :active_job, job_id: job_id) do
        summary = SearchEngine::Indexer.rebuild_partition!(klass, partition: partition, into: into)
      end
      duration = (monotonic_ms - started).round(1)

      instrument(
        'search_engine.dispatcher.job_finished',
        payload.merge(
          queue: queue_name,
          job_id: job_id,
          duration_ms: duration,
          status: summary.status,
          metadata: metadata
        )
      )
      nil
    rescue StandardError => error
      instrument_error(error, payload: (payload || {}).merge(metadata: metadata))
      raise
    end

    private

    # Build the instrumentation payload shared by the started/finished events.
    #
    # @param klass [Class] resolved collection class
    # @param partition [Object]
    # @param into [String, nil]
    # @return [Hash] with :collection, :partition and :into keys
    def base_payload(klass, partition:, into:)
      {
        collection: (klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s),
        partition: partition,
        into: into
      }
    end

    # Resolve a class name into a SearchEngine::Base subclass.
    #
    # @param name [String]
    # @return [Class]
    # @raise [ArgumentError] when +name+ is not a String, cannot be resolved, or
    #   resolves to something that is not a SearchEngine::Base subclass
    def constantize_collection!(name)
      raise ArgumentError, 'collection_class_name must be a String' unless name.is_a?(String)

      klass = name.constantize
      unless klass.is_a?(Class) && klass <= SearchEngine::Base
        raise ArgumentError, 'collection_class_name must be a SearchEngine::Base subclass'
      end

      klass
    rescue NameError => error
      # Surface lookup failures as ArgumentError while keeping the original backtrace.
      raise ArgumentError, "unknown collection class: #{name}", error.backtrace
    end

    # Re-enqueue the job with backoff, or instrument and re-raise once the
    # configured number of attempts has been used up.
    #
    # @param error [Exception] the failure being handled
    # @return [Object] whatever +retry_job+ returns
    def retry_if_possible(error)
      attempts, base, max, jitter = retry_settings
      attempt_no = executions.to_i # number of times we've run so far (1-based)
      if attempt_no >= attempts
        instrument_error(error)
        # Explicit re-raise: this helper runs outside a rescue clause of its own,
        # so a bare `raise` would depend on the caller still having `$!` set.
        raise error
      end

      wait_seconds = backoff_seconds(attempt_no + 1, base: base, max: max, jitter_fraction: jitter)
      instrument(
        'search_engine.dispatcher.job_error',
        error_payload(error).merge(queue: queue_name, job_id: job_id, retry_after_s: wait_seconds)
      )
      retry_job wait: wait_seconds
    end

    # Payload describing a failure, reconstructed from the job's raw arguments.
    #
    # @param error [Exception]
    # @return [Hash]
    def error_payload(error)
      into = begin
        arguments_hash[:into]
      rescue StandardError
        nil
      end

      {
        collection: arguments_dig_collection,
        partition: arguments[1],
        into: into,
        error_class: error.class.name,
        message_truncated: error.message.to_s[0, 200]
      }
    end

    # Emit a `search_engine.dispatcher.job_error` event for +error+.
    #
    # @param error [Exception]
    # @param payload [Hash, nil] extra keys merged under the error details
    # @return [Object] result of the instrumentation call
    def instrument_error(error, payload: nil)
      instrument(
        'search_engine.dispatcher.job_error',
        (payload || {}).merge(
          queue: queue_name,
          job_id: job_id,
          error_class: error.class.name,
          message_truncated: error.message.to_s[0, 200]
        )
      )
    end

    # Publish an event through SearchEngine::Instrumentation with an empty block.
    def instrument(event, payload)
      SearchEngine::Instrumentation.instrument(event, payload) {}
    end

    # Read retry settings from the indexer config, falling back per key.
    #
    # Fallbacks: attempts 3, base 0.5, max 5.0, jitter_fraction 0.2. A value is
    # replaced by its fallback when the retries config is missing, when the
    # numeric value is not positive (attempts/base/max), or when jitter_fraction
    # is absent or negative. An explicit jitter_fraction of 0 is honoured.
    #
    # @return [Array(Integer, Float, Float, Float)] attempts, base, max, jitter
    def retry_settings
      retries = SearchEngine.config.indexer&.retries
      return [3, 0.5, 5.0, 0.2] unless retries

      attempts = retries[:attempts].to_i
      base = retries[:base].to_f
      max = retries[:max].to_f

      # `nil.to_f` is 0.0, which would pass a plain `>= 0` check and silently
      # disable jitter; treat a missing key as "use the default" instead.
      raw_jitter = retries[:jitter_fraction]
      jitter = raw_jitter.nil? ? nil : raw_jitter.to_f
      jitter = 0.2 unless jitter && jitter >= 0

      [
        attempts.positive? ? attempts : 3,
        base.positive? ? base : 0.5,
        max.positive? ? max : 5.0,
        jitter
      ]
    end

    # Exponential backoff capped at +max+, with symmetric random jitter.
    #
    # @param attempt [Integer] 1-based attempt number the delay is computed for
    # @param base [Float] delay for attempt 1, in seconds
    # @param max [Float] upper bound applied before jitter
    # @param jitter_fraction [Float] fraction of the delay used as +/- jitter
    # @return [Float] non-negative delay in seconds
    def backoff_seconds(attempt, base:, max:, jitter_fraction:)
      exp = [base * (2**(attempt - 1)), max].min
      jitter = exp * jitter_fraction
      delta = rand(-jitter..jitter)
      sleep_time = exp + delta
      sleep_time.positive? ? sleep_time : 0.0
    end

    # @param code [Integer] HTTP status
    # @return [Boolean] true for 429 and any 5xx status
    def transient_status?(code)
      code == 429 || (code >= 500 && code <= 599)
    end

    def monotonic_ms
      SearchEngine::Instrumentation.monotonic_ms
    end

    # ActiveJob stores keyword args in the last Hash argument when using
    # perform(class, partition, into:, metadata:).
    #
    # @return [Hash] symbol-keyed keyword arguments, or {} when none are present
    def arguments_hash
      args = arguments
      args.last.is_a?(Hash) ? args.last.symbolize_keys : {}
    end

    # Collection class name as passed to the job, for error payloads.
    #
    # @return [String] the first job argument as a String, or 'unknown' when it
    #   is missing or blank
    def arguments_dig_collection
      name = arguments.first.to_s
      name.empty? ? 'unknown' : name
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/generators'
|
|
4
|
+
|
|
5
|
+
module SearchEngine
  module Generators
    # Generator that writes the SearchEngine initializer (ENV-based defaults)
    # into the host application.
    #
    # @example
    #   rails g search_engine:install
    # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/dx
    class InstallGenerator < Rails::Generators::Base
      source_root(File.expand_path('templates', __dir__))

      # Render the bundled template to config/initializers/search_engine.rb.
      def create_initializer
        template('initializer.rb.tt', 'config/initializers/search_engine.rb')
      end
    end
  end
end
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# SearchEngine configuration — ENV-based defaults
|
|
4
|
+
#
|
|
5
|
+
# This initializer is idempotent and safe for local development. Keep secrets
|
|
6
|
+
# out of code: prefer ENV variables for API keys and sensitive config.
|
|
7
|
+
#
|
|
8
|
+
# Next steps:
|
|
9
|
+
# - Define a model under `app/search_engine/` (or your configured `c.search_engine_models`; see Quickstart)
|
|
10
|
+
# - Run the CLI doctor to verify connectivity and config
|
|
11
|
+
# - Explore queries in `rails console` using SE helpers: `SE.q("milk")`
|
|
12
|
+
#
|
|
13
|
+
# Docs:
|
|
14
|
+
# - Quickstart: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/quickstart
|
|
15
|
+
# - CLI (Doctor): https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/cli
|
|
16
|
+
# - DX helpers: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/dx
|
|
17
|
+
# - Configuration: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/configuration
|
|
18
|
+
SearchEngine.configure do |c|
|
|
19
|
+
c.host = ENV.fetch('TYPESENSE_HOST', 'localhost')
|
|
20
|
+
c.port = Integer(ENV.fetch('TYPESENSE_PORT', 8108))
|
|
21
|
+
c.protocol = ENV.fetch('TYPESENSE_PROTOCOL', 'http')
|
|
22
|
+
# Read API key from ENV; do not hardcode secrets here.
|
|
23
|
+
c.api_key = ENV['TYPESENSE_API_KEY']
|
|
24
|
+
|
|
25
|
+
# Default fields used when query_by is not provided explicitly.
|
|
26
|
+
# Uncomment and customize to your schema.
|
|
27
|
+
# c.default_query_by = 'name, description'
|
|
28
|
+
|
|
29
|
+
# Tip: You can also set per-collection defaults in your model:
|
|
30
|
+
# class SearchEngine::Product < SearchEngine::Base
|
|
31
|
+
# collection 'products'
|
|
32
|
+
# # Accepts String, Symbol, or Array; stored as a canonical String
|
|
33
|
+
# query_by %i[name brand description]
|
|
34
|
+
# end
|
|
35
|
+
|
|
36
|
+
# Optional: set a default console model for SE.q/SE.rel helpers.
|
|
37
|
+
# Accepts a constant or String name (e.g., 'SearchEngine::Product').
|
|
38
|
+
# c.default_console_model = nil
|
|
39
|
+
|
|
40
|
+
# Host app SearchEngine models directory. Relative paths are resolved against
|
|
41
|
+
# Rails.root. Set to nil/false to disable gem-managed loading.
|
|
42
|
+
# Defaults to 'app/search_engine'.
|
|
43
|
+
# c.search_engine_models = 'app/search_engine'
|
|
44
|
+
|
|
45
|
+
# --- Typesense transport -------------------------------------------------
|
|
46
|
+
|
|
47
|
+
# Request total timeout in milliseconds. Default: 3_600_000 (60 minutes)
|
|
48
|
+
# c.timeout_ms = 3_600_000
|
|
49
|
+
|
|
50
|
+
# Connect/open timeout in milliseconds. Default: 1000
|
|
51
|
+
# c.open_timeout_ms = 1_000
|
|
52
|
+
|
|
53
|
+
# Retry policy for client requests. Default: { attempts: 2, backoff: 10.0..60.0 }
|
|
54
|
+
# Use a Range for backoff to introduce jitter between retries (seconds).
|
|
55
|
+
# c.retries = { attempts: 2, backoff: 10.0..60.0 }
|
|
56
|
+
# Recommendation: keep a 60-minute timeout and add jitter for long-running writes.
|
|
57
|
+
# See docs (Configuration → Timeouts & retries): https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/configuration
|
|
58
|
+
|
|
59
|
+
# Default Typesense infix behavior for queries. Default: 'fallback'
|
|
60
|
+
# c.default_infix = 'fallback'
|
|
61
|
+
|
|
62
|
+
# --- URL-level caching ---------------------------------------------------
|
|
63
|
+
|
|
64
|
+
# Allow URL-level caching (passed as common params). Default: true
|
|
65
|
+
# c.use_cache = true
|
|
66
|
+
|
|
67
|
+
# Cache TTL in seconds (URL/common params). Default: 60
|
|
68
|
+
# c.cache_ttl_s = 60
|
|
69
|
+
|
|
70
|
+
# --- Field strictness & limits ------------------------------------------
|
|
71
|
+
|
|
72
|
+
# Enforce strict field validation at compile/hydration time.
|
|
73
|
+
# Default: true in development/test; false in production
|
|
74
|
+
# c.strict_fields = !Rails.env.production?
|
|
75
|
+
|
|
76
|
+
# Max number of searches in a single multi-search call. Default: 50
|
|
77
|
+
# c.multi_search_limit = 50
|
|
78
|
+
|
|
79
|
+
# Whether Relation#inspect pretty-prints by materializing a preview. Default: true
|
|
80
|
+
# c.relation_print_materializes = true
|
|
81
|
+
|
|
82
|
+
# Override the logger used by the engine (defaults to Rails.logger). Example:
|
|
83
|
+
# c.logger = Rails.logger
|
|
84
|
+
|
|
85
|
+
# --- Presets -------------------------------------------------------------
|
|
86
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets
|
|
87
|
+
|
|
88
|
+
# Enable preset namespacing and resolution. Default: true
|
|
89
|
+
# c.presets.enabled = true
|
|
90
|
+
|
|
91
|
+
# Optional namespace to prepend to preset names. Default: nil
|
|
92
|
+
# c.presets.namespace = nil
|
|
93
|
+
|
|
94
|
+
# Limit which domains presets may modify. Default: %i[filter_by sort_by include_fields exclude_fields]
|
|
95
|
+
# c.presets.locked_domains = %i[filter_by sort_by include_fields exclude_fields]
|
|
96
|
+
|
|
97
|
+
# --- Selection & Grouping ------------------------------------------------
|
|
98
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/field-selection
|
|
99
|
+
|
|
100
|
+
# Raise when requested fields are missing during hydration. Default: false
|
|
101
|
+
# c.selection.strict_missing = false
|
|
102
|
+
|
|
103
|
+
# Emit warnings for ambiguous grouping combinations. Default: true
|
|
104
|
+
# c.grouping.warn_on_ambiguous = true
|
|
105
|
+
|
|
106
|
+
# --- Observability & Logging --------------------------------------------
|
|
107
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability
|
|
108
|
+
|
|
109
|
+
# Quiet by default: structured instrumentation logs ([se.*]) are OFF unless
|
|
110
|
+
# explicitly enabled below.
|
|
111
|
+
|
|
112
|
+
# Preferred (new) structured logs via LoggingSubscriber (opt-in):
|
|
113
|
+
# - :compact (single-line) or :json
|
|
114
|
+
# - optional sampling to reduce noise
|
|
115
|
+
# c.logging.mode = :compact # Default: nil (OFF)
|
|
116
|
+
# c.logging.level = :info # :debug | :info | :warn | :error
|
|
117
|
+
# c.logging.sample = 1.0 # e.g., 0.1 to sample 10%
|
|
118
|
+
# c.logging.logger = c.logger
|
|
119
|
+
|
|
120
|
+
# Legacy compact logger (fallback, also opt-in):
|
|
121
|
+
# Enable only if you prefer the legacy subscriber instead of LoggingSubscriber.
|
|
122
|
+
# c.observability.enabled = false # Default: false (OFF)
|
|
123
|
+
# c.observability.log_format = :kv # :kv or :json
|
|
124
|
+
# c.observability.max_message_length = 200
|
|
125
|
+
# c.observability.include_error_messages = false
|
|
126
|
+
# c.observability.emit_legacy_event_aliases = true
|
|
127
|
+
|
|
128
|
+
# OpenTelemetry integration (only when the SDK is present). Defaults shown.
|
|
129
|
+
# c.opentelemetry = { enabled: false, service_name: 'search_engine' }
|
|
130
|
+
|
|
131
|
+
# --- Schema lifecycle ----------------------------------------------------
|
|
132
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema
|
|
133
|
+
|
|
134
|
+
# Retention: how many previous physical collections to keep after swap. Default: 0
|
|
135
|
+
# c.schema.retention.keep_last = 0
|
|
136
|
+
|
|
137
|
+
# --- Indexer -------------------------------------------------------------
|
|
138
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer
|
|
139
|
+
|
|
140
|
+
# Default batch size for imports when not provided. Default: 2000
|
|
141
|
+
# c.indexer.batch_size = 2000
|
|
142
|
+
|
|
143
|
+
# Optional read timeout override (ms) for imports. Default: nil
|
|
144
|
+
# c.indexer.timeout_ms = nil
|
|
145
|
+
|
|
146
|
+
# Retry policy for imports. Default: { attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2 }
|
|
147
|
+
# c.indexer.retries = { attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2 }
|
|
148
|
+
|
|
149
|
+
# Gzip JSONL payloads during import. Default: false
|
|
150
|
+
# c.indexer.gzip = false
|
|
151
|
+
|
|
152
|
+
# Dispatch mode for import jobs (:active_job or :inline). Default depends on ActiveJob presence
|
|
153
|
+
# c.indexer.dispatch = :active_job
|
|
154
|
+
|
|
155
|
+
# Queue name for ActiveJob dispatch. Default: 'search_index'
|
|
156
|
+
# c.indexer.queue_name = 'search_index'
|
|
157
|
+
|
|
158
|
+
# --- Sources -------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
# ActiveRecord source: default ORM batch size. Default: 2000
|
|
161
|
+
# c.sources.active_record.batch_size = 2000
|
|
162
|
+
|
|
163
|
+
# ActiveRecord source: mark relations as readonly. Default: true
|
|
164
|
+
# c.sources.active_record.readonly = true
|
|
165
|
+
|
|
166
|
+
# ActiveRecord source: wrap fetching in a read-only transaction. Default: false
|
|
167
|
+
# c.sources.active_record.use_transaction = false
|
|
168
|
+
|
|
169
|
+
# SQL source: server-side cursor fetch size. Default: 2000
|
|
170
|
+
# c.sources.sql.fetch_size = 2000
|
|
171
|
+
|
|
172
|
+
# SQL source: per-statement timeout (ms). Default: nil
|
|
173
|
+
# c.sources.sql.statement_timeout_ms = nil
|
|
174
|
+
|
|
175
|
+
# SQL source: preferred row shape (:auto, :hash). Default: :auto
|
|
176
|
+
# c.sources.sql.row_shape = :auto
|
|
177
|
+
|
|
178
|
+
# Lambda source: optional max batch size hint for validation/metrics. Default: nil
|
|
179
|
+
# c.sources.lambda.max_batch_size_hint = nil
|
|
180
|
+
|
|
181
|
+
# --- Mapper --------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
# Treat unknown keys as errors instead of warnings. Default: false
|
|
184
|
+
# c.mapper.strict_unknown_keys = false
|
|
185
|
+
|
|
186
|
+
# Nested coercions configuration. Default: { enabled: false, rules: {} }
|
|
187
|
+
# c.mapper.coercions = { enabled: false, rules: {} }
|
|
188
|
+
|
|
189
|
+
# Maximum number of error samples included in reports. Default: 5
|
|
190
|
+
# c.mapper.max_error_samples = 5
|
|
191
|
+
|
|
192
|
+
# --- Partitioning --------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
# Optional resolver Proc for default physical collection. Default: nil
|
|
195
|
+
# c.partitioning.default_into_resolver = nil
|
|
196
|
+
|
|
197
|
+
# Before/after hook timeouts (ms) for partitioning callbacks. Default: nil
|
|
198
|
+
# c.partitioning.before_hook_timeout_ms = nil
|
|
199
|
+
# c.partitioning.after_hook_timeout_ms = nil
|
|
200
|
+
|
|
201
|
+
# Maximum error samples included in partition payloads. Default: 5
|
|
202
|
+
# c.partitioning.max_error_samples = 5
|
|
203
|
+
|
|
204
|
+
# --- Stale deletes -------------------------------------------------------
|
|
205
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/deletion
|
|
206
|
+
|
|
207
|
+
# Global kill switch for stale delete workflows. Default: true
|
|
208
|
+
# c.stale_deletes.enabled = true
|
|
209
|
+
|
|
210
|
+
# Strict mode blocks suspicious filters. Default: false
|
|
211
|
+
# c.stale_deletes.strict_mode = false
|
|
212
|
+
|
|
213
|
+
# Timeout (ms) for delete requests. Default: nil
|
|
214
|
+
# c.stale_deletes.timeout_ms = nil
|
|
215
|
+
|
|
216
|
+
# Enable found estimation via search for stale deletes. Default: false
|
|
217
|
+
# c.stale_deletes.estimation_enabled = false
|
|
218
|
+
|
|
219
|
+
# --- Curation ------------------------------------------------------------
|
|
220
|
+
# See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
|
|
221
|
+
|
|
222
|
+
# Maximum number of pinned IDs allowed. Default: 50
|
|
223
|
+
# c.curation.max_pins = 50
|
|
224
|
+
|
|
225
|
+
# Maximum number of hidden IDs allowed. Default: 200
|
|
226
|
+
# c.curation.max_hidden = 200
|
|
227
|
+
|
|
228
|
+
# Allowed curated ID pattern. Default: /\A[\w\-:.]+\z/
|
|
229
|
+
# c.curation.id_regex = /\A[\w\-:.]+\z/
|
|
230
|
+
end
|