search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 40c59f9aee799bdf68e236563e8c6f77c6fb20a698416685685f27adbce81657
4
+ data.tar.gz: 8c6e21a6c33287344fe0d9a336c2a9830ab8cd3c298b95a2a4d957b28b3b269a
5
+ SHA512:
6
+ metadata.gz: 5f1c329eb77d31ccac5fc595909b8b853fb021b48179d33504e37b1ed65408ddff1051cb4aa16f14e7160c0c752828bfc10249b9b88f2ea9ec6bef644ad4120b
7
+ data.tar.gz: 54c259f69e49fdf98345fc87af72f3f84a718f08e5e76e5ffe2d78f841a541bc830c1b21f71398a003adaa2fbf72ff52ef7d82a465c57bb34f8ab563342f9527
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nikita Shkoda
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # Search Engine for Typesense [![CI][ci-badge]][ci-url] [![Gem][gem-badge]][gem-url] [![Docs][docs-badge]][docs-url]
2
+ [![Typesense](https://img.shields.io/badge/Typesense-Typesense-blue)](https://typesense.org) [![Typesense Ruby gem](https://img.shields.io/badge/Typesense%20Ruby%20gem-TypesenseRubyGem-blue)](https://github.com/typesense/typesense-ruby)
3
+
4
+ > [!WARNING]
5
+ > **⚠️ This project is under maintenance – work in progress. APIs and docs may change. ⚠️**
6
+
7
+ Mountless Rails::Engine for [Typesense](https://typesense.org). Expressive Relation/DSL with JOINs, grouping, presets/curation — with strong DX and observability.
8
+
9
+ > [!NOTE]
10
+ > This project is not affiliated with [Typesense](https://typesense.org) and is a wrapper for the [`typesense` gem](https://github.com/typesense/typesense-ruby).
11
+
12
+ ## Quickstart
13
+
14
+ ```ruby
15
+ # Gemfile
16
+ gem "search-engine-for-typesense"
17
+ ```
18
+
19
+ ```ruby
20
+ # config/initializers/search_engine.rb
21
+ SearchEngine.configure do |c|
22
+ c.host = ENV.fetch("TYPESENSE_HOST", "localhost")
23
+ c.port = 8108
24
+ c.protocol = "http"
25
+ c.api_key = ENV.fetch("TYPESENSE_API_KEY")
26
+ end
27
+ ```
28
+
29
+ ```ruby
30
+ class SearchEngine::Product < SearchEngine::Base
31
+ collection :products
32
+
33
+ attribute :id, :integer
34
+ attribute :name, :string
35
+
36
+ query_by %i[name brand description]
37
+ end
38
+
39
+ SearchEngine::Product.where(name: "milk").select(:id, :name).limit(5).to_a
40
+ ```
41
+
42
+ See [Quickstart](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/quickstart).
43
+
44
+ ### Host app SearchEngine models
45
+
46
+ By default, the gem manages a dedicated Zeitwerk loader for your SearchEngine models under `app/search_engine/`. The loader is initialized after Rails so that application models/constants are available, auto-reloads in development, and is eager-loaded in production/test.
47
+
48
+ Customize or disable via configuration:
49
+
50
+ ```ruby
51
+ # config/initializers/search_engine.rb
52
+ SearchEngine.configure do |c|
53
+ # Relative to Rails.root or absolute; set to nil/false to disable
54
+ c.search_engine_models = 'app/search_engine'
55
+ end
56
+ ```
57
+
58
+ ## Usage examples
59
+
60
+ ```ruby
61
+ # Model
62
+ class SearchEngine::Product < SearchEngine::Base
63
+ collection "products"
64
+
65
+ attribute :id, :integer
66
+ attribute :name, :string
67
+ end
68
+
69
+ # Basic query
70
+ SearchEngine::Product
71
+ .where(name: "milk")
72
+ # Explicit query_by always wins over model/global defaults
73
+ .options(query_by: 'name,brand')
74
+ .select(:id, :name)
75
+ .order(price_cents: :asc)
76
+ .limit(5)
77
+ .to_a
78
+
79
+ # JOIN + nested selection
80
+ SearchEngine::Product
81
+ .joins(:brands)
82
+ .select(:id, :name, brands: %i[id name])
83
+ .where(brands: { name: "Acme" })
84
+ .per(10)
85
+ .to_a
86
+
87
+ # Faceting + grouping
88
+ rel = SearchEngine::Product
89
+ .facet_by(:brand_id, max_values: 5)
90
+ .facet_by(:category)
91
+ .group_by(:brand_id, limit: 3)
92
+ params = rel.to_h # compiled Typesense params
93
+
94
+ # Multi-search
95
+ result_set = SearchEngine.multi_search(common: { query_by: SearchEngine.config.default_query_by }) do |m|
96
+ m.add :products, SearchEngine::Product.where("name:~rud").per(10)
97
+ m.add :brands, SearchEngine::Brand.all.per(5)
98
+ end
99
+ result_set[:products].found
100
+
101
+ # Upserting documents
102
+ product_record = Product.first
103
+ mapped = SearchEngine::Product.mapped_data_for(product_record)
104
+
105
+ # Map + upsert a single record
106
+ SearchEngine::Product.upsert(record: product_record)
107
+
108
+ # Upsert already-mapped data
109
+ SearchEngine::Product.upsert(data: mapped)
110
+
111
+ # Bulk upsert records (mapper runs internally)
112
+ SearchEngine::Product.upsert_bulk(records: Product.limit(2))
113
+
114
+ # Bulk upsert mapped payloads
115
+ SearchEngine::Product.upsert_bulk(data: [mapped])
116
+ ```
117
+
118
+ ## Documentation
119
+
120
+ See the [Docs](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/index)
121
+
122
+ ## Test/offline mode
123
+
124
+ In test environments (`Rails.env.test?` or `RACK_ENV=test`), SearchEngine defaults to an offline client
125
+ (`SearchEngine::Test::OfflineClient`) so no Typesense HTTP calls are made.
126
+
127
+ You can control this explicitly with:
128
+ - `SEARCH_ENGINE_TEST_MODE=1` to force offline mode
129
+ - `SEARCH_ENGINE_TEST_MODE=0` to disable offline mode
130
+ - `SEARCH_ENGINE_OFFLINE=1` (legacy alias)
131
+
132
+ If you set `SearchEngine.configure { |c| c.client = ... }`, the custom client is always used.
133
+
134
+ ## Example app
135
+
136
+ See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/testing)).
137
+
138
+ ## Contributing
139
+
140
+ See [Docs Style Guide](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/docs-style-guide). Follow YARDoc for public APIs, add backlinks on docs landing pages, and redact secrets in examples.
141
+
142
+ <!-- Badge references (placeholders) -->
143
+ [ci-badge]: https://img.shields.io/github/actions/workflow/status/lstpsche/search-engine-for-typesense/ci.yml?branch=main
144
+ [ci-url]: #
145
+ [gem-badge]: https://img.shields.io/gem/v/search-engine-for-typesense.svg?label=gem
146
+ [gem-url]: https://rubygems.org/gems/search-engine-for-typesense
147
+ [docs-badge]: https://img.shields.io/badge/docs-index-blue
148
+ [docs-url]: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/index
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Minimal smoke-test helper exposing a fixed identifier for the engine.
  class AppInfo
    class << self
      # @return [String] constant identifier; a successful call shows the class was loaded
      def identifier
        'search_engine/app_info'
      end
    end
  end
end
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # ActiveJob to rebuild a single partition using the same orchestration as inline.
  #
  # Arguments:
  # - collection_class_name [String]
  # - partition [Object] (JSON-serializable)
  # - into [String, nil]
  # - metadata [Hash]
  #
  # Emits `search_engine.dispatcher.job_started`, `.job_finished` and `.job_error`
  # events through SearchEngine::Instrumentation.
  class IndexPartitionJob < ::ActiveJob::Base
    # Queue name comes from the indexer config, falling back to 'search_index'.
    queue_as do
      cfg = SearchEngine.config.indexer
      (cfg&.queue_name || 'search_index').to_s
    end

    # Handle transient errors with exponential backoff based on Indexer config.
    rescue_from(SearchEngine::Errors::Timeout) { |error| retry_if_possible(error) }
    rescue_from(SearchEngine::Errors::Connection) { |error| retry_if_possible(error) }
    rescue_from(SearchEngine::Errors::Api) do |error|
      if transient_status?(error.status.to_i)
        retry_if_possible(error)
      else
        instrument_error(error)
        # Re-raise the handled error explicitly instead of relying on the ambient `$!`.
        raise error
      end
    end

    # Perform a single-partition rebuild.
    # @param collection_class_name [String]
    # @param partition [Object]
    # @param into [String, nil]
    # @param metadata [Hash]
    # @return [void]
    # @raise [ArgumentError] when the class name is not a String or does not resolve
    #   to a SearchEngine::Base subclass
    def perform(collection_class_name, partition, into: nil, metadata: {})
      payload = nil
      klass = constantize_collection!(collection_class_name)
      payload = base_payload(klass, partition: partition, into: into)
      instrument(
        'search_engine.dispatcher.job_started',
        payload.merge(queue: queue_name, job_id: job_id, metadata: metadata)
      )

      started = monotonic_ms
      summary = nil
      SearchEngine::Instrumentation.with_context(dispatch_mode: :active_job, job_id: job_id) do
        summary = SearchEngine::Indexer.rebuild_partition!(klass, partition: partition, into: into)
      end
      duration = (monotonic_ms - started).round(1)

      instrument(
        'search_engine.dispatcher.job_finished',
        payload.merge(
          queue: queue_name, job_id: job_id, duration_ms: duration, status: summary.status,
          metadata: metadata
        )
      )
      nil
    rescue StandardError => error
      # `payload` is still nil when class resolution failed; fall back to the raw
      # arguments so the original error is reported instead of a NoMethodError on nil.
      fallback = { collection: collection_class_name.to_s, partition: partition, into: into }
      instrument_error(error, payload: (payload || fallback).merge(metadata: metadata))
      raise
    end

    private

    # Common instrumentation payload for a run.
    # @return [Hash] with :collection, :partition and :into keys
    def base_payload(klass, partition:, into:)
      {
        collection: (klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s),
        partition: partition,
        into: into
      }
    end

    # Resolve the class name and verify it is a SearchEngine::Base subclass.
    # @param name [String]
    # @return [Class]
    # @raise [ArgumentError] for non-String names, unknown constants, or foreign classes
    def constantize_collection!(name)
      raise ArgumentError, 'collection_class_name must be a String' unless name.is_a?(String)

      klass = name.constantize
      unless klass.is_a?(Class) && klass <= SearchEngine::Base
        raise ArgumentError, 'collection_class_name must be a SearchEngine::Base subclass'
      end

      klass
    rescue NameError => error
      raise ArgumentError, "unknown collection class: #{name}", error.backtrace
    end

    # Schedule a retry with backoff, or report and re-raise once attempts are exhausted.
    # @param error [StandardError]
    def retry_if_possible(error)
      attempts, base, max, jitter = retry_settings
      attempt_no = executions.to_i # number of times we've run so far (1-based)
      if attempt_no >= attempts
        instrument_error(error)
        raise error
      end

      wait_seconds = backoff_seconds(attempt_no + 1, base: base, max: max, jitter_fraction: jitter)
      instrument(
        'search_engine.dispatcher.job_error',
        error_payload(error).merge(queue: queue_name, job_id: job_id, retry_after_s: wait_seconds)
      )
      retry_job wait: wait_seconds
    end

    # Build an error payload from the job's serialized arguments (the resolved class
    # is not available inside rescue_from handlers).
    def error_payload(error)
      {
        collection: arguments_dig_collection,
        partition: arguments[1],
        into: begin
          arguments_hash[:into]
        rescue StandardError
          nil
        end,
        error_class: error.class.name,
        message_truncated: error.message.to_s[0, 200]
      }
    end

    # Emit a job_error event; the message is truncated to 200 characters.
    def instrument_error(error, payload: nil)
      instrument(
        'search_engine.dispatcher.job_error',
        (payload || {}).merge(
          queue: queue_name, job_id: job_id, error_class: error.class.name,
          message_truncated: error.message.to_s[0, 200]
        )
      )
    end

    def instrument(event, payload)
      SearchEngine::Instrumentation.instrument(event, payload) {}
    end

    # Read retry settings from the indexer config, substituting defaults
    # (attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2) for missing or
    # invalid values.
    # @return [Array(Integer, Float, Float, Float)] attempts, base, max, jitter
    def retry_settings
      retries = SearchEngine.config.indexer&.retries || {}

      attempts = retries[:attempts].to_i
      attempts = 3 unless attempts.positive?

      base = retries[:base].to_f
      base = 0.5 unless base.positive?

      max = retries[:max].to_f
      max = 5.0 unless max.positive?

      # An absent jitter_fraction must yield the default: nil.to_f is 0.0, which
      # would otherwise pass a ">= 0" check and silently disable jitter. An
      # explicit 0 is still honored.
      raw_jitter = retries[:jitter_fraction]
      jitter = (raw_jitter.nil? || raw_jitter.to_f.negative?) ? 0.2 : raw_jitter.to_f

      [attempts, base, max, jitter]
    end

    # Exponential backoff capped at `max`, with symmetric random jitter.
    # @return [Float] non-negative delay in seconds
    def backoff_seconds(attempt, base:, max:, jitter_fraction:)
      exp = [base * (2 ** (attempt - 1)), max].min
      jitter = exp * jitter_fraction
      delta = rand(-jitter..jitter)
      sleep_time = exp + delta
      sleep_time.positive? ? sleep_time : 0.0
    end

    # @return [Boolean] true for HTTP 429 and any 5xx status
    def transient_status?(code)
      code == 429 || (500..599).cover?(code)
    end

    def monotonic_ms
      SearchEngine::Instrumentation.monotonic_ms
    end

    def arguments_hash
      # ActiveJob stores keyword args in the last Hash argument when using perform(class, partition, into:, metadata:)
      args = arguments
      args.last.is_a?(Hash) ? args.last.symbolize_keys : {}
    end

    # @return [String] the first job argument as a String, or 'unknown' when absent/blank
    def arguments_dig_collection
      name =
        begin
          arguments[0].to_s
        rescue StandardError
          nil
        end
      # nil.to_s is "", so check for emptiness as well as nil.
      name.nil? || name.empty? ? 'unknown' : name
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+
5
module SearchEngine
  module Generators
    # Generator that writes the SearchEngine initializer (ENV-based defaults)
    # into the host application.
    #
    # @example
    #   rails g search_engine:install
    # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/dx
    class InstallGenerator < Rails::Generators::Base
      # Templates live in the `templates` directory next to this file.
      source_root(File.expand_path('templates', __dir__))

      # Render the initializer template to config/initializers/search_engine.rb.
      def create_initializer
        template('initializer.rb.tt', 'config/initializers/search_engine.rb')
      end
    end
  end
end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ # SearchEngine configuration — ENV-based defaults
4
+ #
5
+ # This initializer is idempotent and safe for local development. Keep secrets
6
+ # out of code: prefer ENV variables for API keys and sensitive config.
7
+ #
8
+ # Next steps:
9
+ # - Define a model under `app/search_engine/` (or your configured `c.search_engine_models`; see Quickstart)
10
+ # - Run the CLI doctor to verify connectivity and config
11
+ # - Explore queries in `rails console` using SE helpers: `SE.q("milk")`
12
+ #
13
+ # Docs:
14
+ # - Quickstart: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/quickstart
15
+ # - CLI (Doctor): https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/cli
16
+ # - DX helpers: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/dx
17
+ # - Configuration: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/configuration
18
+ SearchEngine.configure do |c|
19
+ c.host = ENV.fetch('TYPESENSE_HOST', 'localhost')
20
+ c.port = Integer(ENV.fetch('TYPESENSE_PORT', 8108))
21
+ c.protocol = ENV.fetch('TYPESENSE_PROTOCOL', 'http')
22
+ # Read API key from ENV; do not hardcode secrets here.
23
+ c.api_key = ENV['TYPESENSE_API_KEY']
24
+
25
+ # Default fields used when query_by is not provided explicitly.
26
+ # Uncomment and customize to your schema.
27
+ # c.default_query_by = 'name, description'
28
+
29
+ # Tip: You can also set per-collection defaults in your model:
30
+ # class SearchEngine::Product < SearchEngine::Base
31
+ # collection 'products'
32
+ # # Accepts String, Symbol, or Array; stored as a canonical String
33
+ # query_by %i[name brand description]
34
+ # end
35
+
36
+ # Optional: set a default console model for SE.q/SE.rel helpers.
37
+ # Accepts a constant or String name (e.g., 'SearchEngine::Product').
38
+ # c.default_console_model = nil
39
+
40
+ # Host app SearchEngine models directory. Relative paths are resolved against
41
+ # Rails.root. Set to nil/false to disable gem-managed loading.
42
+ # Defaults to 'app/search_engine'.
43
+ # c.search_engine_models = 'app/search_engine'
44
+
45
+ # --- Typesense transport -------------------------------------------------
46
+
47
+ # Request total timeout in milliseconds. Default: 3_600_000 (60 minutes)
48
+ # c.timeout_ms = 3_600_000
49
+
50
+ # Connect/open timeout in milliseconds. Default: 1000
51
+ # c.open_timeout_ms = 1_000
52
+
53
+ # Retry policy for client requests. Default: { attempts: 2, backoff: 10.0..60.0 }
54
+ # Use a Range for backoff to introduce jitter between retries (seconds).
55
+ # c.retries = { attempts: 2, backoff: 10.0..60.0 }
56
+ # Recommendation: keep a 60-minute timeout and add jitter for long-running writes.
57
+ # See Configuration → Timeouts & retries: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/configuration
58
+
59
+ # Default Typesense infix behavior for queries. Default: 'fallback'
60
+ # c.default_infix = 'fallback'
61
+
62
+ # --- URL-level caching ---------------------------------------------------
63
+
64
+ # Allow URL-level caching (passed as common params). Default: true
65
+ # c.use_cache = true
66
+
67
+ # Cache TTL in seconds (URL/common params). Default: 60
68
+ # c.cache_ttl_s = 60
69
+
70
+ # --- Field strictness & limits ------------------------------------------
71
+
72
+ # Enforce strict field validation at compile/hydration time.
73
+ # Default: true in development/test; false in production
74
+ # c.strict_fields = !Rails.env.production?
75
+
76
+ # Max number of searches in a single multi-search call. Default: 50
77
+ # c.multi_search_limit = 50
78
+
79
+ # Whether Relation#inspect pretty-prints by materializing a preview. Default: true
80
+ # c.relation_print_materializes = true
81
+
82
+ # Override the logger used by the engine (defaults to Rails.logger). Example:
83
+ # c.logger = Rails.logger
84
+
85
+ # --- Presets -------------------------------------------------------------
86
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets
87
+
88
+ # Enable preset namespacing and resolution. Default: true
89
+ # c.presets.enabled = true
90
+
91
+ # Optional namespace to prepend to preset names. Default: nil
92
+ # c.presets.namespace = nil
93
+
94
+ # Limit which domains presets may modify. Default: %i[filter_by sort_by include_fields exclude_fields]
95
+ # c.presets.locked_domains = %i[filter_by sort_by include_fields exclude_fields]
96
+
97
+ # --- Selection & Grouping ------------------------------------------------
98
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/field-selection
99
+
100
+ # Raise when requested fields are missing during hydration. Default: false
101
+ # c.selection.strict_missing = false
102
+
103
+ # Emit warnings for ambiguous grouping combinations. Default: true
104
+ # c.grouping.warn_on_ambiguous = true
105
+
106
+ # --- Observability & Logging --------------------------------------------
107
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability
108
+
109
+ # Quiet by default: structured instrumentation logs ([se.*]) are OFF unless
110
+ # explicitly enabled below.
111
+
112
+ # Preferred (new) structured logs via LoggingSubscriber (opt-in):
113
+ # - :compact (single-line) or :json
114
+ # - optional sampling to reduce noise
115
+ # c.logging.mode = :compact # Default: nil (OFF)
116
+ # c.logging.level = :info # :debug | :info | :warn | :error
117
+ # c.logging.sample = 1.0 # e.g., 0.1 to sample 10%
118
+ # c.logging.logger = c.logger
119
+
120
+ # Legacy compact logger (fallback, also opt-in):
121
+ # Enable only if you prefer the legacy subscriber instead of LoggingSubscriber.
122
+ # c.observability.enabled = false # Default: false (OFF)
123
+ # c.observability.log_format = :kv # :kv or :json
124
+ # c.observability.max_message_length = 200
125
+ # c.observability.include_error_messages = false
126
+ # c.observability.emit_legacy_event_aliases = true
127
+
128
+ # OpenTelemetry integration (only when the SDK is present). Defaults shown.
129
+ # c.opentelemetry = { enabled: false, service_name: 'search_engine' }
130
+
131
+ # --- Schema lifecycle ----------------------------------------------------
132
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema
133
+
134
+ # Retention: how many previous physical collections to keep after swap. Default: 0
135
+ # c.schema.retention.keep_last = 0
136
+
137
+ # --- Indexer -------------------------------------------------------------
138
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer
139
+
140
+ # Default batch size for imports when not provided. Default: 2000
141
+ # c.indexer.batch_size = 2000
142
+
143
+ # Optional read timeout override (ms) for imports. Default: nil
144
+ # c.indexer.timeout_ms = nil
145
+
146
+ # Retry policy for imports. Default: { attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2 }
147
+ # c.indexer.retries = { attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2 }
148
+
149
+ # Gzip JSONL payloads during import. Default: false
150
+ # c.indexer.gzip = false
151
+
152
+ # Dispatch mode for import jobs (:active_job or :inline). Default depends on ActiveJob presence
153
+ # c.indexer.dispatch = :active_job
154
+
155
+ # Queue name for ActiveJob dispatch. Default: 'search_index'
156
+ # c.indexer.queue_name = 'search_index'
157
+
158
+ # --- Sources -------------------------------------------------------------
159
+
160
+ # ActiveRecord source: default ORM batch size. Default: 2000
161
+ # c.sources.active_record.batch_size = 2000
162
+
163
+ # ActiveRecord source: mark relations as readonly. Default: true
164
+ # c.sources.active_record.readonly = true
165
+
166
+ # ActiveRecord source: wrap fetching in a read-only transaction. Default: false
167
+ # c.sources.active_record.use_transaction = false
168
+
169
+ # SQL source: server-side cursor fetch size. Default: 2000
170
+ # c.sources.sql.fetch_size = 2000
171
+
172
+ # SQL source: per-statement timeout (ms). Default: nil
173
+ # c.sources.sql.statement_timeout_ms = nil
174
+
175
+ # SQL source: preferred row shape (:auto, :hash). Default: :auto
176
+ # c.sources.sql.row_shape = :auto
177
+
178
+ # Lambda source: optional max batch size hint for validation/metrics. Default: nil
179
+ # c.sources.lambda.max_batch_size_hint = nil
180
+
181
+ # --- Mapper --------------------------------------------------------------
182
+
183
+ # Treat unknown keys as errors instead of warnings. Default: false
184
+ # c.mapper.strict_unknown_keys = false
185
+
186
+ # Nested coercions configuration. Default: { enabled: false, rules: {} }
187
+ # c.mapper.coercions = { enabled: false, rules: {} }
188
+
189
+ # Maximum number of error samples included in reports. Default: 5
190
+ # c.mapper.max_error_samples = 5
191
+
192
+ # --- Partitioning --------------------------------------------------------
193
+
194
+ # Optional resolver Proc for default physical collection. Default: nil
195
+ # c.partitioning.default_into_resolver = nil
196
+
197
+ # Before/after hook timeouts (ms) for partitioning callbacks. Default: nil
198
+ # c.partitioning.before_hook_timeout_ms = nil
199
+ # c.partitioning.after_hook_timeout_ms = nil
200
+
201
+ # Maximum error samples included in partition payloads. Default: 5
202
+ # c.partitioning.max_error_samples = 5
203
+
204
+ # --- Stale deletes -------------------------------------------------------
205
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/deletion
206
+
207
+ # Global kill switch for stale delete workflows. Default: true
208
+ # c.stale_deletes.enabled = true
209
+
210
+ # Strict mode blocks suspicious filters. Default: false
211
+ # c.stale_deletes.strict_mode = false
212
+
213
+ # Timeout (ms) for delete requests. Default: nil
214
+ # c.stale_deletes.timeout_ms = nil
215
+
216
+ # Enable found estimation via search for stale deletes. Default: false
217
+ # c.stale_deletes.estimation_enabled = false
218
+
219
+ # --- Curation ------------------------------------------------------------
220
+ # See: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
221
+
222
+ # Maximum number of pinned IDs allowed. Default: 50
223
+ # c.curation.max_pins = 50
224
+
225
+ # Maximum number of hidden IDs allowed. Default: 200
226
+ # c.curation.max_hidden = 200
227
+
228
+ # Allowed curated ID pattern. Default: /\A[\w\-:.]+\z/
229
+ # c.curation.id_regex = /\A[\w\-:.]+\z/
230
+ end