search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,917 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'search_engine/config/typesense'
5
+ require 'search_engine/config/selection'
6
+ require 'search_engine/config/observability'
7
+ require 'search_engine/config/presets'
8
+ require 'search_engine/config/validators'
9
+
10
+ module SearchEngine
11
+ # Central configuration container for the engine.
12
+ #
13
+ # Holds connection details, timeouts, retry policy, default search knobs,
14
+ # and caching switches. Mutable by design and safe to reuse across threads
15
+ # via the module-level {SearchEngine.configure} method.
16
+ #
17
+ # All attributes have sensible defaults for development. Values may be
18
+ # hydrated from ENV via {#hydrate_from_env!}. Validation is lightweight and
19
+ # intentionally does not require secrets at boot.
20
+ class Config
21
+ # @!attribute [rw] api_key
22
+ # @return [String, nil] secret Typesense API key (redacted in logs)
23
+ # @!attribute [rw] host
24
+ # @return [String] hostname of the Typesense server
25
+ # @!attribute [rw] port
26
+ # @return [Integer] TCP port for the Typesense server
27
+ # @!attribute [rw] protocol
28
+ # @return [String] one of "http" or "https"
29
+ # @!attribute [rw] timeout_ms
30
+ # @return [Integer] request total timeout in milliseconds
31
+ # @!attribute [rw] open_timeout_ms
32
+ # @return [Integer] connect/open timeout in milliseconds
33
+ # @!attribute [rw] retries
34
+ # @return [Hash] retry policy with keys { attempts: Integer, backoff: Float or Range<Float> }
35
+ # @!attribute [rw] logger
36
+ # @return [#info,#warn,#error] logger to use; defaults to Rails.logger
37
+ # @!attribute [rw] default_query_by
38
+ # @return [String, nil] comma-separated list of fields used to query by
39
+ # @!attribute [rw] default_infix
40
+ # @return [String] Typesense infix option (e.g., "fallback")
41
+ # @!attribute [rw] use_cache
42
+ # @return [Boolean] whether to allow URL-level caching
43
+ # @!attribute [rw] cache_ttl_s
44
+ # @return [Integer] cache TTL in seconds (URL-level only)
45
+ # @!attribute [rw] strict_fields
46
+ # @return [Boolean] when true, the Parser validates field names/types and raises
47
+ # friendly errors; when false, unknown fields are allowed (operators and shapes
48
+ # are still validated). Defaults to true in development/test.
49
+ # @!attribute [rw] multi_search_limit
50
+ # @return [Integer] maximum number of searches allowed in a single multi-search call (default: 50)
51
+ # @!attribute [rw] test_mode
52
+ # @return [Boolean] when true, avoid network I/O via an offline client
53
+ # @!attribute [rw] default_console_model
54
+ # @return [Class, String, nil] default model used by console helpers (SE.q/SE.rel)
55
+ # @!attribute [rw] search_engine_models
56
+ # @return [String, nil, false] path to host app SearchEngine models directory. May be
57
+ # relative to `Rails.root` (e.g., "app/search_engine") or absolute. When `nil` or
58
+ # `false`, gem-managed loading of host SearchEngine models is disabled.
59
# @!attribute [rw] relation_print_materializes
# @return [Boolean] when true, Relation#inspect/pretty_print materialize a preview
attr_accessor :logger, :default_query_by, :default_infix,
              :use_cache, :cache_ttl_s, :strict_fields,
              :test_mode, :multi_search_limit, :client,
              :default_console_model, :search_engine_models,
              :relation_print_materializes
71
+
72
# Schema lifecycle settings. Currently only carries the retention policy.
class SchemaConfig
  # Retention policy for physical collections.
  class RetentionConfig
    # @return [Integer] how many previous physical collections to keep after swap (default: 0)
    attr_accessor :keep_last

    def initialize
      self.keep_last = 0
    end
  end

  # The retention object is created once per SchemaConfig and is not replaceable.
  # @return [SearchEngine::Config::SchemaConfig::RetentionConfig]
  def retention
    @retention
  end

  def initialize
    @retention = RetentionConfig.new
  end
end
91
+
92
# Settings for the indexer/import pipeline.
class IndexerConfig
  # @return [Integer] default batch size when not provided explicitly
  attr_accessor :batch_size
  # @return [Integer, nil] optional override for import read timeout (ms)
  attr_accessor :timeout_ms
  # @return [Hash] retry policy: { attempts: Integer, base: Float, max: Float, jitter_fraction: Float }
  attr_accessor :retries
  # @return [Boolean] whether to gzip JSONL payloads (disabled by default)
  attr_accessor :gzip
  # @return [Symbol] dispatcher mode: :active_job or :inline
  attr_accessor :dispatch
  # @return [String] queue name for ActiveJob enqueues
  attr_accessor :queue_name

  def initialize
    self.batch_size = 2000
    self.timeout_ms = nil
    self.retries = { attempts: 3, base: 0.5, max: 5.0, jitter_fraction: 0.2 }
    self.gzip = false
    # Inline dispatch is the fallback when ActiveJob is not defined at construction time.
    self.dispatch = :inline
    self.dispatch = :active_job if active_job_available?
    self.queue_name = 'search_index'
  end

  private

  # Truthy when the ::ActiveJob::Base constant is defined at call time.
  def active_job_available?
    defined?(::ActiveJob::Base)
  end
end
122
+
123
# Per-adapter defaults for data sources. Each adapter config is built lazily
# on first access and then reused.
class SourcesConfig
  # Defaults for the ActiveRecord-backed source adapter.
  class ActiveRecordConfig
    # @return [Integer] default batch size for ORM batching
    attr_accessor :batch_size
    # @return [Boolean] mark relations as readonly to avoid dirty tracking
    attr_accessor :readonly
    # @return [Boolean] wrap fetching into a read-only transaction (best-effort, off by default)
    attr_accessor :use_transaction

    def initialize
      self.batch_size = 2000
      self.readonly = true
      self.use_transaction = false
    end
  end

  # Defaults for the raw SQL streaming source adapter.
  class SQLConfig
    # @return [Integer] default fetch size for server-side cursor/streaming
    attr_accessor :fetch_size
    # @return [Integer, nil] optional per-statement timeout (ms)
    attr_accessor :statement_timeout_ms
    # @return [Symbol] preferred row shape (:auto, :hash)
    attr_accessor :row_shape

    def initialize
      self.fetch_size = 2000
      self.statement_timeout_ms = nil
      self.row_shape = :auto
    end
  end

  # Defaults for the lambda-backed source adapter.
  class LambdaConfig
    # @return [Integer, nil] optional hint used for validation/metrics only
    attr_accessor :max_batch_size_hint

    def initialize
      self.max_batch_size_hint = nil
    end
  end

  # @return [SearchEngine::Config::SourcesConfig::ActiveRecordConfig]
  def active_record
    return @active_record if @active_record

    @active_record = ActiveRecordConfig.new
  end

  # @return [SearchEngine::Config::SourcesConfig::SQLConfig]
  def sql
    return @sql if @sql

    @sql = SQLConfig.new
  end

  # @return [SearchEngine::Config::SourcesConfig::LambdaConfig]
  def lambda
    return @lambda if @lambda

    @lambda = LambdaConfig.new
  end
end
182
+
183
# Settings for the document mapper.
class MapperConfig
  # @return [Boolean] when true, unknown keys raise; when false, they are reported as warnings
  attr_accessor :strict_unknown_keys
  # @return [Hash] nested coercions config: { enabled: Boolean, rules: Hash }
  attr_accessor :coercions
  # @return [Integer] maximum number of error samples to include in reports
  attr_accessor :max_error_samples

  def initialize
    self.strict_unknown_keys = false
    self.coercions = { enabled: false, rules: {} }
    self.max_error_samples = 5
  end
end
198
+
199
# Settings for partitioned indexing.
class PartitioningConfig
  # @return [Proc, nil] optional resolver for default physical collection
  attr_accessor :default_into_resolver
  # @return [Integer, nil] timeout in ms for before hook
  attr_accessor :before_hook_timeout_ms
  # @return [Integer, nil] timeout in ms for after hook
  attr_accessor :after_hook_timeout_ms
  # @return [Integer] maximum error samples to include in payloads
  attr_accessor :max_error_samples

  def initialize
    self.default_into_resolver = nil
    self.before_hook_timeout_ms = nil
    self.after_hook_timeout_ms = nil
    self.max_error_samples = 5
  end
end
217
+
218
# Settings for stale-document deletion.
class StaleDeletesConfig
  # @return [Boolean] global kill switch
  attr_accessor :enabled
  # @return [Boolean] strict mode blocks suspicious filters
  attr_accessor :strict_mode
  # @return [Integer, nil] timeout in ms for delete requests
  attr_accessor :timeout_ms
  # @return [Boolean] enable found estimation via search
  attr_accessor :estimation_enabled

  def initialize
    self.enabled = true
    self.strict_mode = false
    self.timeout_ms = nil
    self.estimation_enabled = false
  end
end
236
+
237
# Observability/logging settings.
# Kept for backward compatibility during refactor; inherits from the external
# Observability class and re-declares the accessors below.
#
# Quiet by default: `enabled` starts as false, so the compact logging
# subscriber stays off until `config.observability.enabled = true` is set
# in the initializer.
class ObservabilityConfig < Observability
  # @return [Boolean] enable the compact logging subscriber automatically
  attr_accessor :enabled
  # @return [Symbol] :kv or :json
  attr_accessor :log_format
  # @return [Integer] maximum message length for error samples in logs
  attr_accessor :max_message_length
  # @return [Boolean] include short error messages in logs for batch/stale events
  attr_accessor :include_error_messages
  # @return [Boolean] also emit legacy event aliases where applicable
  attr_accessor :emit_legacy_event_aliases

  def initialize
    # Explicit empty parentheses: the parent initializer is called without arguments.
    super()

    @enabled = false
    @include_error_messages = false
    @emit_legacy_event_aliases = true
    @log_format = :kv
    @max_message_length = 200
  end
end
265
+
266
# Settings for grouping UX.
class GroupingConfig
  # @return [Boolean] emit non-fatal warnings for ambiguous combinations
  attr_accessor :warn_on_ambiguous

  def initialize
    self.warn_on_ambiguous = true
  end
end
275
+
276
# Selection/hydration settings; inherits from the external Selection class.
# Governs how strictly missing attributes are treated during hydration.
class SelectionConfig < Selection
  # @return [Boolean] when true, missing requested fields raise MissingField
  attr_accessor :strict_missing

  def initialize
    # Explicit empty parentheses: the parent initializer is called without arguments.
    super()

    @strict_missing = false
  end
end
288
+
289
# Default presets resolution settings (namespacing and enablement);
# inherits from the external Presets class.
class PresetsConfig < Presets
  # Forwards to Presets.normalize_enabled so existing call sites on
  # PresetsConfig keep working.
  def self.normalize_enabled(value)
    Presets.normalize_enabled(value)
  end

  # Forwards to Presets.normalize_namespace so existing call sites on
  # PresetsConfig keep working.
  def self.normalize_namespace(value)
    Presets.normalize_namespace(value)
  end
end
303
+
304
# Curation DSL settings: list limits and the accepted ID format.
class CurationConfig
  # @return [Integer] maximum number of pinned IDs allowed (default: 50)
  # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
  attr_accessor :max_pins
  # @return [Integer] maximum number of hidden IDs allowed (default: 200)
  # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
  attr_accessor :max_hidden
  # @return [Regexp] allowed curated ID pattern (used for IDs and override tags)
  # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
  attr_accessor :id_regex

  def initialize
    self.max_pins = 50
    self.max_hidden = 200
    # Whole-string match: word characters, dash, colon and dot only.
    self.id_regex = /\A[\w\-:.]+\z/
  end
end
323
+
324
# Build a configuration: apply the built-in defaults first, then let any
# values found in the environment overwrite them.
#
# @param env [#[]] environment-like object (defaults to ::ENV)
def initialize(env = ENV)
  # Guards #warn_if_incomplete! so the warning is logged at most once.
  @warned_incomplete = false

  set_defaults!(env)
  hydrate_from_env!(env, override_existing: true)
end
332
+
333
# Reset every setting to its built-in development default.
#
# @param env [#[]] environment-like object, passed to the test-mode default
# @return [void]
def set_defaults!(env = ENV)
  # Typesense transport
  typesense.api_key = nil
  typesense.host = 'localhost'
  typesense.port = 8108
  typesense.protocol = 'http'
  typesense.timeout_ms = 3_600_000
  typesense.open_timeout_ms = 1_000
  typesense.retries = { attempts: 2, backoff: (10.0..60.0) }

  # Search knobs and runtime switches
  @default_query_by = nil
  @default_infix = 'fallback'
  @use_cache = true
  @cache_ttl_s = 60
  @strict_fields = default_strict_fields
  @test_mode = default_test_mode(env)
  @logger = default_logger
  @multi_search_limit = 50

  # Nested configuration objects
  @schema = SchemaConfig.new
  @indexer = IndexerConfig.new
  @sources = SourcesConfig.new
  @mapper = MapperConfig.new
  @partitioning = PartitioningConfig.new
  @stale_deletes = StaleDeletesConfig.new
  @observability = ObservabilityConfig.new
  @grouping = GroupingConfig.new
  @selection = SelectionConfig.new
  @presets = PresetsConfig.new
  @curation = CurationConfig.new

  # Console and host-app integration
  @default_console_model = nil
  # Relative to Rails.root or absolute; nil/false disables loading.
  @search_engine_models = 'app/search_engine'
  # When true, Relation#inspect/pretty_print materialize a preview (AR-like).
  @relation_print_materializes = true
end
369
+
370
# Whether the engine should avoid network I/O and use an offline client.
# Any truthy `test_mode` value counts as enabled.
# @return [Boolean] strictly true or false
def test_mode?
  return true if @test_mode

  false
end
375
+
376
# Schema lifecycle configuration, created on first access.
# @return [SearchEngine::Config::SchemaConfig]
def schema
  return @schema if @schema

  @schema = SchemaConfig.new
end
381
+
382
# Grouping UX configuration, created on first access.
# @return [SearchEngine::Config::GroupingConfig]
def grouping
  return @grouping if @grouping

  @grouping = GroupingConfig.new
end
387
+
388
# Selection/hydration configuration, created on first access.
# @return [SearchEngine::Config::SelectionConfig]
def selection
  return @selection if @selection

  @selection = SelectionConfig.new
end
393
+
394
# Presets configuration, created on first access.
# @return [SearchEngine::Config::PresetsConfig]
# @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets
def presets
  return @presets if @presets

  @presets = PresetsConfig.new
end
400
+
401
# Assign presets configuration from a compatible object.
#
# Accepts a PresetsConfig (stored as-is, replacing the current one), a
# Hash-like (anything responding to `to_h`), or an object responding to
# :enabled, :namespace and/or :locked_domains (checked only when the value
# does not respond to `to_h`). Hash keys may be Symbols or Strings. Only
# the keys that are present are applied to the existing presets object;
# `enabled` and `namespace` are normalized on assignment.
#
# @param value [Object]
# @return [void]
# @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets#config-default-preset
def presets=(value)
  if value.is_a?(PresetsConfig)
    @presets = value
    return
  end

  source =
    if value.respond_to?(:to_h)
      # Symbolize keys so String-keyed hashes (e.g. loaded from YAML/JSON)
      # are applied instead of being silently ignored.
      value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    else
      %i[enabled namespace locked_domains].each_with_object({}) do |name, acc|
        acc[name] = value.public_send(name) if value.respond_to?(name)
      end
    end

  cfg = presets
  cfg.enabled = PresetsConfig.normalize_enabled(source[:enabled]) if source.key?(:enabled)
  cfg.namespace = PresetsConfig.normalize_namespace(source[:namespace]) if source.key?(:namespace)
  cfg.locked_domains = source[:locked_domains] if source.key?(:locked_domains)
end
434
+
435
# Curation configuration, created on first access.
# @return [SearchEngine::Config::CurationConfig]
def curation
  return @curation if @curation

  @curation = CurationConfig.new
end
440
+
441
# Observability/logging configuration, created on first access.
# @return [SearchEngine::Config::ObservabilityConfig]
def observability
  return @observability if @observability

  @observability = ObservabilityConfig.new
end
446
+
447
# Partitioning configuration, created on first access.
# @return [SearchEngine::Config::PartitioningConfig]
def partitioning
  return @partitioning if @partitioning

  @partitioning = PartitioningConfig.new
end
452
+
453
# Stale deletes configuration, created on first access.
# @return [SearchEngine::Config::StaleDeletesConfig]
def stale_deletes
  return @stale_deletes if @stale_deletes

  @stale_deletes = StaleDeletesConfig.new
end
458
+
459
# Structured logging configuration, created on first access.
#
# `mode` starts as nil, which disables the structured `LoggingSubscriber`.
# Opt in with `config.logging.mode = :compact` (or `:json`).
# The `logger` field is filled from the config's logger when the struct is
# first built.
# @return [OpenStruct]
def logging
  # Loaded lazily so ostruct is only required when this accessor is used.
  require 'ostruct'
  return @logging if @logging

  @logging = OpenStruct.new(mode: nil, level: :info, sample: 1.0, logger: logger)
end
468
+
469
# OpenTelemetry configuration, created on first access. Disabled by default.
# @return [OpenStruct]
def opentelemetry
  # Loaded lazily so ostruct is only required when this accessor is used.
  require 'ostruct'
  return @opentelemetry if @opentelemetry

  @opentelemetry = OpenStruct.new(enabled: false, service_name: 'search_engine')
end
475
+
476
# Assign OpenTelemetry configuration from a compatible object.
#
# Accepts an OpenStruct (stored as-is, replacing the current one), a
# Hash-like (anything responding to `to_h`), or an object responding to
# :enabled and/or :service_name (checked only when the value does not
# respond to `to_h`). Hash keys may be Symbols or Strings. Only keys that
# are present are applied: `enabled` is coerced to true/false, and an empty
# `service_name` falls back to 'search_engine'.
#
# @param value [Object]
# @return [void]
def opentelemetry=(value)
  require 'ostruct'
  if value.is_a?(OpenStruct)
    @opentelemetry = value
    return
  end

  source =
    if value.respond_to?(:to_h)
      # Symbolize keys so String-keyed hashes (e.g. loaded from YAML/JSON)
      # are applied instead of being silently ignored.
      value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    else
      %i[enabled service_name].each_with_object({}) do |name, acc|
        acc[name] = value.public_send(name) if value.respond_to?(name)
      end
    end

  otel = opentelemetry
  otel.enabled = (source[:enabled] ? true : false) if source.key?(:enabled)
  return unless source.key?(:service_name)

  service = source[:service_name]
  otel.service_name = service.to_s.empty? ? 'search_engine' : service
end
502
+
503
# Assign curation configuration from a compatible object.
#
# Accepts a CurationConfig (stored as-is, replacing the current one), a
# Hash-like (anything responding to `to_h`), or an object responding to
# :max_pins, :max_hidden and/or :id_regex (checked only when the value does
# not respond to `to_h`). Hash keys may be Symbols or Strings. Only keys
# that are present are applied. A nil limit leaves the current limit
# unchanged; `id_regex`, when present, must be a Regexp.
#
# Values are validated and applied one at a time in the order max_pins,
# max_hidden, id_regex, so an error on a later key leaves earlier keys
# already applied.
#
# @param value [Object]
# @return [void]
# @raise [ArgumentError] when a limit is not an Integer or id_regex is not a Regexp
def curation=(value)
  if value.is_a?(CurationConfig)
    @curation = value
    return
  end

  source =
    if value.respond_to?(:to_h)
      # Symbolize keys so String-keyed hashes (e.g. loaded from YAML/JSON)
      # are applied instead of being silently ignored.
      value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    else
      %i[max_pins max_hidden id_regex].each_with_object({}) do |name, acc|
        acc[name] = value.public_send(name) if value.respond_to?(name)
      end
    end

  cfg = curation

  %i[max_pins max_hidden].each do |name|
    next unless source.key?(name)

    limit = source[name]
    next if limit.nil?

    unless limit.is_a?(Integer)
      raise ArgumentError, "curation.#{name} must be an Integer (got #{limit.class})"
    end

    cfg.public_send("#{name}=", limit)
  end

  return unless source.key?(:id_regex)

  rx = source[:id_regex]
  raise ArgumentError, "curation.id_regex must be a Regexp (got #{rx.class})" unless rx.is_a?(Regexp)

  cfg.id_regex = rx
end
550
+
551
# Indexer configuration, created on first access.
# @return [SearchEngine::Config::IndexerConfig]
def indexer
  return @indexer if @indexer

  @indexer = IndexerConfig.new
end
556
+
557
# Sources configuration, created on first access.
# @return [SearchEngine::Config::SourcesConfig]
def sources
  return @sources if @sources

  @sources = SourcesConfig.new
end
562
+
563
# Mapper configuration, created on first access.
# @return [SearchEngine::Config::MapperConfig]
def mapper
  return @mapper if @mapper

  @mapper = MapperConfig.new
end
568
+
569
# Copy supported environment values onto the configuration.
#
# Reads TYPESENSE_HOST, TYPESENSE_PORT, TYPESENSE_PROTOCOL,
# TYPESENSE_API_KEY, TYPESENSE_STRICT_FIELDS, SEARCH_ENGINE_TEST_MODE and
# SEARCH_ENGINE_OFFLINE. Each value is passed to `set_if_present` together
# with `override_existing`.
#
# @param env [#[]] environment-like object
# @param override_existing [Boolean] when true, overwrite current values
# @return [self]
def hydrate_from_env!(env = ENV, override_existing: false)
  set_if_present(:host, env['TYPESENSE_HOST'], override_existing)
  set_if_present(:port, integer_or_nil(env['TYPESENSE_PORT']), override_existing)
  set_if_present(:protocol, env['TYPESENSE_PROTOCOL'], override_existing)
  set_if_present(:api_key, env['TYPESENSE_API_KEY'], override_existing)

  # A non-blank TYPESENSE_STRICT_FIELDS is true for 1/true/yes/on
  # (case-insensitive) and false for any other text.
  raw_strict = env['TYPESENSE_STRICT_FIELDS']
  if raw_strict.is_a?(String) && !raw_strict.strip.empty?
    strict = %w[1 true yes on].include?(raw_strict.strip.downcase)
    set_if_present(:strict_fields, strict, override_existing)
  end

  # SEARCH_ENGINE_TEST_MODE wins; SEARCH_ENGINE_OFFLINE is the fallback.
  from_test_mode = normalize_boolean(env['SEARCH_ENGINE_TEST_MODE'])
  from_offline = normalize_boolean(env['SEARCH_ENGINE_OFFLINE'])
  resolved = from_test_mode.nil? ? from_offline : from_test_mode
  set_if_present(:test_mode, resolved, override_existing) unless resolved.nil?

  self
end
591
+
592
# Check protocol, host and port, collecting every problem before raising.
#
# @raise [ArgumentError] with all messages joined by ", " when any check fails
# @return [true]
def validate!
  checks = SearchEngine::Config::Validators

  problems = []
  problems.concat(checks.validate_protocol(protocol))
  problems.concat(checks.validate_host(host))
  problems.concat(checks.validate_port(port))
  return true if problems.empty?

  raise ArgumentError, problems.join(', ')
end
605
+
606
# Warn once when api_key or default_query_by is blank. Later calls do nothing,
# whether or not the first call logged anything.
# @return [void]
def warn_if_incomplete!
  return if @warned_incomplete

  blank_fields = []
  blank_fields.push('api_key') if string_blank?(api_key)
  blank_fields.push('default_query_by') if string_blank?(default_query_by)

  unless blank_fields.empty?
    sink = logger || default_logger
    sink.warn("[search_engine] configuration incomplete: missing #{blank_fields.join(', ')}")
  end

  @warned_incomplete = true
  nil
end
626
+
627
# Snapshot of the configuration as a Hash.
# The API key is included as-is (not redacted). The logger is reported only
# as a presence flag, and a default console model that responds to `name`
# is reported by that name.
# @return [Hash]
def to_h
  as_flag = ->(value) { value ? true : false }

  console_model = default_console_model
  console_model = console_model.name if console_model.respond_to?(:name)

  {
    api_key: api_key,
    host: host,
    port: port,
    protocol: protocol,
    timeout_ms: timeout_ms,
    open_timeout_ms: open_timeout_ms,
    retries: retries,
    logger: !logger.nil?,
    default_query_by: default_query_by,
    default_infix: default_infix,
    use_cache: as_flag.call(use_cache),
    cache_ttl_s: cache_ttl_s,
    strict_fields: as_flag.call(strict_fields),
    test_mode: test_mode?,
    multi_search_limit: multi_search_limit,
    default_console_model: console_model,
    search_engine_models: search_engine_models,
    schema: schema_hash_for_to_h,
    indexer: indexer_hash_for_to_h,
    sources: sources_hash_for_to_h,
    mapper: mapper_hash_for_to_h,
    partitioning: partitioning_hash_for_to_h,
    observability: observability_hash_for_to_h,
    selection: selection_hash_for_to_h,
    presets: presets_hash_for_to_h,
    curation: curation_hash_for_to_h,
    relation_print_materializes: as_flag.call(relation_print_materializes)
  }
end
663
+
664
# Same as #to_h, but a non-blank API key is replaced with '[REDACTED]'.
# A blank API key is left as it is.
# @return [Hash]
def to_h_redacted
  to_h.dup.tap do |snapshot|
    snapshot[:api_key] = '[REDACTED]' unless string_blank?(api_key)
  end
end
671
+
672
+ private
673
+
674
# Schema section of #to_h; exposes only the retention policy's keep_last.
# @return [Hash]
def schema_hash_for_to_h
  retention_policy = schema.retention
  { retention: { keep_last: retention_policy.keep_last } }
end
677
+
678
# Indexer section of #to_h. The gzip flag is coerced to a strict boolean;
# every other setting is copied through unchanged.
# @return [Hash]
def indexer_hash_for_to_h
  settings = indexer
  gzip_enabled = settings.gzip ? true : false

  {
    batch_size: settings.batch_size,
    timeout_ms: settings.timeout_ms,
    retries: settings.retries,
    gzip: gzip_enabled,
    dispatch: settings.dispatch,
    queue_name: settings.queue_name
  }
end
688
+
689
# Sources section of #to_h, one nested Hash per source adapter
# (active_record, sql, lambda). Flag-like values are coerced to true/false.
# @return [Hash]
def sources_hash_for_to_h
  ar_source = sources.active_record
  sql_source = sources.sql
  lambda_source = sources.lambda

  active_record_section = {
    batch_size: ar_source.batch_size,
    readonly: ar_source.readonly ? true : false,
    use_transaction: ar_source.use_transaction ? true : false
  }
  sql_section = {
    fetch_size: sql_source.fetch_size,
    statement_timeout_ms: sql_source.statement_timeout_ms,
    row_shape: sql_source.row_shape
  }
  lambda_section = { max_batch_size_hint: lambda_source.max_batch_size_hint }

  { active_record: active_record_section, sql: sql_section, lambda: lambda_section }
end
706
+
707
# Mapper section of #to_h. strict_unknown_keys is coerced to true/false;
# coercions and max_error_samples are copied through unchanged.
# @return [Hash]
def mapper_hash_for_to_h
  settings = mapper
  strict = settings.strict_unknown_keys ? true : false

  { strict_unknown_keys: strict, coercions: settings.coercions, max_error_samples: settings.max_error_samples }
end
714
+
715
# Partitioning section of #to_h: hook timeouts and the error sample cap,
# copied through unchanged.
# @return [Hash]
def partitioning_hash_for_to_h
  settings = partitioning

  {
    before_hook_timeout_ms: settings.before_hook_timeout_ms,
    after_hook_timeout_ms: settings.after_hook_timeout_ms,
    max_error_samples: settings.max_error_samples
  }
end
722
+
723
# Observability section of #to_h. The three toggles are coerced to strict
# booleans; log_format and max_message_length are copied through unchanged.
# @return [Hash]
def observability_hash_for_to_h
  settings = observability
  enabled = settings.enabled ? true : false
  with_error_messages = settings.include_error_messages ? true : false
  legacy_aliases = settings.emit_legacy_event_aliases ? true : false

  {
    enabled: enabled,
    log_format: settings.log_format,
    max_message_length: settings.max_message_length,
    include_error_messages: with_error_messages,
    emit_legacy_event_aliases: legacy_aliases
  }
end
732
+
733
# Selection section of #to_h; strict_missing is reported as true/false.
# @return [Hash]
def selection_hash_for_to_h
  strict = selection.strict_missing ? true : false
  { strict_missing: strict }
end
738
+
739
# Presets section of #to_h. enabled is coerced to true/false; namespace and
# locked_domains are copied through unchanged.
# @return [Hash]
def presets_hash_for_to_h
  settings = presets
  enabled = settings.enabled ? true : false

  { enabled: enabled, namespace: settings.namespace, locked_domains: settings.locked_domains }
end
746
+
747
# Curation section of #to_h. The id pattern is rendered via #inspect rather
# than exposed as the raw object.
# @return [Hash]
def curation_hash_for_to_h
  settings = curation
  id_pattern = settings.id_regex.inspect

  { max_pins: settings.max_pins, max_hidden: settings.max_hidden, id_regex: id_pattern }
end
754
+
755
# Default for strict field validation: on everywhere except a Rails
# production environment.
#
# The `Rails` constant can exist without the framework being loaded (for
# example when a library merely opens the `Rails` namespace), in which case
# `Rails.env` is undefined. Such a partial namespace is treated like
# "no Rails" instead of raising NoMethodError.
# @return [Boolean]
def default_strict_fields
  return true unless defined?(::Rails) && ::Rails.respond_to?(:env)

  !::Rails.env.production?
end
762
+
763
# Default for test mode.
#
# Uses `Rails.env.test?` when Rails exposes an environment. Otherwise
# RACK_ENV (then RAILS_ENV) from the given environment hash is compared,
# case-insensitively and ignoring surrounding whitespace, with "test".
#
# A `Rails` constant without `Rails.env` (namespace defined, framework not
# loaded) falls through to the environment variables; previously that case
# hit the rescue and always reported false.
# @param env [#[]] environment lookup, e.g. ENV or a Hash with String keys
# @return [Boolean] false when detection raises for any reason
def default_test_mode(env)
  return ::Rails.env.test? if defined?(::Rails) && ::Rails.respond_to?(:env)

  env_value = env['RACK_ENV'] || env['RAILS_ENV']
  env_value.to_s.strip.downcase == 'test'
rescue StandardError
  # Best-effort detection: never let a broken environment abort configuration.
  false
end
773
+
774
# Logger used when none has been configured.
#
# Prefers `Rails.logger` when Rails provides one. Falls back to a Logger
# writing to $stdout when Rails is absent, when the `Rails` constant exists
# without a `logger` method, or when `Rails.logger` is still nil, so callers
# never receive nil.
# @return [Logger, Object] the Rails logger or a stdout Logger
def default_logger
  rails_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
  return rails_logger if rails_logger

  # Loaded lazily so the stdlib logger is only required when actually needed.
  require 'logger'
  Logger.new($stdout)
end
782
+
783
# Lenient integer coercion for configuration values.
#
# Strings are parsed strictly as base-10, so a leading zero no longer
# switches to octal ("010" is 10, "08" is 8) and prefixed literals such as
# "0x1A" are rejected. Non-string values go through Kernel#Integer unchanged.
# @param val [Object] raw value (typically a String from ENV or a number)
# @return [Integer, nil] nil for nil, blank strings, and anything that cannot
#   be converted (including NaN and infinite floats)
def integer_or_nil(val)
  return nil if val.nil?

  if val.is_a?(String)
    return nil if val.strip.empty?

    Integer(val, 10)
  else
    Integer(val)
  end
rescue ArgumentError, TypeError, FloatDomainError
  # FloatDomainError covers NaN/Infinity, which Kernel#Integer refuses to convert.
  nil
end
790
+
791
# Conditionally assigns +value+ to the attribute named +attr+.
#
# Nothing happens when +value+ is nil. With +override_existing+ truthy the
# value is always written. Otherwise it is written only while
# @warned_incomplete is exactly false and the current attribute value is nil
# or a whitespace-only String.
# NOTE(review): @warned_incomplete presumably doubles as a "still in initial
# setup" marker here — confirm against the initializer.
# @param attr [Symbol, String] attribute name with a public reader and writer
# @param value [Object, nil] candidate value
# @param override_existing [Boolean] whether to replace a non-blank value
# @return [Object, nil] result of the writer call, or nil when skipped
def set_if_present(attr, value, override_existing)
  return if value.nil?
  return unless override_existing || @warned_incomplete == false

  existing = public_send(attr)
  vacant = existing.nil? || (existing.is_a?(String) && existing.strip.empty?)
  return unless override_existing || vacant

  public_send("#{attr}=", value)
end
799
+
800
# Whether +value+ counts as blank: nil, or anything responding to #strip
# whose stripped form is empty. Objects without #strip are never blank.
# @param value [Object]
# @return [Boolean]
def string_blank?(value)
  return true if value.nil?
  return false unless value.respond_to?(:strip)

  value.strip.empty?
end
803
+
804
# Interprets a loosely typed flag (for example an ENV string).
#
# The value is stringified, stripped and downcased before matching:
# 1/true/yes/on map to true, 0/false/no/off map to false.
# @param value [Object, nil]
# @return [Boolean, nil] nil for nil, blank, or unrecognised input
def normalize_boolean(value)
  # nil stringifies to "", which matches no branch and therefore yields nil.
  case value.to_s.strip.downcase
  when '1', 'true', 'yes', 'on' then true
  when '0', 'false', 'no', 'off' then false
  end
end
816
+
817
# Passes the current +protocol+ to SearchEngine::Config::Validators.validate_protocol.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_protocol
818
+ SearchEngine::Config::Validators.validate_protocol(protocol)
819
+ end
820
+
821
# Passes the current +host+ to SearchEngine::Config::Validators.validate_host.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_host
822
+ SearchEngine::Config::Validators.validate_host(host)
823
+ end
824
+
825
# Passes the current +port+ to SearchEngine::Config::Validators.validate_port.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_port
826
+ SearchEngine::Config::Validators.validate_port(port)
827
+ end
828
+
829
# Passes +timeout_ms+ and +open_timeout_ms+ (in that order) to
# SearchEngine::Config::Validators.validate_timeouts.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_timeouts
830
+ SearchEngine::Config::Validators.validate_timeouts(timeout_ms, open_timeout_ms)
831
+ end
832
+
833
# Passes the current +retries+ setting to SearchEngine::Config::Validators.validate_retries.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_retries
834
+ SearchEngine::Config::Validators.validate_retries(retries)
835
+ end
836
+
837
# Asks SearchEngine::Config::Validators.retries_valid_shape? about the current +retries+ value.
# @return [Object] the validator's answer; presumably a Boolean given the predicate name — confirm
+ def retries_valid_shape?
838
+ SearchEngine::Config::Validators.retries_valid_shape?(retries)
839
+ end
840
+
841
# Passes +cache_ttl_s+ to SearchEngine::Config::Validators.validate_cache.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_cache
842
+ SearchEngine::Config::Validators.validate_cache(cache_ttl_s)
843
+ end
844
+
845
# Passes +multi_search_limit+ to SearchEngine::Config::Validators.validate_multi_search_limit.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_multi_search_limit
846
+ SearchEngine::Config::Validators.validate_multi_search_limit(multi_search_limit)
847
+ end
848
+
849
# Passes the +presets+ sub-config to SearchEngine::Config::Validators.validate_presets.
# @return [Object] whatever the validator returns (its contract is not visible here)
+ def validate_presets
850
+ SearchEngine::Config::Validators.validate_presets(presets)
851
+ end
852
+
853
+ public
854
+
855
+ # Typesense transport sub-config and forwarders (public API preserved)
856
+ # @return [SearchEngine::Config::Typesense]
857
# Built on first access and memoized in @typesense; the accessors below
# read from and write to this object.
+ def typesense
858
+ @typesense ||= SearchEngine::Config::Typesense.new
859
+ end
860
+
861
# Reads the API key from the Typesense transport sub-config.
# @return [Object] value of typesense.api_key
+ def api_key
862
+ typesense.api_key
863
+ end
864
+
865
# Stores the API key on the Typesense transport sub-config.
# @param value [Object] new API key, handed to typesense.api_key= as given
# @return [Object] the assigned value
+ def api_key=(value)
866
+ typesense.api_key = value
867
+ end
868
+
869
# Reads the host from the Typesense transport sub-config.
# @return [Object] value of typesense.host
+ def host
870
+ typesense.host
871
+ end
872
+
873
# Stores the host on the Typesense transport sub-config.
# @param value [Object] new host, handed to typesense.host= as given
# @return [Object] the assigned value
+ def host=(value)
874
+ typesense.host = value
875
+ end
876
+
877
# Reads the port from the Typesense transport sub-config.
# @return [Object] value of typesense.port
+ def port
878
+ typesense.port
879
+ end
880
+
881
# Stores the port on the Typesense transport sub-config.
# @param value [Object] new port, handed to typesense.port= as given
# @return [Object] the assigned value
+ def port=(value)
882
+ typesense.port = value
883
+ end
884
+
885
# Reads the protocol from the Typesense transport sub-config.
# @return [Object] value of typesense.protocol
+ def protocol
886
+ typesense.protocol
887
+ end
888
+
889
# Stores the protocol on the Typesense transport sub-config.
# @param value [Object] new protocol, handed to typesense.protocol= as given
# @return [Object] the assigned value
+ def protocol=(value)
890
+ typesense.protocol = value
891
+ end
892
+
893
# Reads timeout_ms from the Typesense transport sub-config.
# @return [Object] value of typesense.timeout_ms
+ def timeout_ms
894
+ typesense.timeout_ms
895
+ end
896
+
897
# Stores timeout_ms on the Typesense transport sub-config.
# @param value [Object] new timeout, handed to typesense.timeout_ms= as given
# @return [Object] the assigned value
+ def timeout_ms=(value)
898
+ typesense.timeout_ms = value
899
+ end
900
+
901
# Reads open_timeout_ms from the Typesense transport sub-config.
# @return [Object] value of typesense.open_timeout_ms
+ def open_timeout_ms
902
+ typesense.open_timeout_ms
903
+ end
904
+
905
# Stores open_timeout_ms on the Typesense transport sub-config.
# @param value [Object] new open timeout, handed to typesense.open_timeout_ms= as given
# @return [Object] the assigned value
+ def open_timeout_ms=(value)
906
+ typesense.open_timeout_ms = value
907
+ end
908
+
909
# Reads the retries setting from the Typesense transport sub-config.
# @return [Object] value of typesense.retries
+ def retries
910
+ typesense.retries
911
+ end
912
+
913
# Stores the retries setting on the Typesense transport sub-config.
# @param value [Object] new retries setting, handed to typesense.retries= as given
# @return [Object] the assigned value
+ def retries=(value)
914
+ typesense.retries = value
915
+ end
916
+ end
917
+ end