search-engine-for-typesense 30.1.8.3 → 30.1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +154 -0
- data/app/search_engine/search_engine/postgres_outbox/drain_job.rb +24 -0
- data/lib/generators/search_engine/postgres_outbox/install_generator.rb +35 -0
- data/lib/generators/search_engine/postgres_outbox/templates/add_outbox_triggers.rb.tt +41 -0
- data/lib/generators/search_engine/postgres_outbox/templates/create_outbox_events.rb.tt +9 -0
- data/lib/search_engine/active_record_syncable.rb +29 -6
- data/lib/search_engine/bulk.rb +3 -96
- data/lib/search_engine/config.rb +80 -0
- data/lib/search_engine/dependency_planner.rb +241 -0
- data/lib/search_engine/indexer/bulk_import.rb +61 -52
- data/lib/search_engine/indexer.rb +1 -1
- data/lib/search_engine/postgres_outbox/drainer.rb +176 -0
- data/lib/search_engine/postgres_outbox/event.rb +59 -0
- data/lib/search_engine/postgres_outbox/event_processor.rb +65 -0
- data/lib/search_engine/postgres_outbox/listener.rb +243 -0
- data/lib/search_engine/postgres_outbox/migration_helpers.rb +229 -0
- data/lib/search_engine/postgres_outbox/processor_result.rb +37 -0
- data/lib/search_engine/postgres_outbox/repository.rb +197 -0
- data/lib/search_engine/postgres_outbox.rb +15 -0
- data/lib/search_engine/version.rb +1 -1
- data/lib/search_engine.rb +2 -0
- metadata +15 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d6c07978f5cb0d447f674050793a5d30541d112c98bd51992463aea103134273
|
|
4
|
+
data.tar.gz: 2807c15e8ac558a8f519c4af880c6da14d31442ea1921778c0ec267c166c0423
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0f268f7adae79d3899a7c8cabcb7556fdd03442391d3c64c51f15e915ac78f7813719fa45030e280d566c56a05a5238e1e3e80a576ce6c2904bad85fcd000f71
|
|
7
|
+
data.tar.gz: 3499f0867134fdd0bfe2180c2e29ee81be40ed3c49db0c68b154be9c84166ae0388fa3fcc95e627fc3279ce1c79307ebe0385d022b2f24192fe694571cdfb0fc
|
data/README.md
CHANGED
|
@@ -188,6 +188,160 @@ Use a shared `Rails.cache` backend, or provide `c.indexer.partition_run_store`,
|
|
|
188
188
|
the parent indexing process can see the same run metadata. Size the queue carefully: worker concurrency
|
|
189
189
|
multiplies with any per-partition `max_parallel` setting.
|
|
190
190
|
|
|
191
|
+
## PostgreSQL outbox sync
|
|
192
|
+
|
|
193
|
+
Rails callbacks are convenient for ordinary `create`, `update`, and `destroy` flows, but they do not see
|
|
194
|
+
every database write. Bulk SQL imports, database triggers, background functions, and direct maintenance
|
|
195
|
+
scripts can change source tables without instantiating Active Record models. PostgreSQL outbox sync captures
|
|
196
|
+
those writes at the database layer and lets the gem process them through ActiveJob.
|
|
197
|
+
|
|
198
|
+
The flow is:
|
|
199
|
+
|
|
200
|
+
1. A row-level PostgreSQL trigger writes a durable outbox row in the same transaction as the source table
|
|
201
|
+
change.
|
|
202
|
+
2. The trigger calls `pg_notify`; PostgreSQL delivers the notification when the transaction commits, as a low-latency nudge.
|
|
203
|
+
3. A host-managed listener receives notifications, or falls back to polling, and enqueues
|
|
204
|
+
`SearchEngine::PostgresOutbox::DrainJob`.
|
|
205
|
+
4. The drainer claims pending rows, coalesces older rows for the same collection/document pair, orders
|
|
206
|
+
collection groups with the dependency planner, and processes the resulting upserts/deletes.
|
|
207
|
+
|
|
208
|
+
`pg_notify` is not durable. Treat notifications only as a wakeup signal; the outbox table is the source of
|
|
209
|
+
truth. Run the listener in a process lifecycle you control, and keep fallback polling enabled so missed
|
|
210
|
+
notifications are drained later.
|
|
211
|
+
|
|
212
|
+
PostgreSQL outbox sync is disabled by default:
|
|
213
|
+
|
|
214
|
+
```ruby
|
|
215
|
+
# config/initializers/search_engine.rb
|
|
216
|
+
SearchEngine.configure do |c|
|
|
217
|
+
c.postgres_outbox.enabled = true
|
|
218
|
+
c.postgres_outbox.listener_enabled = -> { Rails.env.production? }
|
|
219
|
+
c.postgres_outbox.table_name = "search_engine_outbox_events"
|
|
220
|
+
c.postgres_outbox.channel = "search_engine_outbox"
|
|
221
|
+
c.postgres_outbox.queue_name = "search_engine"
|
|
222
|
+
c.postgres_outbox.batch_size = 1000
|
|
223
|
+
c.postgres_outbox.poll_interval_s = 5
|
|
224
|
+
c.postgres_outbox.retention_s = 7.days.to_i
|
|
225
|
+
|
|
226
|
+
# Optional. Leave off when your deployment already guarantees one listener.
|
|
227
|
+
c.postgres_outbox.advisory_lock = false
|
|
228
|
+
end
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Generate and edit the migrations:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
bin/rails generate search_engine:postgres_outbox:install
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
The events table migration should include the gem helper:
|
|
238
|
+
|
|
239
|
+
```ruby
|
|
240
|
+
class CreateSearchEngineOutboxEvents < ActiveRecord::Migration[7.1]
|
|
241
|
+
include SearchEngine::PostgresOutbox::MigrationHelpers
|
|
242
|
+
|
|
243
|
+
def change
|
|
244
|
+
create_search_engine_outbox_events
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Add one trigger per source table that should write outbox events:
|
|
250
|
+
|
|
251
|
+
```ruby
|
|
252
|
+
class AddSearchEngineOutboxTriggers < ActiveRecord::Migration[7.1]
|
|
253
|
+
include SearchEngine::PostgresOutbox::MigrationHelpers
|
|
254
|
+
|
|
255
|
+
def up
|
|
256
|
+
create_search_engine_outbox_trigger(
|
|
257
|
+
:products,
|
|
258
|
+
source_model: "Product",
|
|
259
|
+
collection: "products"
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
create_search_engine_outbox_trigger(
|
|
263
|
+
:product_variants,
|
|
264
|
+
source_model: "ProductVariant",
|
|
265
|
+
collection: "product_variants",
|
|
266
|
+
record_id_sql: "record_data.id::text",
|
|
267
|
+
document_id_sql: "record_data.product_id::text || '-' || record_data.id::text"
|
|
268
|
+
)
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def down
|
|
272
|
+
drop_search_engine_outbox_trigger(:product_variants)
|
|
273
|
+
drop_search_engine_outbox_trigger(:products)
|
|
274
|
+
end
|
|
275
|
+
end
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
`record_id_sql` and `document_id_sql` are trusted migration SQL expressions. They may refer to the
|
|
279
|
+
PL/pgSQL `record_data` variable, which is `NEW` for inserts/updates and `OLD` for deletes.
|
|
280
|
+
|
|
281
|
+
Pair triggered source models with `sync_strategy: :postgres_outbox` so Active Record callbacks do not also
|
|
282
|
+
write to Typesense for the same changes:
|
|
283
|
+
|
|
284
|
+
```ruby
|
|
285
|
+
class Product < ApplicationRecord
|
|
286
|
+
include SearchEngine::ActiveRecordSyncable
|
|
287
|
+
|
|
288
|
+
search_engine_syncable collection: :products, sync_strategy: :postgres_outbox
|
|
289
|
+
end
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
The listener lifecycle belongs to the host app. This Sidekiq initializer is one example; any process manager
|
|
293
|
+
or ActiveJob backend can start and stop a listener as long as it can enqueue jobs:
|
|
294
|
+
|
|
295
|
+
```ruby
|
|
296
|
+
# config/initializers/search_engine_outbox_listener.rb
|
|
297
|
+
if defined?(Sidekiq)
|
|
298
|
+
Sidekiq.configure_server do |config|
|
|
299
|
+
listener = nil
|
|
300
|
+
|
|
301
|
+
config.on(:startup) do
|
|
302
|
+
outbox = SearchEngine.config.postgres_outbox
|
|
303
|
+
next unless outbox.enabled && outbox.listener_enabled.call
|
|
304
|
+
|
|
305
|
+
listener = SearchEngine::PostgresOutbox::Listener.new.start
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
config.on(:quiet) { listener&.stop(timeout: 5) }
|
|
309
|
+
config.on(:shutdown) { listener&.stop(timeout: 5) }
|
|
310
|
+
end
|
|
311
|
+
end
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Custom processors can override the default collection handling. Register processors by collection name and
|
|
315
|
+
return `SearchEngine::PostgresOutbox::ProcessorResult`:
|
|
316
|
+
|
|
317
|
+
```ruby
|
|
318
|
+
SearchEngine.configure do |c|
|
|
319
|
+
c.postgres_outbox.collection_processors["products"] = lambda do |events:, context:|
|
|
320
|
+
document_ids = events.map(&:document_id)
|
|
321
|
+
ProductSearchSync.call(document_ids: document_ids, worker_id: context[:worker_id])
|
|
322
|
+
|
|
323
|
+
SearchEngine::PostgresOutbox::ProcessorResult.success(events.map(&:id))
|
|
324
|
+
rescue StandardError => error
|
|
325
|
+
SearchEngine::PostgresOutbox::ProcessorResult.failure(events.map(&:id), error: error)
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
When one collection references another, declare those references on the SearchEngine models. The outbox
|
|
331
|
+
drainer uses the same dependency planner direction as bulk cascade planning, so parent/source collections
|
|
332
|
+
are processed before dependent collections in the same drain pass. If a collection group fails, later
|
|
333
|
+
dependent groups are left retryable instead of being processed against stale data.
|
|
334
|
+
|
|
335
|
+
Enable `c.postgres_outbox.advisory_lock = true` when multiple processes may start listeners and your host
|
|
336
|
+
deployment cannot guarantee exactly one listener. The listener uses `pg_try_advisory_lock` with
|
|
337
|
+
`c.postgres_outbox.advisory_lock_key`, or a stable key derived from the notification channel. If the lock is
|
|
338
|
+
not acquired, that listener sleeps and retries.
|
|
339
|
+
|
|
340
|
+
Processed and superseded rows are safe to delete after your retention window. Failed rows should be
|
|
341
|
+
inspected before deletion because they contain the last error and retry state. A typical cleanup job deletes
|
|
342
|
+
only rows with `status IN ('processed', 'superseded')` and `processed_at` older than
|
|
343
|
+
`c.postgres_outbox.retention_s`.
|
|
344
|
+
|
|
191
345
|
## Example app
|
|
192
346
|
|
|
193
347
|
See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/testing)).
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  module PostgresOutbox
    # ActiveJob wrapper that runs a single, bounded drain pass over the
    # PostgreSQL outbox.
    #
    # The queue name is resolved through a block from
    # +SearchEngine.config.postgres_outbox.queue_name+ and stringified.
    class DrainJob < ::ActiveJob::Base
      queue_as { SearchEngine.config.postgres_outbox.queue_name.to_s }

      # Run one drain pass, unless PostgreSQL outbox processing is disabled.
      #
      # @param limit [Integer, nil] optional cap on the number of events to claim;
      #   when nil the drainer is invoked without a limit argument
      # @return [Hash, nil] the value returned by the drainer, or nil when the
      #   outbox is disabled
      def perform(limit: nil)
        return nil unless SearchEngine.config.postgres_outbox.enabled

        drainer = SearchEngine::PostgresOutbox::Drainer.new
        limit.nil? ? drainer.drain_once : drainer.drain_once(limit: limit)
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/generators'
|
|
4
|
+
require 'rails/generators/active_record'
|
|
5
|
+
|
|
6
|
+
module SearchEngine
  module Generators
    module PostgresOutbox
      # Install generator for the PostgreSQL outbox migrations.
      #
      # Copies two migration templates into +db/migrate+: one that creates the
      # outbox events table and one that holds trigger examples for the host
      # app to edit.
      #
      # @example
      #   rails g search_engine:postgres_outbox:install
      class InstallGenerator < Rails::Generators::Base
        include ActiveRecord::Generators::Migration

        source_root File.expand_path('templates', __dir__)

        # Generate the migration that creates the outbox events table.
        # @return [void]
        def create_outbox_table_migration
          migration_template 'create_outbox_events.rb.tt',
                             'db/migrate/create_search_engine_outbox_events.rb'
        end

        # Generate the migration that holds the trigger examples.
        # @return [void]
        def create_trigger_examples_migration
          migration_template 'add_outbox_triggers.rb.tt',
                             'db/migrate/add_search_engine_outbox_triggers.rb'
        end

        # Compute the version prefix for the next generated migration.
        #
        # Delegates to the Active Record generator so the number accounts for
        # migrations already present in +dirname+ and for the host app's
        # migration numbering settings. This keeps the two migrations above
        # distinct without sleeping between them.
        #
        # @param dirname [String] migrations directory
        # @return [String] migration number
        def self.next_migration_number(dirname)
          ActiveRecord::Generators::Base.next_migration_number(dirname)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class AddSearchEngineOutboxTriggers < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
|
|
4
|
+
include SearchEngine::PostgresOutbox::MigrationHelpers
|
|
5
|
+
|
|
6
|
+
def up
|
|
7
|
+
# Add one trigger per source table that should write SearchEngine outbox
|
|
8
|
+
# events. Triggers fire AFTER INSERT/UPDATE/DELETE by default, insert the
|
|
9
|
+
# outbox row in the same database transaction as the source change, and call
|
|
10
|
+
# pg_notify, which PostgreSQL delivers at commit as a non-durable wakeup nudge for the listener.
|
|
11
|
+
#
|
|
12
|
+
# Pair triggered source models with:
|
|
13
|
+
# search_engine_syncable collection: :collection_name, sync_strategy: :postgres_outbox
|
|
14
|
+
# so Active Record callbacks do not also sync the same changes.
|
|
15
|
+
#
|
|
16
|
+
# record_id_sql and document_id_sql are trusted migration SQL expressions.
|
|
17
|
+
# They may reference record_data, which is NEW for inserts/updates and OLD
|
|
18
|
+
# for deletes.
|
|
19
|
+
#
|
|
20
|
+
# Simple id example:
|
|
21
|
+
# create_search_engine_outbox_trigger(
|
|
22
|
+
# :products,
|
|
23
|
+
# source_model: 'Product',
|
|
24
|
+
# collection: 'products'
|
|
25
|
+
# )
|
|
26
|
+
|
|
27
|
+
# Custom document id example:
|
|
28
|
+
# create_search_engine_outbox_trigger(
|
|
29
|
+
# :product_store_entries,
|
|
30
|
+
# source_model: 'ProductStoreEntry',
|
|
31
|
+
# collection: 'product_store_entries',
|
|
32
|
+
# document_id_sql: "record_data.product_id::text || '-' || record_data.store_id::text"
|
|
33
|
+
# )
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def down
|
|
37
|
+
# Drop triggers/functions in reverse order during rollback or teardown.
|
|
38
|
+
# drop_search_engine_outbox_trigger(:product_store_entries)
|
|
39
|
+
# drop_search_engine_outbox_trigger(:products)
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class CreateSearchEngineOutboxEvents < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
|
|
4
|
+
include SearchEngine::PostgresOutbox::MigrationHelpers
|
|
5
|
+
|
|
6
|
+
def change
|
|
7
|
+
create_search_engine_outbox_events
|
|
8
|
+
end
|
|
9
|
+
end
|
|
@@ -5,8 +5,10 @@ require 'active_support/concern'
|
|
|
5
5
|
module SearchEngine
|
|
6
6
|
# ActiveRecord concern to keep a Typesense collection in sync.
|
|
7
7
|
#
|
|
8
|
-
# Include into an AR model and call {.search_engine_syncable} to
|
|
9
|
-
#
|
|
8
|
+
# Include into an AR model and call {.search_engine_syncable} to store sync
|
|
9
|
+
# metadata. The default callback strategy installs lifecycle callbacks that
|
|
10
|
+
# upsert on create/update and delete on destroy; the postgres_outbox strategy
|
|
11
|
+
# stores metadata without installing callbacks.
|
|
10
12
|
#
|
|
11
13
|
# @example
|
|
12
14
|
# class Product < ApplicationRecord
|
|
@@ -21,6 +23,7 @@ module SearchEngine
|
|
|
21
23
|
#
|
|
22
24
|
# - collection: defaults to the AR class tableized name (snake_case, plural)
|
|
23
25
|
# - on: one or many of :create, :update, :destroy (strings or symbols)
|
|
26
|
+
# - sync_strategy: :callbacks (default) or :postgres_outbox
|
|
24
27
|
#
|
|
25
28
|
# Validates that either a physical Typesense collection exists for the
|
|
26
29
|
# given name or a SearchEngine model is registered for it. Mapping for
|
|
@@ -29,20 +32,23 @@ module SearchEngine
|
|
|
29
32
|
#
|
|
30
33
|
# @param collection [Symbol, String, nil]
|
|
31
34
|
# @param on [Array<Symbol,String>, Symbol, String, nil]
|
|
35
|
+
# @param sync_strategy [Symbol, String, nil]
|
|
32
36
|
# @return [Class] self (for macro chaining)
|
|
33
|
-
def search_engine_syncable(collection: nil, on: nil)
|
|
37
|
+
def search_engine_syncable(collection: nil, on: nil, sync_strategy: :callbacks)
|
|
34
38
|
effective_actions = on
|
|
35
39
|
|
|
36
40
|
cfg = SearchEngine::ActiveRecordSyncable.__normalize_config_for(
|
|
37
41
|
self,
|
|
38
42
|
collection: collection,
|
|
39
|
-
actions: effective_actions
|
|
43
|
+
actions: effective_actions,
|
|
44
|
+
sync_strategy: sync_strategy
|
|
40
45
|
)
|
|
41
46
|
|
|
42
47
|
# Store config on the AR class (used by instance methods)
|
|
43
48
|
instance_variable_set(:@__se_syncable_cfg__, cfg)
|
|
44
49
|
|
|
45
|
-
SearchEngine::ActiveRecordSyncable.__register_callbacks_for(self, cfg)
|
|
50
|
+
SearchEngine::ActiveRecordSyncable.__register_callbacks_for(self, cfg) if cfg[:sync_strategy] == :callbacks
|
|
51
|
+
|
|
46
52
|
self
|
|
47
53
|
end
|
|
48
54
|
end
|
|
@@ -177,12 +183,14 @@ module SearchEngine
|
|
|
177
183
|
# @param ar_klass [Class]
|
|
178
184
|
# @param collection [String,Symbol,nil]
|
|
179
185
|
# @param actions [Array<String,Symbol>, String, Symbol, nil]
|
|
186
|
+
# @param sync_strategy [String, Symbol, nil]
|
|
180
187
|
# @return [Hash]
|
|
181
|
-
def __normalize_config_for(ar_klass, collection:, actions:)
|
|
188
|
+
def __normalize_config_for(ar_klass, collection:, actions:, sync_strategy: :callbacks)
|
|
182
189
|
require 'active_support/inflector'
|
|
183
190
|
|
|
184
191
|
logical = (collection || ActiveSupport::Inflector.tableize(ar_klass.name)).to_s
|
|
185
192
|
normalized_actions = __normalize_actions(actions)
|
|
193
|
+
normalized_sync_strategy = __normalize_sync_strategy(sync_strategy)
|
|
186
194
|
|
|
187
195
|
# Best-effort resolve SearchEngine model now; fall back to lazy resolution
|
|
188
196
|
se_klass = begin
|
|
@@ -200,6 +208,7 @@ module SearchEngine
|
|
|
200
208
|
{
|
|
201
209
|
logical: logical,
|
|
202
210
|
actions: normalized_actions,
|
|
211
|
+
sync_strategy: normalized_sync_strategy,
|
|
203
212
|
se_klass: se_klass
|
|
204
213
|
}
|
|
205
214
|
end
|
|
@@ -222,6 +231,20 @@ module SearchEngine
|
|
|
222
231
|
list.uniq
|
|
223
232
|
end
|
|
224
233
|
|
|
234
|
+
# @api private
# Coerce a sync strategy option into one of the supported symbols.
#
# nil falls back to :callbacks; any other value is stringified, downcased
# and stripped before being checked against the allow-list.
#
# @param strategy [String, Symbol, nil]
# @return [Symbol] :callbacks or :postgres_outbox
# @raise [ArgumentError] when the value is not a supported strategy
def __normalize_sync_strategy(strategy)
  allowed = %i[callbacks postgres_outbox]
  normalized = strategy.nil? ? :callbacks : strategy.to_s.downcase.strip.to_sym

  return normalized if allowed.include?(normalized)

  # Echo the rejected value so a typo in the macro call is easy to spot.
  raise ArgumentError,
        "search_engine_syncable: sync_strategy must be within #{allowed.inspect} (got #{strategy.inspect})"
end
|
|
247
|
+
|
|
225
248
|
# (no-op placeholder kept for backwards compatibility of method table in case of reloads)
|
|
226
249
|
|
|
227
250
|
# @api private
|
data/lib/search_engine/bulk.rb
CHANGED
|
@@ -94,19 +94,9 @@ module SearchEngine
|
|
|
94
94
|
# Fallback to all declared/registered collections when no explicit targets are given.
|
|
95
95
|
input_names = SearchEngine::CollectionResolver.models_map.keys if input_names.empty?
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# Identify inputs that are referrers of other inputs (skip them in stage 1)
|
|
101
|
-
internal_referrers = internal_referrers_within_inputs(reverse_graph, input_set)
|
|
102
|
-
|
|
103
|
-
stage1_list = input_names.reject { |n| internal_referrers.include?(n) }
|
|
104
|
-
|
|
105
|
-
# Collect unique referencers of any input for the final cascade step
|
|
106
|
-
cascade_candidates = unique_referencers_of_inputs(reverse_graph, input_names)
|
|
107
|
-
|
|
108
|
-
# Order cascade candidates among themselves by dependency (referenced first)
|
|
109
|
-
cascade_order = topo_sort_subset(reverse_graph, cascade_candidates)
|
|
97
|
+
stages = SearchEngine::DependencyPlanner.bulk_stages(input_names, source: :auto, client: ts_client)
|
|
98
|
+
stage1_list = stages[:stage_1]
|
|
99
|
+
cascade_order = stages[:cascade]
|
|
110
100
|
|
|
111
101
|
stats = {
|
|
112
102
|
inputs: input_names,
|
|
@@ -196,89 +186,6 @@ module SearchEngine
|
|
|
196
186
|
filtered.uniq
|
|
197
187
|
end
|
|
198
188
|
|
|
199
|
-
# Compute the subset of inputs that are referrers of other inputs.
|
|
200
|
-
# reverse_graph: target => [{ referrer, local_key, foreign_key }, ...]
|
|
201
|
-
# @param reverse_graph [Hash]
|
|
202
|
-
# @param input_set [Hash{String=>true}]
|
|
203
|
-
# @return [Set<String>]
|
|
204
|
-
def internal_referrers_within_inputs(reverse_graph, input_set)
|
|
205
|
-
require 'set'
|
|
206
|
-
refs = Set.new
|
|
207
|
-
reverse_graph.each do |target, edges|
|
|
208
|
-
next unless input_set[target]
|
|
209
|
-
|
|
210
|
-
Array(edges).each do |e|
|
|
211
|
-
r = (e[:referrer] || e['referrer']).to_s
|
|
212
|
-
refs.add(r) if input_set[r]
|
|
213
|
-
end
|
|
214
|
-
end
|
|
215
|
-
refs
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
# Unique list of referencers of any input logical name.
|
|
219
|
-
# @param reverse_graph [Hash]
|
|
220
|
-
# @param inputs [Array<String>]
|
|
221
|
-
# @return [Array<String>]
|
|
222
|
-
def unique_referencers_of_inputs(reverse_graph, inputs)
|
|
223
|
-
require 'set'
|
|
224
|
-
seen = Set.new
|
|
225
|
-
Array(inputs).each do |name|
|
|
226
|
-
Array(reverse_graph[name]).each do |e|
|
|
227
|
-
r = (e[:referrer] || e['referrer']).to_s
|
|
228
|
-
seen.add(r) unless r.strip.empty?
|
|
229
|
-
end
|
|
230
|
-
end
|
|
231
|
-
seen.to_a
|
|
232
|
-
end
|
|
233
|
-
|
|
234
|
-
# Topologically sort a subset of nodes using reverse_graph edges.
|
|
235
|
-
# Nodes are referencers; for any edge referrer -> target, ensure target comes first when it is in the subset.
|
|
236
|
-
# @param reverse_graph [Hash]
|
|
237
|
-
# @param subset [Array<String>]
|
|
238
|
-
# @return [Array<String>]
|
|
239
|
-
def topo_sort_subset(reverse_graph, subset)
|
|
240
|
-
require 'set'
|
|
241
|
-
nodes = Array(subset).uniq
|
|
242
|
-
node_set = nodes.to_h { |n| [n, true] }
|
|
243
|
-
|
|
244
|
-
# Build forward adjacency among subset nodes and indegree counts
|
|
245
|
-
adj = Hash.new { |h, k| h[k] = Set.new }
|
|
246
|
-
indeg = Hash.new(0)
|
|
247
|
-
|
|
248
|
-
nodes.each { |n| indeg[n] = 0 }
|
|
249
|
-
|
|
250
|
-
reverse_graph.each do |target, edges|
|
|
251
|
-
Array(edges).each do |e|
|
|
252
|
-
ref = (e[:referrer] || e['referrer']).to_s
|
|
253
|
-
tgt = target.to_s
|
|
254
|
-
next unless node_set[ref] && node_set[tgt]
|
|
255
|
-
|
|
256
|
-
# referrer depends on target: target should precede referrer
|
|
257
|
-
unless adj[tgt].include?(ref)
|
|
258
|
-
adj[tgt] << ref
|
|
259
|
-
indeg[ref] += 1
|
|
260
|
-
end
|
|
261
|
-
end
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
# Kahn's algorithm (stable by name)
|
|
265
|
-
queue = nodes.select { |n| indeg[n].to_i <= 0 }.sort
|
|
266
|
-
order = []
|
|
267
|
-
until queue.empty?
|
|
268
|
-
n = queue.shift
|
|
269
|
-
order << n
|
|
270
|
-
adj[n].each do |m|
|
|
271
|
-
indeg[m] -= 1
|
|
272
|
-
queue << m if indeg[m] <= 0
|
|
273
|
-
end
|
|
274
|
-
queue.sort!
|
|
275
|
-
end
|
|
276
|
-
|
|
277
|
-
# Append any remaining nodes (cycles) in stable name order
|
|
278
|
-
remaining = nodes - order
|
|
279
|
-
order + remaining.sort
|
|
280
|
-
end
|
|
281
|
-
|
|
282
189
|
# Resolve a collection model class from a collection name.
|
|
283
190
|
# Uses CollectionResolver for better fallback logic and model discovery.
|
|
284
191
|
# @param name [String]
|
data/lib/search_engine/config.rb
CHANGED
|
@@ -264,6 +264,58 @@ module SearchEngine
|
|
|
264
264
|
end
|
|
265
265
|
end
|
|
266
266
|
|
|
267
|
+
# Nested settings object for PostgreSQL outbox sync. Every attribute is a
# plain read/write accessor; defaults are assigned in the constructor.
class PostgresOutboxConfig
  # @return [Boolean] master switch for PostgreSQL outbox sync (default: false)
  attr_accessor :enabled
  # @return [String] name of the database table holding host-managed outbox events
  attr_accessor :table_name
  # @return [String] PostgreSQL notification channel used for wakeups
  attr_accessor :channel
  # @return [String] queue name used when the host app dispatches jobs
  attr_accessor :queue_name
  # @return [Integer] upper bound on events claimed per processing batch
  attr_accessor :batch_size
  # @return [Integer] processing attempts allowed before an event is left failed
  attr_accessor :max_attempts
  # @return [Integer] polling interval, in seconds
  attr_accessor :poll_interval_s
  # @return [Integer] listener wait timeout, in seconds
  attr_accessor :listener_wait_timeout_s
  # @return [Integer] processing timeout, in seconds
  attr_accessor :processing_timeout_s
  # @return [Integer] retention period, in seconds
  attr_accessor :retention_s
  # @return [Boolean] whether host processing should take an advisory lock
  attr_accessor :advisory_lock
  # @return [Integer, nil] explicit PostgreSQL advisory lock key, if any
  attr_accessor :advisory_lock_key
  # @return [#call] predicate deciding whether listener work may run
  attr_accessor :listener_enabled
  # @return [Hash] host-supplied collection processors, keyed by collection name
  attr_accessor :collection_processors
  # @return [#call] callable that receives the attempt number and returns the retry backoff
  attr_accessor :retry_backoff

  # Build a configuration with outbox sync and the listener switched off.
  def initialize
    # Switches: nothing runs until the host app opts in.
    @enabled = false
    @listener_enabled = -> { false }
    @advisory_lock = false
    @advisory_lock_key = nil

    # Names of the database and queue resources.
    @table_name = 'search_engine_outbox_events'
    @channel = 'search_engine_outbox'
    @queue_name = 'search_engine'

    # Batch sizing and retry limits.
    @batch_size = 1000
    @max_attempts = 10

    # Durations, all in seconds.
    @poll_interval_s = 5
    @listener_wait_timeout_s = 30
    @processing_timeout_s = 15 * 60
    @retention_s = 7 * 24 * 60 * 60

    # Extension points.
    @collection_processors = {}
    # Five units per attempt; attempts below one are treated as one.
    @retry_backoff = ->(attempt) { 5 * [1, attempt.to_i].max }
  end
end
|
|
318
|
+
|
|
267
319
|
# Lightweight nested configuration for observability/logging.
|
|
268
320
|
# Kept for backward compatibility during refactor; delegates to external class.
|
|
269
321
|
#
|
|
@@ -414,6 +466,7 @@ module SearchEngine
|
|
|
414
466
|
@mapper = MapperConfig.new
|
|
415
467
|
@partitioning = PartitioningConfig.new
|
|
416
468
|
@stale_deletes = StaleDeletesConfig.new
|
|
469
|
+
@postgres_outbox = PostgresOutboxConfig.new
|
|
417
470
|
@observability = ObservabilityConfig.new
|
|
418
471
|
@grouping = GroupingConfig.new
|
|
419
472
|
@selection = SelectionConfig.new
|
|
@@ -525,6 +578,12 @@ module SearchEngine
|
|
|
525
578
|
@stale_deletes ||= StaleDeletesConfig.new
|
|
526
579
|
end
|
|
527
580
|
|
|
581
|
+
# Accessor for the nested PostgreSQL outbox settings; builds a default
# configuration object on first use when none has been assigned yet.
# @return [SearchEngine::Config::PostgresOutboxConfig]
def postgres_outbox
  return @postgres_outbox if @postgres_outbox

  @postgres_outbox = PostgresOutboxConfig.new
end
|
|
586
|
+
|
|
528
587
|
# Expose structured logging configuration.
|
|
529
588
|
#
|
|
530
589
|
# By default `mode` is nil which disables the structured `LoggingSubscriber`.
|
|
@@ -722,6 +781,7 @@ module SearchEngine
|
|
|
722
781
|
sources: sources_hash_for_to_h,
|
|
723
782
|
mapper: mapper_hash_for_to_h,
|
|
724
783
|
partitioning: partitioning_hash_for_to_h,
|
|
784
|
+
postgres_outbox: postgres_outbox_hash_for_to_h,
|
|
725
785
|
observability: observability_hash_for_to_h,
|
|
726
786
|
selection: selection_hash_for_to_h,
|
|
727
787
|
presets: presets_hash_for_to_h,
|
|
@@ -799,6 +859,26 @@ module SearchEngine
|
|
|
799
859
|
}
|
|
800
860
|
end
|
|
801
861
|
|
|
862
|
+
# Build the hash representation of the PostgreSQL outbox settings.
#
# +enabled+ and +advisory_lock+ are coerced to strict booleans; every other
# value is passed through exactly as stored on the configuration object.
#
# @return [Hash{Symbol=>Object}]
def postgres_outbox_hash_for_to_h
  outbox = postgres_outbox
  boolean_keys = %i[enabled advisory_lock]
  # Listed in output order.
  ordered_keys = %i[
    enabled table_name channel queue_name batch_size max_attempts
    poll_interval_s listener_wait_timeout_s processing_timeout_s retention_s
    advisory_lock advisory_lock_key listener_enabled collection_processors
    retry_backoff
  ]

  ordered_keys.each_with_object({}) do |key, hash|
    value = outbox.public_send(key)
    hash[key] = boolean_keys.include?(key) ? !!value : value
  end
end
|
|
881
|
+
|
|
802
882
|
def observability_hash_for_to_h
|
|
803
883
|
{
|
|
804
884
|
enabled: observability.enabled ? true : false,
|