search-engine-for-typesense 30.1.6.18 → 30.1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -0
- data/app/search_engine/search_engine/index_partition_job.rb +28 -1
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +18 -0
- data/lib/search_engine/active_record_syncable.rb +14 -3
- data/lib/search_engine/async_partition_coordinator.rb +208 -0
- data/lib/search_engine/base/index_maintenance.rb +31 -0
- data/lib/search_engine/config.rb +35 -2
- data/lib/search_engine/indexing_run.rb +191 -0
- data/lib/search_engine/indexing_run_store/rails_cache.rb +231 -0
- data/lib/search_engine/indexing_run_store.rb +51 -0
- data/lib/search_engine/relation/dsl/eval.rb +62 -0
- data/lib/search_engine/relation/dsl/geo.rb +139 -0
- data/lib/search_engine/relation/dsl.rb +4 -0
- data/lib/search_engine/result.rb +25 -1
- data/lib/search_engine/schema.rb +4 -1
- data/lib/search_engine/version.rb +1 -1
- data/lib/search_engine.rb +4 -0
- metadata +8 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0852da3905487de90a84633634760fba6d4806d236e6738a8404d70e9122eef4'
|
|
4
|
+
data.tar.gz: fe62daa0161f71c0b5bc24da4195708a96b1687bc096383a12056aa346178710
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6c12c7c80626ac24ea4fd5515559d354632a42a0bcd2381d61ed577aa3e24e6e60448cdf810defdf00d819e8960c93a69ce5129e27225a76c6a476e0b62115c4
|
|
7
|
+
data.tar.gz: 405565a325c099df28ba1d54d85a308f5297299a59eb40cd9f03a24ebb8306f588becc7c31be5d82cb3be05216cb443dd39a1152d417e07da2b64b97162cbe2a
|
data/README.md
CHANGED
|
@@ -116,6 +116,36 @@ SearchEngine::Product.upsert_bulk(records: Product.limit(2))
|
|
|
116
116
|
|
|
117
117
|
# Bulk upsert mapped payloads
|
|
118
118
|
SearchEngine::Product.upsert_bulk(data: [mapped])
|
|
119
|
+
|
|
120
|
+
# Geo search
|
|
121
|
+
class SearchEngine::Venue < SearchEngine::Base
|
|
122
|
+
collection :venues
|
|
123
|
+
identify_by :id
|
|
124
|
+
|
|
125
|
+
attribute :name, :string
|
|
126
|
+
attribute :location, :geopoint
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Filter by radius
|
|
130
|
+
SearchEngine::Venue
|
|
131
|
+
.where_geo(:location, within_radius: { lat: 54.69, lng: 25.28, radius: "10 km" })
|
|
132
|
+
.order_geo(:location, from: { lat: 54.69, lng: 25.28 })
|
|
133
|
+
.to_a
|
|
134
|
+
|
|
135
|
+
# Filter by polygon (viewport)
|
|
136
|
+
SearchEngine::Venue
|
|
137
|
+
.where_geo(:location, within_polygon: [[54.72, 25.35], [54.72, 25.22], [54.67, 25.22], [54.67, 25.35]])
|
|
138
|
+
.to_a
|
|
139
|
+
|
|
140
|
+
# Viewport boost with _eval() + distance tiebreaker
|
|
141
|
+
SearchEngine::Venue
|
|
142
|
+
.order_eval("location:(54.72,25.35, 54.72,25.22, 54.67,25.22, 54.67,25.35)", direction: :desc)
|
|
143
|
+
.order_geo(:location, from: { lat: 54.69, lng: 25.28 })
|
|
144
|
+
.to_a
|
|
145
|
+
|
|
146
|
+
# Access geo distance on results (present when order_geo is used)
|
|
147
|
+
result = SearchEngine::Venue.all.order_geo(:location, from: { lat: 54.69, lng: 25.28 }).execute
|
|
148
|
+
result.hits.first.geo_distance_meters # => { "location" => 1234 }
|
|
119
149
|
```
|
|
120
150
|
|
|
121
151
|
## Documentation
|
|
@@ -134,6 +164,30 @@ You can control this explicitly with:
|
|
|
134
164
|
|
|
135
165
|
If you set `SearchEngine.configure { |c| c.client = ... }`, the custom client is always used.
|
|
136
166
|
|
|
167
|
+
## Async partition indexing
|
|
168
|
+
|
|
169
|
+
Async partition indexing is an advanced opt-in mode for partitioned full indexing. The default remains
|
|
170
|
+
inline execution. Apps with a real ActiveJob backend can use queue-backed partition execution so each
|
|
171
|
+
search/index partition imports into the same blue/green physical collection before the alias is swapped.
|
|
172
|
+
This mode does not require Sidekiq; use any ActiveJob backend that can run the partition jobs.
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
SearchEngine.configure do |c|
|
|
176
|
+
c.indexer.partition_execution = :active_job
|
|
177
|
+
c.indexer.partition_queue_name = "search_index_partitions"
|
|
178
|
+
c.indexer.partition_timeout_s = 7_200
|
|
179
|
+
end
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Async mode is partition-based, not app-domain based. The gem enqueues one
|
|
183
|
+
`SearchEngine::IndexPartitionJob` per configured partition, waits for every partition to finish, and
|
|
184
|
+
only then lets the schema lifecycle swap the alias. If any partition fails or times out, the previous
|
|
185
|
+
alias target remains active.
|
|
186
|
+
|
|
187
|
+
Use a shared `Rails.cache` backend, or provide `c.indexer.partition_run_store`, so worker processes and
|
|
188
|
+
the parent indexing process can see the same run metadata. Size the queue carefully: worker concurrency
|
|
189
|
+
multiplies with any per-partition `max_parallel` setting.
|
|
190
|
+
|
|
137
191
|
## Example app
|
|
138
192
|
|
|
139
193
|
See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/testing)).
|
|
@@ -8,6 +8,8 @@ module SearchEngine
|
|
|
8
8
|
# - partition [Object] (JSON-serializable)
|
|
9
9
|
# - into [String, nil]
|
|
10
10
|
# - metadata [Hash]
|
|
11
|
+
# - run_id [String, nil]
|
|
12
|
+
# - partition_key [String, nil]
|
|
11
13
|
class IndexPartitionJob < ::ActiveJob::Base
|
|
12
14
|
queue_as do
|
|
13
15
|
cfg = SearchEngine.config.indexer
|
|
@@ -31,11 +33,16 @@ module SearchEngine
|
|
|
31
33
|
# @param partition [Object]
|
|
32
34
|
# @param into [String, nil]
|
|
33
35
|
# @param metadata [Hash]
|
|
36
|
+
# @param run_id [String, nil]
|
|
37
|
+
# @param partition_key [String, nil]
|
|
34
38
|
# @return [void]
|
|
35
|
-
def perform(collection_class_name, partition, into: nil, metadata: {})
|
|
39
|
+
def perform(collection_class_name, partition, into: nil, metadata: {}, run_id: nil, partition_key: nil)
|
|
36
40
|
payload = nil
|
|
37
41
|
klass = constantize_collection!(collection_class_name)
|
|
38
42
|
payload = base_payload(klass, partition: partition, into: into)
|
|
43
|
+
run_store = indexing_run_store(run_id)
|
|
44
|
+
partition_key ||= SearchEngine::IndexingRun.partition_key(partition) if run_id
|
|
45
|
+
run_store&.mark_started(run_id: run_id, partition_key: partition_key, job_id: job_id)
|
|
39
46
|
instrument('search_engine.dispatcher.job_started',
|
|
40
47
|
payload.merge(queue: queue_name, job_id: job_id, metadata: metadata)
|
|
41
48
|
)
|
|
@@ -46,6 +53,7 @@ module SearchEngine
|
|
|
46
53
|
summary = SearchEngine::Indexer.rebuild_partition!(klass, partition: partition, into: into)
|
|
47
54
|
end
|
|
48
55
|
duration = (monotonic_ms - started).round(1)
|
|
56
|
+
run_store&.mark_succeeded(run_id: run_id, partition_key: partition_key, summary: summary)
|
|
49
57
|
|
|
50
58
|
instrument(
|
|
51
59
|
'search_engine.dispatcher.job_finished',
|
|
@@ -56,6 +64,7 @@ module SearchEngine
|
|
|
56
64
|
nil
|
|
57
65
|
rescue StandardError => error
|
|
58
66
|
safe_payload = payload || error_payload(error)
|
|
67
|
+
run_store&.mark_failed(run_id: run_id, partition_key: partition_key, error: error) unless retryable_error?(error)
|
|
59
68
|
instrument_error(error, payload: safe_payload.merge(metadata: metadata || {}))
|
|
60
69
|
raise
|
|
61
70
|
end
|
|
@@ -99,6 +108,24 @@ module SearchEngine
|
|
|
99
108
|
retry_job wait: wait_seconds
|
|
100
109
|
end
|
|
101
110
|
|
|
111
|
+
# Look up the run store used to record this partition's progress.
#
# @param run_id [String, nil] identifier of the async indexing run, if any
# @return [Object, nil] whatever SearchEngine::IndexingRunStore.resolve returns,
#   or nil when the job was enqueued without a run_id (no run tracking)
def indexing_run_store(run_id)
  SearchEngine::IndexingRunStore.resolve unless run_id.nil?
end
|
|
116
|
+
|
|
117
|
+
# Whether the job will be retried for this error, in which case the caller
# must not record the partition as failed yet.
#
# @param error [StandardError]
# @return [Boolean, nil] truthy only when the error is transient and the
#   number of executions so far is below the retry policy's attempt budget
def retryable_error?(error)
  transient = transient_error?(error)
  transient && executions.to_i < retry_policy.attempts
end
|
|
120
|
+
|
|
121
|
+
# Classify an error as transient.
#
# Timeout and connection errors are always transient; API errors are
# transient only when RetryPolicy.transient_status? accepts their status.
#
# @param error [StandardError]
# @return [Boolean] (for API errors, whatever transient_status? returns)
def transient_error?(error)
  case error
  when SearchEngine::Errors::Timeout, SearchEngine::Errors::Connection
    true
  when SearchEngine::Errors::Api
    SearchEngine::Indexer::RetryPolicy.transient_status?(error.status.to_i)
  else
    false
  end
end
|
|
128
|
+
|
|
102
129
|
def error_payload(error)
|
|
103
130
|
{
|
|
104
131
|
collection: arguments_dig_collection,
|
|
@@ -181,6 +181,24 @@ SearchEngine.configure do |c|
|
|
|
181
181
|
# Queue name for ActiveJob dispatch. Default: 'search_index'
|
|
182
182
|
# c.indexer.queue_name = 'search_index'
|
|
183
183
|
|
|
184
|
+
# Advanced partition execution for full indexing. Default: :inline
|
|
185
|
+
# Use :active_job with a real ActiveJob backend to enqueue one job per
|
|
186
|
+
# partition during blue/green indexing. Does not require Sidekiq.
|
|
187
|
+
# c.indexer.partition_execution = :inline
|
|
188
|
+
|
|
189
|
+
# Optional queue override for partition jobs. Falls back to c.indexer.queue_name.
|
|
190
|
+
# Size worker concurrency carefully: queue worker concurrency multiplies with each partition's max_parallel.
|
|
191
|
+
# c.indexer.partition_queue_name = nil
|
|
192
|
+
|
|
193
|
+
# Parent wait settings for async partition runs. Defaults shown.
|
|
194
|
+
# c.indexer.partition_poll_interval_s = 2
|
|
195
|
+
# c.indexer.partition_timeout_s = nil
|
|
196
|
+
|
|
197
|
+
# Operational metadata for async partition runs. Defaults shown.
|
|
198
|
+
# Use shared Rails.cache or provide a custom store visible to workers and parent.
|
|
199
|
+
# c.indexer.partition_run_store = nil
|
|
200
|
+
# c.indexer.partition_run_ttl_s = 86_400
|
|
201
|
+
|
|
184
202
|
# --- Sources -------------------------------------------------------------
|
|
185
203
|
|
|
186
204
|
# ActiveRecord source: default ORM batch size. Default: 2000
|
|
@@ -235,10 +235,21 @@ module SearchEngine
|
|
|
235
235
|
end
|
|
236
236
|
|
|
237
237
|
actions = cfg[:actions]
|
|
238
|
+
timing = begin
|
|
239
|
+
SearchEngine.config.syncable_callback_timing
|
|
240
|
+
rescue StandardError
|
|
241
|
+
:after_commit
|
|
242
|
+
end
|
|
238
243
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
244
|
+
if timing == :after_commit
|
|
245
|
+
ar_klass.after_create_commit :__se_syncable_upsert! if actions.include?(:create)
|
|
246
|
+
ar_klass.after_update_commit :__se_syncable_upsert! if actions.include?(:update)
|
|
247
|
+
ar_klass.after_destroy_commit :__se_syncable_delete! if actions.include?(:destroy)
|
|
248
|
+
else
|
|
249
|
+
ar_klass.after_create :__se_syncable_upsert! if actions.include?(:create)
|
|
250
|
+
ar_klass.after_update :__se_syncable_upsert! if actions.include?(:update)
|
|
251
|
+
ar_klass.after_destroy :__se_syncable_delete! if actions.include?(:destroy)
|
|
252
|
+
end
|
|
242
253
|
|
|
243
254
|
ar_klass.instance_variable_set(:@__se_syncable_callbacks_installed__, true)
|
|
244
255
|
nil
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Coordinates ActiveJob-backed partition indexing and waits for run completion.
  #
  # One SearchEngine::IndexPartitionJob is enqueued per partition; progress is
  # tracked through a run store that both the enqueuing process and the job
  # workers write to/read from. The enqueuing process polls that store until
  # the run succeeds, any partition fails, or the optional timeout elapses.
  module AsyncPartitionCoordinator
    module_function

    # Enqueue one partition job per partition and return a lifecycle-compatible aggregate.
    # @param klass [Class] SearchEngine collection class
    # @param partitions [Array<Object>] logical partition values
    # @param into [String] target physical collection name
    # @param queue [String, nil] optional ActiveJob queue override
    # @param timeout_s [Numeric, nil] maximum wait time in seconds
    # @param poll_interval_s [Numeric, nil] polling interval in seconds
    # @param store [Object, nil] optional run store
    # @param ttl_s [Numeric, nil] run metadata TTL in seconds
    # @return [Hash] lifecycle result with :status, :docs_total, :success_total, :failed_total, :sample_error
    def call(klass:, partitions:, into:, queue: nil, timeout_s: nil, poll_interval_s: nil, store: nil, ttl_s: nil)
      cfg = SearchEngine.config.indexer
      partition_list = Array(partitions)
      run_id = SearchEngine::IndexingRun.generate_id
      queue_name = resolve_queue_name(queue, cfg)
      run_store = SearchEngine::IndexingRunStore.validate!(store || SearchEngine::IndexingRunStore.resolve)
      ttl = ttl_s || cfg.partition_run_ttl_s

      snapshot = create_run(
        store: run_store,
        run_id: run_id,
        klass: klass,
        into: into,
        partitions: partition_list,
        ttl_s: ttl
      )
      instrument('search_engine.indexing_run.started', event_payload(snapshot, queue: queue_name))
      enqueue_partitions(
        klass, partition_list,
        into: into, queue_name: queue_name, run_id: run_id, store: run_store
      )
      instrument('search_engine.indexing_run.enqueued', event_payload(snapshot, queue: queue_name))

      # Explicit arguments win; nil falls back to the indexer configuration.
      wait_for_completion(
        store: run_store,
        run_id: run_id,
        timeout_s: timeout_s.nil? ? cfg.partition_timeout_s : timeout_s,
        poll_interval_s: poll_interval_s.nil? ? cfg.partition_poll_interval_s : poll_interval_s
      )
    end

    # Register the run and its partitions in the store.
    # @return [Object] whatever the store's #create_run returns (used as the initial snapshot)
    def create_run(store:, run_id:, klass:, into:, partitions:, ttl_s:)
      store.create_run(
        run_id: run_id,
        collection: collection_name(klass),
        collection_class_name: klass.name,
        into: into,
        partitions: partitions,
        ttl_s: ttl_s
      )
    end
    private_class_method :create_run

    # Enqueue one IndexPartitionJob per partition on the given queue.
    #
    # ActiveJob's +perform_later+ returns +false+ when the job could not be
    # enqueued. Such a partition would never report a terminal status, and the
    # parent would poll forever when no timeout is configured, so it is marked
    # failed in the store right away.
    #
    # @param store [Object, nil] run store used to record enqueue failures;
    #   when nil, enqueue failures are not recorded
    # @return [void]
    def enqueue_partitions(klass, partitions, into:, queue_name:, run_id:, store: nil)
      partitions.each do |partition|
        partition_key = SearchEngine::IndexingRun.partition_key(partition)
        enqueued = SearchEngine::IndexPartitionJob
                   .set(queue: queue_name)
                   .perform_later(
                     klass.name,
                     partition,
                     into: into,
                     metadata: {},
                     run_id: run_id,
                     partition_key: partition_key
                   )
        # Only an explicit false signals a rejected enqueue; any other return
        # value (including nil from a stubbed adapter) is treated as enqueued.
        next unless false.equal?(enqueued)

        store&.mark_failed(
          run_id: run_id,
          partition_key: partition_key,
          error: 'partition job could not be enqueued'
        )
      end
    end
    private_class_method :enqueue_partitions

    # Poll the run store until the run reaches a terminal state or times out.
    #
    # Check order per iteration: unusable snapshot, success, any failed
    # partition, deadline. With a nil timeout there is no deadline.
    #
    # @return [Hash] lifecycle result
    def wait_for_completion(store:, run_id:, timeout_s:, poll_interval_s:)
      deadline = timeout_s.nil? ? nil : monotonic_seconds + timeout_s.to_f

      loop do
        snapshot = store.snapshot(run_id: run_id)
        result = result_for_snapshot(snapshot)
        return finish_failure(snapshot, result) if invalid_snapshot_result?(result)
        return finish_success(snapshot, result) if result[:status] == :ok
        return finish_failure(snapshot, result) if failure_terminal?(snapshot)
        return timeout_result(store, run_id, snapshot) if deadline && monotonic_seconds >= deadline

        # Negative or nil intervals are clamped to 0 (no pause between polls).
        sleep([poll_interval_s.to_f, 0].max)
      end
    end
    private_class_method :wait_for_completion

    # Aggregate a snapshot into a lifecycle result via IndexingRun.
    def result_for_snapshot(snapshot)
      SearchEngine::IndexingRun.aggregate_result(snapshot)
    end
    private_class_method :result_for_snapshot

    # True for a :failed aggregate whose sample_error starts with
    # 'async partition indexing run snapshot'.
    # NOTE(review): presumably the message IndexingRun.aggregate_result emits
    # for a missing or malformed snapshot -- confirm against IndexingRun.
    def invalid_snapshot_result?(result)
      result[:status] == :failed && result[:sample_error].to_s.start_with?('async partition indexing run snapshot')
    end
    private_class_method :invalid_snapshot_result?

    # Emit the finished event and return the result unchanged.
    def finish_success(snapshot, result)
      instrument('search_engine.indexing_run.finished', event_payload(snapshot, result: result))
      result
    end
    private_class_method :finish_success

    # Normalize the result into a failure shape, emit the failed event, return it.
    def finish_failure(snapshot, result)
      result = failed_result(snapshot, result)
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end
    private_class_method :finish_failure

    # Build the result for a run that exceeded its deadline.
    #
    # Partitions that have not reached a terminal status are marked failed in
    # the store first; the snapshot is then re-read (falling back to the last
    # one seen if the store returns nil) so the result reflects those marks.
    def timeout_result(store, run_id, snapshot)
      mark_non_terminal_failed(store, run_id, snapshot)
      snapshot = store.snapshot(run_id: run_id) || snapshot
      result = failed_result(
        snapshot,
        SearchEngine::IndexingRun.aggregate_result(snapshot),
        sample_error: "SearchEngine async partition indexing timed out for run #{run_id}"
      )
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end
    private_class_method :timeout_result

    # Mark every partition whose status is neither 'succeeded' nor 'failed' as failed.
    # Does nothing when the snapshot carries no partitions Hash.
    def mark_non_terminal_failed(store, run_id, snapshot)
      partitions = snapshot && snapshot[:partitions]
      return unless partitions.is_a?(Hash)

      partitions.each do |partition_key, entry|
        status = entry[:status].to_s
        next if %w[succeeded failed].include?(status)

        store.mark_failed(
          run_id: run_id,
          partition_key: partition_key,
          error: 'partition did not finish before timeout'
        )
      end
    end
    private_class_method :mark_non_terminal_failed

    # Turn an aggregate into a failure result.
    #
    # Status is :partial when anything succeeded (success_total > 0 or a
    # partition with status 'succeeded'), otherwise :failed. failed_total is
    # the larger of the aggregate's count and the number of failed partitions.
    #
    # @param sample_error [String, nil] overrides the aggregate's sample_error
    # @return [Hash]
    def failed_result(snapshot, result, sample_error: nil)
      statuses = partition_statuses(snapshot)
      success_total = result[:success_total].to_i
      status = success_total.positive? || statuses.include?('succeeded') ? :partial : :failed

      result.merge(
        status: status,
        failed_total: [result[:failed_total].to_i, failed_partition_count(statuses)].max,
        sample_error: sample_error || result[:sample_error] || 'async partition indexing failed'
      )
    end
    private_class_method :failed_result

    # True as soon as any partition in the snapshot has status 'failed'.
    def failure_terminal?(snapshot)
      partition_statuses(snapshot).include?('failed')
    end
    private_class_method :failure_terminal?

    # @return [Array<String>] status of every partition entry; empty when the
    #   snapshot is nil or has no partitions Hash
    def partition_statuses(snapshot)
      partitions = snapshot && snapshot[:partitions]
      return [] unless partitions.is_a?(Hash)

      partitions.values.map { |entry| entry[:status].to_s }
    end
    private_class_method :partition_statuses

    # @return [Integer] number of 'failed' entries in the given statuses
    def failed_partition_count(statuses)
      statuses.count { |status| status == 'failed' }
    end
    private_class_method :failed_partition_count

    # Queue precedence: explicit argument, partition queue, indexer queue, 'search_index'.
    def resolve_queue_name(queue, cfg)
      (queue || cfg.partition_queue_name || cfg.queue_name || 'search_index').to_s
    end
    private_class_method :resolve_queue_name

    # Collection name for the class, falling back to the class name when the
    # class does not respond to .collection.
    def collection_name(klass)
      klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s
    end
    private_class_method :collection_name

    # Build an instrumentation payload from a snapshot (nil-safe), optionally
    # merged with the lifecycle result; result keys win on collision.
    def event_payload(snapshot, queue: nil, result: nil)
      payload = {
        run_id: snapshot && snapshot[:run_id],
        collection: snapshot && snapshot[:collection],
        collection_class_name: snapshot && snapshot[:collection_class_name],
        into: snapshot && snapshot[:into],
        total_partitions: snapshot && snapshot[:total_partitions],
        queue: queue
      }
      payload.merge!(result) if result
      payload
    end
    private_class_method :event_payload

    # Emit a point-in-time event (the instrumented block is intentionally empty).
    def instrument(event, payload)
      SearchEngine::Instrumentation.instrument(event, payload) {}
    end
    private_class_method :instrument

    # Monotonic clock reading in seconds; unaffected by wall-clock changes.
    def monotonic_seconds
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
    private_class_method :monotonic_seconds
  end
end
|
|
@@ -281,6 +281,11 @@ module SearchEngine
|
|
|
281
281
|
if compiled
|
|
282
282
|
parts = Array(compiled.partitions)
|
|
283
283
|
max_p = compiled.max_parallel.to_i
|
|
284
|
+
if __se_async_partition_execution_requested?(parts)
|
|
285
|
+
return __se_async_partition_result_unless_available unless __se_async_partition_execution_available?
|
|
286
|
+
|
|
287
|
+
return SearchEngine::AsyncPartitionCoordinator.call(klass: self, partitions: parts, into: into)
|
|
288
|
+
end
|
|
284
289
|
return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
|
|
285
290
|
|
|
286
291
|
__se_index_partitions_parallel!(parts, into, max_p, compiled)
|
|
@@ -292,6 +297,32 @@ module SearchEngine
|
|
|
292
297
|
sample_error: "#{error.class}: #{error.message.to_s[0, 200]}" }
|
|
293
298
|
end
|
|
294
299
|
|
|
300
|
+
# Whether this indexing pass should hand partitions to ActiveJob.
#
# Async execution is only requested when there is more than one partition
# and +indexer.partition_execution+ is +:active_job+ (or the equivalent
# string). The mode is compared via +to_s+ so that a nil or otherwise
# non-symbolizable setting means "not requested" instead of raising
# NoMethodError from +to_sym+.
#
# @param parts [Array<Object>] partitions resolved for this run
# @return [Boolean]
def __se_async_partition_execution_requested?(parts)
  return false unless parts.size > 1

  SearchEngine.config.indexer.partition_execution.to_s == 'active_job'
end
|
|
303
|
+
|
|
304
|
+
# Whether the constants needed for ActiveJob-backed partition execution exist.
#
# @return [String, nil] truthy ("constant") only when both ::ActiveJob::Base
#   and SearchEngine::IndexPartitionJob are defined; nil otherwise
def __se_async_partition_execution_available?
  return unless defined?(::ActiveJob::Base)

  defined?(SearchEngine::IndexPartitionJob)
end
|
|
307
|
+
|
|
308
|
+
# Build the failed lifecycle result returned when async partition execution
# was requested but its prerequisites are not loaded.
#
# @return [Hash] :failed result with zeroed counters and a sample_error
#   naming each missing constant (joined with " and ")
def __se_async_partition_result_unless_available
  prerequisites = {
    'ActiveJob::Base' => defined?(::ActiveJob::Base),
    'SearchEngine::IndexPartitionJob' => defined?(SearchEngine::IndexPartitionJob)
  }
  # Hash order is insertion order, so the message lists ActiveJob first.
  absent = prerequisites.reject { |_name, present| present }.keys

  {
    status: :failed,
    docs_total: 0,
    success_total: 0,
    failed_total: 0,
    sample_error: "async partition indexing requires #{absent.join(' and ')}"
  }
end
|
|
321
|
+
|
|
322
|
+
private :__se_async_partition_execution_requested?,
|
|
323
|
+
:__se_async_partition_execution_available?,
|
|
324
|
+
:__se_async_partition_result_unless_available
|
|
325
|
+
|
|
295
326
|
def __se_index_single_with_renderer!(into)
|
|
296
327
|
docs_estimate = __se_heuristic_docs_estimate(1)
|
|
297
328
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
data/lib/search_engine/config.rb
CHANGED
|
@@ -56,6 +56,10 @@ module SearchEngine
|
|
|
56
56
|
# @return [String, nil, false] path to host app SearchEngine models directory. May be
|
|
57
57
|
# relative to `Rails.root` (e.g., "app/search_engine") or absolute. When `nil` or
|
|
58
58
|
# `false`, gem-managed loading of host SearchEngine models is disabled.
|
|
59
|
+
# @!attribute [rw] syncable_callback_timing
|
|
60
|
+
# @return [Symbol] controls ActiveRecordSyncable callback timing.
|
|
61
|
+
# +:after_commit+ (default) uses +after_*_commit+ callbacks (safe, post-transaction).
|
|
62
|
+
# +:after_save+ uses legacy +after_*+ callbacks (in-transaction).
|
|
59
63
|
attr_accessor :logger,
|
|
60
64
|
:default_query_by,
|
|
61
65
|
:default_infix,
|
|
@@ -67,7 +71,8 @@ module SearchEngine
|
|
|
67
71
|
:client,
|
|
68
72
|
:default_console_model,
|
|
69
73
|
:search_engine_models,
|
|
70
|
-
:relation_print_materializes
|
|
74
|
+
:relation_print_materializes,
|
|
75
|
+
:syncable_callback_timing
|
|
71
76
|
|
|
72
77
|
# Lightweight nested configuration for schema lifecycle.
|
|
73
78
|
class SchemaConfig
|
|
@@ -103,6 +108,18 @@ module SearchEngine
|
|
|
103
108
|
attr_accessor :dispatch
|
|
104
109
|
# @return [String] queue name for ActiveJob enqueues
|
|
105
110
|
attr_accessor :queue_name
|
|
111
|
+
# @return [Symbol] partition execution mode: :inline or :active_job
|
|
112
|
+
attr_accessor :partition_execution
|
|
113
|
+
# @return [String, nil] optional queue override for async partition jobs
|
|
114
|
+
attr_accessor :partition_queue_name
|
|
115
|
+
# @return [Integer] parent wait polling interval for async partition runs (seconds)
|
|
116
|
+
attr_accessor :partition_poll_interval_s
|
|
117
|
+
# @return [Integer, nil] maximum parent wait budget for async partition runs (seconds)
|
|
118
|
+
attr_accessor :partition_timeout_s
|
|
119
|
+
# @return [Integer] TTL for async partition run metadata (seconds)
|
|
120
|
+
attr_accessor :partition_run_ttl_s
|
|
121
|
+
# @return [Object, nil] custom async partition run store
|
|
122
|
+
attr_accessor :partition_run_store
|
|
106
123
|
# @return [Boolean] whether to run model.count for progress bar estimates (default true)
|
|
107
124
|
attr_accessor :estimate_progress
|
|
108
125
|
# @return [Integer, nil] graceful-shutdown timeout (seconds) for the parallel
|
|
@@ -116,6 +133,12 @@ module SearchEngine
|
|
|
116
133
|
@gzip = false
|
|
117
134
|
@dispatch = active_job_available? ? :active_job : :inline
|
|
118
135
|
@queue_name = 'search_index'
|
|
136
|
+
@partition_execution = :inline
|
|
137
|
+
@partition_queue_name = nil
|
|
138
|
+
@partition_poll_interval_s = 2
|
|
139
|
+
@partition_timeout_s = nil
|
|
140
|
+
@partition_run_ttl_s = 86_400
|
|
141
|
+
@partition_run_store = nil
|
|
119
142
|
@estimate_progress = true
|
|
120
143
|
@pool_timeout = nil
|
|
121
144
|
end
|
|
@@ -402,6 +425,9 @@ module SearchEngine
|
|
|
402
425
|
@search_engine_models = 'app/search_engine'
|
|
403
426
|
# When true, Relation#inspect/pretty_print materialize a preview (AR-like).
|
|
404
427
|
@relation_print_materializes = true
|
|
428
|
+
# Controls whether ActiveRecordSyncable uses after_*_commit (safe, default)
|
|
429
|
+
# or after_* (legacy in-transaction) callbacks. Values: :after_commit, :after_save.
|
|
430
|
+
@syncable_callback_timing = :after_commit
|
|
405
431
|
end
|
|
406
432
|
|
|
407
433
|
# Whether the engine should avoid network I/O and use an offline client.
|
|
@@ -701,7 +727,8 @@ module SearchEngine
|
|
|
701
727
|
presets: presets_hash_for_to_h,
|
|
702
728
|
curation: curation_hash_for_to_h,
|
|
703
729
|
embedding: embedding_hash_for_to_h,
|
|
704
|
-
relation_print_materializes: relation_print_materializes ? true : false
|
|
730
|
+
relation_print_materializes: relation_print_materializes ? true : false,
|
|
731
|
+
syncable_callback_timing: syncable_callback_timing
|
|
705
732
|
}
|
|
706
733
|
end
|
|
707
734
|
|
|
@@ -728,6 +755,12 @@ module SearchEngine
|
|
|
728
755
|
gzip: indexer.gzip ? true : false,
|
|
729
756
|
dispatch: indexer.dispatch,
|
|
730
757
|
queue_name: indexer.queue_name,
|
|
758
|
+
partition_execution: indexer.partition_execution,
|
|
759
|
+
partition_queue_name: indexer.partition_queue_name,
|
|
760
|
+
partition_poll_interval_s: indexer.partition_poll_interval_s,
|
|
761
|
+
partition_timeout_s: indexer.partition_timeout_s,
|
|
762
|
+
partition_run_ttl_s: indexer.partition_run_ttl_s,
|
|
763
|
+
partition_run_store: indexer.partition_run_store,
|
|
731
764
|
estimate_progress: indexer.estimate_progress
|
|
732
765
|
}
|
|
733
766
|
end
|