search-engine-for-typesense 30.1.7.0 → 30.1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -0
- data/app/search_engine/search_engine/index_partition_job.rb +28 -1
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +18 -0
- data/lib/search_engine/async_partition_coordinator.rb +208 -0
- data/lib/search_engine/base/index_maintenance.rb +31 -0
- data/lib/search_engine/config.rb +24 -0
- data/lib/search_engine/indexing_run.rb +191 -0
- data/lib/search_engine/indexing_run_store/rails_cache.rb +231 -0
- data/lib/search_engine/indexing_run_store.rb +51 -0
- data/lib/search_engine/version.rb +1 -1
- data/lib/search_engine.rb +4 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0852da3905487de90a84633634760fba6d4806d236e6738a8404d70e9122eef4'
|
|
4
|
+
data.tar.gz: fe62daa0161f71c0b5bc24da4195708a96b1687bc096383a12056aa346178710
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6c12c7c80626ac24ea4fd5515559d354632a42a0bcd2381d61ed577aa3e24e6e60448cdf810defdf00d819e8960c93a69ce5129e27225a76c6a476e0b62115c4
|
|
7
|
+
data.tar.gz: 405565a325c099df28ba1d54d85a308f5297299a59eb40cd9f03a24ebb8306f588becc7c31be5d82cb3be05216cb443dd39a1152d417e07da2b64b97162cbe2a
|
data/README.md
CHANGED
|
@@ -164,6 +164,30 @@ You can control this explicitly with:
|
|
|
164
164
|
|
|
165
165
|
If you set `SearchEngine.configure { |c| c.client = ... }`, the custom client is always used.
|
|
166
166
|
|
|
167
|
+
## Async partition indexing
|
|
168
|
+
|
|
169
|
+
Async partition indexing is an advanced opt-in mode for partitioned full indexing. The default remains
|
|
170
|
+
inline execution. Apps with a real ActiveJob backend can use queue-backed partition execution so each
|
|
171
|
+
search/index partition imports into the same blue/green physical collection before the alias is swapped.
|
|
172
|
+
This mode does not require Sidekiq; use any ActiveJob backend that can run the partition jobs.
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
SearchEngine.configure do |c|
|
|
176
|
+
c.indexer.partition_execution = :active_job
|
|
177
|
+
c.indexer.partition_queue_name = "search_index_partitions"
|
|
178
|
+
c.indexer.partition_timeout_s = 7_200
|
|
179
|
+
end
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Async mode is partition-based, not app-domain based. The gem enqueues one
|
|
183
|
+
`SearchEngine::IndexPartitionJob` per configured partition, waits for every partition to finish, and
|
|
184
|
+
only then lets the schema lifecycle swap the alias. If any partition fails or times out, the previous
|
|
185
|
+
alias target remains active.
|
|
186
|
+
|
|
187
|
+
Use a shared `Rails.cache` backend, or provide `c.indexer.partition_run_store`, so worker processes and
|
|
188
|
+
the parent indexing process can see the same run metadata. Size the queue carefully: worker concurrency
|
|
189
|
+
multiplies with any per-partition `max_parallel` setting.
|
|
190
|
+
|
|
167
191
|
## Example app
|
|
168
192
|
|
|
169
193
|
See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/testing)).
|
|
@@ -8,6 +8,8 @@ module SearchEngine
|
|
|
8
8
|
# - partition [Object] (JSON-serializable)
|
|
9
9
|
# - into [String, nil]
|
|
10
10
|
# - metadata [Hash]
|
|
11
|
+
# - run_id [String, nil]
|
|
12
|
+
# - partition_key [String, nil]
|
|
11
13
|
class IndexPartitionJob < ::ActiveJob::Base
|
|
12
14
|
queue_as do
|
|
13
15
|
cfg = SearchEngine.config.indexer
|
|
@@ -31,11 +33,16 @@ module SearchEngine
|
|
|
31
33
|
# @param partition [Object]
|
|
32
34
|
# @param into [String, nil]
|
|
33
35
|
# @param metadata [Hash]
|
|
36
|
+
# @param run_id [String, nil]
|
|
37
|
+
# @param partition_key [String, nil]
|
|
34
38
|
# @return [void]
|
|
35
|
-
def perform(collection_class_name, partition, into: nil, metadata: {})
|
|
39
|
+
def perform(collection_class_name, partition, into: nil, metadata: {}, run_id: nil, partition_key: nil)
|
|
36
40
|
payload = nil
|
|
37
41
|
klass = constantize_collection!(collection_class_name)
|
|
38
42
|
payload = base_payload(klass, partition: partition, into: into)
|
|
43
|
+
run_store = indexing_run_store(run_id)
|
|
44
|
+
partition_key ||= SearchEngine::IndexingRun.partition_key(partition) if run_id
|
|
45
|
+
run_store&.mark_started(run_id: run_id, partition_key: partition_key, job_id: job_id)
|
|
39
46
|
instrument('search_engine.dispatcher.job_started',
|
|
40
47
|
payload.merge(queue: queue_name, job_id: job_id, metadata: metadata)
|
|
41
48
|
)
|
|
@@ -46,6 +53,7 @@ module SearchEngine
|
|
|
46
53
|
summary = SearchEngine::Indexer.rebuild_partition!(klass, partition: partition, into: into)
|
|
47
54
|
end
|
|
48
55
|
duration = (monotonic_ms - started).round(1)
|
|
56
|
+
run_store&.mark_succeeded(run_id: run_id, partition_key: partition_key, summary: summary)
|
|
49
57
|
|
|
50
58
|
instrument(
|
|
51
59
|
'search_engine.dispatcher.job_finished',
|
|
@@ -56,6 +64,7 @@ module SearchEngine
|
|
|
56
64
|
nil
|
|
57
65
|
rescue StandardError => error
|
|
58
66
|
safe_payload = payload || error_payload(error)
|
|
67
|
+
run_store&.mark_failed(run_id: run_id, partition_key: partition_key, error: error) unless retryable_error?(error)
|
|
59
68
|
instrument_error(error, payload: safe_payload.merge(metadata: metadata || {}))
|
|
60
69
|
raise
|
|
61
70
|
end
|
|
@@ -99,6 +108,24 @@ module SearchEngine
|
|
|
99
108
|
retry_job wait: wait_seconds
|
|
100
109
|
end
|
|
101
110
|
|
|
111
|
+
# Resolve the shared indexing-run store for jobs that belong to an async run.
#
# @param run_id [String, nil] run identifier passed to the job; nil for plain jobs
# @return [Object, nil] the resolved run store, or nil when the job has no run
def indexing_run_store(run_id)
  # Jobs enqueued outside an async partition run carry no run_id and must not touch the store.
  SearchEngine::IndexingRunStore.resolve unless run_id.nil?
end
|
|
116
|
+
|
|
117
|
+
# Tell whether the current failure may still be retried by the job.
#
# @param error [Exception] the error raised while indexing the partition
# @return [Boolean, nil] truthy when the error is transient and the execution
#   count is still below the retry policy's attempt budget
def retryable_error?(error)
  transient = transient_error?(error)
  return transient unless transient

  executions.to_i < retry_policy.attempts
end
|
|
120
|
+
|
|
121
|
+
# Classify an error as transient (worth retrying) or permanent.
#
# Timeouts and connection errors are always transient; API errors are transient
# only when the retry policy says their HTTP status is.
#
# @param error [Exception]
# @return [Boolean]
def transient_error?(error)
  case error
  when SearchEngine::Errors::Timeout, SearchEngine::Errors::Connection
    true
  else
    error.is_a?(SearchEngine::Errors::Api) &&
      SearchEngine::Indexer::RetryPolicy.transient_status?(error.status.to_i)
  end
end
|
|
128
|
+
|
|
102
129
|
def error_payload(error)
|
|
103
130
|
{
|
|
104
131
|
collection: arguments_dig_collection,
|
|
@@ -181,6 +181,24 @@ SearchEngine.configure do |c|
|
|
|
181
181
|
# Queue name for ActiveJob dispatch. Default: 'search_index'
|
|
182
182
|
# c.indexer.queue_name = 'search_index'
|
|
183
183
|
|
|
184
|
+
# Advanced partition execution for full indexing. Default: :inline
|
|
185
|
+
# Use :active_job with a real ActiveJob backend to enqueue one job per
|
|
186
|
+
# partition during blue/green indexing. Does not require Sidekiq.
|
|
187
|
+
# c.indexer.partition_execution = :inline
|
|
188
|
+
|
|
189
|
+
# Optional queue override for partition jobs. Falls back to c.indexer.queue_name.
|
|
190
|
+
# Size worker concurrency carefully: partition jobs can multiply partition max_parallel.
|
|
191
|
+
# c.indexer.partition_queue_name = nil
|
|
192
|
+
|
|
193
|
+
# Parent wait settings for async partition runs. Defaults shown. A nil timeout makes the parent wait indefinitely.
|
|
194
|
+
# c.indexer.partition_poll_interval_s = 2
|
|
195
|
+
# c.indexer.partition_timeout_s = nil
|
|
196
|
+
|
|
197
|
+
# Operational metadata for async partition runs. Defaults shown.
|
|
198
|
+
# Use shared Rails.cache or provide a custom store visible to workers and parent.
|
|
199
|
+
# c.indexer.partition_run_store = nil
|
|
200
|
+
# c.indexer.partition_run_ttl_s = 86_400
|
|
201
|
+
|
|
184
202
|
# --- Sources -------------------------------------------------------------
|
|
185
203
|
|
|
186
204
|
# ActiveRecord source: default ORM batch size. Default: 2000
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Coordinates ActiveJob-backed partition indexing and waits for run completion.
  #
  # The coordinator creates run metadata in a run store, enqueues one
  # SearchEngine::IndexPartitionJob per partition, then polls the store until the
  # run can be reported as succeeded, failed, or timed out.
  module AsyncPartitionCoordinator
    # Partition statuses that mean a job has not reported a final outcome yet.
    IN_FLIGHT_STATUSES = %w[pending running].freeze
    # Partition statuses that must not be overwritten when a run times out.
    TERMINAL_STATUSES = %w[succeeded failed].freeze
    private_constant :IN_FLIGHT_STATUSES, :TERMINAL_STATUSES

    module_function

    # Enqueue one partition job per partition and return a lifecycle-compatible aggregate.
    # @param klass [Class] SearchEngine collection class
    # @param partitions [Array<Object>] logical partition values
    # @param into [String] target physical collection name
    # @param queue [String, nil] optional ActiveJob queue override
    # @param timeout_s [Numeric, nil] maximum wait time in seconds; nil falls back to config
    # @param poll_interval_s [Numeric, nil] polling interval in seconds; nil falls back to config
    # @param store [Object, nil] optional run store; resolved from IndexingRunStore when nil
    # @param ttl_s [Numeric, nil] run metadata TTL in seconds; falls back to config
    # @return [Hash] lifecycle result with :status, :docs_total, :success_total, :failed_total, :sample_error
    def call(klass:, partitions:, into:, queue: nil, timeout_s: nil, poll_interval_s: nil, store: nil, ttl_s: nil)
      cfg = SearchEngine.config.indexer
      partition_list = Array(partitions)
      run_id = SearchEngine::IndexingRun.generate_id
      queue_name = resolve_queue_name(queue, cfg)
      run_store = SearchEngine::IndexingRunStore.validate!(store || SearchEngine::IndexingRunStore.resolve)
      ttl = ttl_s || cfg.partition_run_ttl_s

      snapshot = create_run(
        store: run_store,
        run_id: run_id,
        klass: klass,
        into: into,
        partitions: partition_list,
        ttl_s: ttl
      )
      instrument('search_engine.indexing_run.started', event_payload(snapshot, queue: queue_name))
      enqueue_partitions(klass, partition_list, into: into, queue_name: queue_name, run_id: run_id)
      instrument('search_engine.indexing_run.enqueued', event_payload(snapshot, queue: queue_name))

      wait_for_completion(
        store: run_store,
        run_id: run_id,
        timeout_s: timeout_s.nil? ? cfg.partition_timeout_s : timeout_s,
        poll_interval_s: poll_interval_s.nil? ? cfg.partition_poll_interval_s : poll_interval_s
      )
    end

    # Register the run and its partitions in the store.
    # @return [Hash, nil] whatever snapshot the store returns for the new run
    def create_run(store:, run_id:, klass:, into:, partitions:, ttl_s:)
      store.create_run(
        run_id: run_id,
        collection: collection_name(klass),
        collection_class_name: klass.name,
        into: into,
        partitions: partitions,
        ttl_s: ttl_s
      )
    end

    # Enqueue one IndexPartitionJob per partition on the given queue.
    # The partition key is computed here so parent and worker agree on it.
    def enqueue_partitions(klass, partitions, into:, queue_name:, run_id:)
      partitions.each do |partition|
        SearchEngine::IndexPartitionJob
          .set(queue: queue_name)
          .perform_later(
            klass.name,
            partition,
            into: into,
            metadata: {},
            run_id: run_id,
            partition_key: SearchEngine::IndexingRun.partition_key(partition)
          )
      end
    end

    # Poll the store until the run reaches an outcome.
    #
    # Outcomes are checked in this order on every poll:
    # 1. invalid or missing snapshot -> failure;
    # 2. any partition marked failed -> failure (checked before success so a
    #    failed partition can never be reported as :ok);
    # 3. aggregate :ok -> success;
    # 4. every partition settled but the aggregate is not :ok (for example
    #    document-level failures inside succeeded partitions) -> failure, so the
    #    loop cannot spin forever on a run that will never change again;
    # 5. deadline reached -> timeout failure.
    #
    # @param timeout_s [Numeric, nil] nil waits indefinitely
    # @return [Hash] lifecycle result
    def wait_for_completion(store:, run_id:, timeout_s:, poll_interval_s:)
      deadline = timeout_s.nil? ? nil : monotonic_seconds + timeout_s.to_f
      pause = [poll_interval_s.to_f, 0].max

      loop do
        snapshot = store.snapshot(run_id: run_id)
        result = result_for_snapshot(snapshot)
        return finish_failure(snapshot, result) if invalid_snapshot_result?(result)
        return finish_failure(snapshot, result) if failure_terminal?(snapshot)
        return finish_success(snapshot, result) if result[:status] == :ok
        return finish_failure(snapshot, result) if settled?(snapshot)
        return timeout_result(store, run_id, snapshot) if deadline && monotonic_seconds >= deadline

        sleep(pause)
      end
    end

    # @return [Hash] aggregate counters computed by IndexingRun for the snapshot
    def result_for_snapshot(snapshot)
      SearchEngine::IndexingRun.aggregate_result(snapshot)
    end

    # Detect the failure result IndexingRun produces for a missing or malformed snapshot.
    # The match relies on the message prefix used by IndexingRun's invalid-snapshot errors.
    def invalid_snapshot_result?(result)
      result[:status] == :failed && result[:sample_error].to_s.start_with?('async partition indexing run snapshot')
    end

    # Emit the finished event and return the result unchanged.
    def finish_success(snapshot, result)
      instrument('search_engine.indexing_run.finished', event_payload(snapshot, result: result))
      result
    end

    # Normalize the result into a failure shape, emit the failed event, and return it.
    def finish_failure(snapshot, result)
      result = failed_result(snapshot, result)
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end

    # Mark unfinished partitions as failed, re-read the run, and build the timeout failure.
    def timeout_result(store, run_id, snapshot)
      mark_non_terminal_failed(store, run_id, snapshot)
      # Fall back to the pre-marking snapshot if the store no longer has the run.
      snapshot = store.snapshot(run_id: run_id) || snapshot
      result = failed_result(
        snapshot,
        SearchEngine::IndexingRun.aggregate_result(snapshot),
        sample_error: "SearchEngine async partition indexing timed out for run #{run_id}"
      )
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end

    # Mark every partition that is neither succeeded nor failed as failed in the store.
    def mark_non_terminal_failed(store, run_id, snapshot)
      partitions = snapshot && snapshot[:partitions]
      return unless partitions.is_a?(Hash)

      partitions.each do |partition_key, entry|
        next if TERMINAL_STATUSES.include?(entry_status(entry))

        store.mark_failed(
          run_id: run_id,
          partition_key: partition_key,
          error: 'partition did not finish before timeout'
        )
      end
    end

    # Build a failure result: :partial when anything succeeded, :failed otherwise.
    # @param sample_error [String, nil] overrides the aggregate's sample error when given
    def failed_result(snapshot, result, sample_error: nil)
      statuses = partition_statuses(snapshot)
      success_total = result[:success_total].to_i
      status = success_total.positive? || statuses.include?('succeeded') ? :partial : :failed

      result.merge(
        status: status,
        failed_total: [result[:failed_total].to_i, failed_partition_count(statuses)].max,
        sample_error: sample_error || result[:sample_error] || 'async partition indexing failed'
      )
    end

    # @return [Boolean] true when at least one partition is marked failed
    def failure_terminal?(snapshot)
      partition_statuses(snapshot).include?('failed')
    end

    # @return [Boolean] true when the run has partitions and none is pending or running
    def settled?(snapshot)
      statuses = partition_statuses(snapshot)
      !statuses.empty? && statuses.none? { |status| IN_FLIGHT_STATUSES.include?(status) }
    end

    # @return [Array<String>] status string of every partition entry in the snapshot
    def partition_statuses(snapshot)
      partitions = snapshot && snapshot[:partitions]
      return [] unless partitions.is_a?(Hash)

      partitions.values.map { |entry| entry_status(entry) }
    end

    # Read an entry's status whether the store returned symbol or string keys;
    # non-Hash entries yield an empty status instead of raising.
    def entry_status(entry)
      return '' unless entry.is_a?(Hash)

      (entry[:status] || entry['status']).to_s
    end

    def failed_partition_count(statuses)
      statuses.count { |status| status == 'failed' }
    end

    # Queue precedence: explicit argument, partition queue, general indexer queue, built-in default.
    def resolve_queue_name(queue, cfg)
      (queue || cfg.partition_queue_name || cfg.queue_name || 'search_index').to_s
    end

    def collection_name(klass)
      klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s
    end

    # Build an instrumentation payload; tolerates a nil snapshot.
    def event_payload(snapshot, queue: nil, result: nil)
      payload = {
        run_id: snapshot && snapshot[:run_id],
        collection: snapshot && snapshot[:collection],
        collection_class_name: snapshot && snapshot[:collection_class_name],
        into: snapshot && snapshot[:into],
        total_partitions: snapshot && snapshot[:total_partitions],
        queue: queue
      }
      payload.merge!(result) if result
      payload
    end

    def instrument(event, payload)
      SearchEngine::Instrumentation.instrument(event, payload) {}
    end

    # Monotonic clock so the deadline is immune to wall-clock adjustments.
    def monotonic_seconds
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    private_class_method :create_run, :enqueue_partitions, :wait_for_completion, :result_for_snapshot,
                         :invalid_snapshot_result?, :finish_success, :finish_failure, :timeout_result,
                         :mark_non_terminal_failed, :failed_result, :failure_terminal?, :settled?,
                         :partition_statuses, :entry_status, :failed_partition_count, :resolve_queue_name,
                         :collection_name, :event_payload, :instrument, :monotonic_seconds
  end
end
|
|
@@ -281,6 +281,11 @@ module SearchEngine
|
|
|
281
281
|
if compiled
|
|
282
282
|
parts = Array(compiled.partitions)
|
|
283
283
|
max_p = compiled.max_parallel.to_i
|
|
284
|
+
if __se_async_partition_execution_requested?(parts)
|
|
285
|
+
return __se_async_partition_result_unless_available unless __se_async_partition_execution_available?
|
|
286
|
+
|
|
287
|
+
return SearchEngine::AsyncPartitionCoordinator.call(klass: self, partitions: parts, into: into)
|
|
288
|
+
end
|
|
284
289
|
return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
|
|
285
290
|
|
|
286
291
|
__se_index_partitions_parallel!(parts, into, max_p, compiled)
|
|
@@ -292,6 +297,32 @@ module SearchEngine
|
|
|
292
297
|
sample_error: "#{error.class}: #{error.message.to_s[0, 200]}" }
|
|
293
298
|
end
|
|
294
299
|
|
|
300
|
+
# Tell whether this indexing call should use queue-backed partition execution.
#
# Async execution is only requested for more than one partition and when the
# indexer config selects :active_job. The mode is compared as a string so a
# nil, Symbol, or String setting is handled without raising.
#
# @param parts [Array<Object>] compiled partition values
# @return [Boolean]
def __se_async_partition_execution_requested?(parts)
  parts.size > 1 && SearchEngine.config.indexer.partition_execution.to_s == 'active_job'
end
|
|
303
|
+
|
|
304
|
+
# Check that the constants async partition execution depends on are loaded.
#
# @return [String, nil] truthy (the `defined?` description) when both
#   ActiveJob::Base and SearchEngine::IndexPartitionJob are defined, nil otherwise
def __se_async_partition_execution_available?
  return nil unless defined?(::ActiveJob::Base)

  defined?(SearchEngine::IndexPartitionJob)
end
|
|
307
|
+
|
|
308
|
+
# Build the failure result returned when async partition execution was
# requested but its prerequisites are not loaded.
#
# @return [Hash] lifecycle-shaped result with status :failed, zeroed counters,
#   and a sample_error naming each missing constant
def __se_async_partition_result_unless_available
  prerequisites = {
    'ActiveJob::Base' => defined?(::ActiveJob::Base),
    'SearchEngine::IndexPartitionJob' => defined?(SearchEngine::IndexPartitionJob)
  }
  missing = prerequisites.reject { |_name, loaded| loaded }.keys

  {
    status: :failed,
    docs_total: 0,
    success_total: 0,
    failed_total: 0,
    sample_error: "async partition indexing requires #{missing.join(' and ')}"
  }
end
|
|
321
|
+
|
|
322
|
+
private :__se_async_partition_execution_requested?,
|
|
323
|
+
:__se_async_partition_execution_available?,
|
|
324
|
+
:__se_async_partition_result_unless_available
|
|
325
|
+
|
|
295
326
|
def __se_index_single_with_renderer!(into)
|
|
296
327
|
docs_estimate = __se_heuristic_docs_estimate(1)
|
|
297
328
|
renderer = SearchEngine::Logging::LiveRenderer.new(
|
data/lib/search_engine/config.rb
CHANGED
|
@@ -108,6 +108,18 @@ module SearchEngine
|
|
|
108
108
|
attr_accessor :dispatch
|
|
109
109
|
# @return [String] queue name for ActiveJob enqueues
|
|
110
110
|
attr_accessor :queue_name
|
|
111
|
+
# @return [Symbol] partition execution mode: :inline or :active_job
|
|
112
|
+
attr_accessor :partition_execution
|
|
113
|
+
# @return [String, nil] optional queue override for async partition jobs
|
|
114
|
+
attr_accessor :partition_queue_name
|
|
115
|
+
# @return [Integer] parent wait polling interval for async partition runs (seconds)
|
|
116
|
+
attr_accessor :partition_poll_interval_s
|
|
117
|
+
# @return [Integer, nil] maximum parent wait budget for async partition runs (seconds)
|
|
118
|
+
attr_accessor :partition_timeout_s
|
|
119
|
+
# @return [Integer] TTL for async partition run metadata (seconds)
|
|
120
|
+
attr_accessor :partition_run_ttl_s
|
|
121
|
+
# @return [Object, nil] custom async partition run store
|
|
122
|
+
attr_accessor :partition_run_store
|
|
111
123
|
# @return [Boolean] whether to run model.count for progress bar estimates (default true)
|
|
112
124
|
attr_accessor :estimate_progress
|
|
113
125
|
# @return [Integer, nil] graceful-shutdown timeout (seconds) for the parallel
|
|
@@ -121,6 +133,12 @@ module SearchEngine
|
|
|
121
133
|
@gzip = false
|
|
122
134
|
@dispatch = active_job_available? ? :active_job : :inline
|
|
123
135
|
@queue_name = 'search_index'
|
|
136
|
+
@partition_execution = :inline
|
|
137
|
+
@partition_queue_name = nil
|
|
138
|
+
@partition_poll_interval_s = 2
|
|
139
|
+
@partition_timeout_s = nil
|
|
140
|
+
@partition_run_ttl_s = 86_400
|
|
141
|
+
@partition_run_store = nil
|
|
124
142
|
@estimate_progress = true
|
|
125
143
|
@pool_timeout = nil
|
|
126
144
|
end
|
|
@@ -737,6 +755,12 @@ module SearchEngine
|
|
|
737
755
|
gzip: indexer.gzip ? true : false,
|
|
738
756
|
dispatch: indexer.dispatch,
|
|
739
757
|
queue_name: indexer.queue_name,
|
|
758
|
+
partition_execution: indexer.partition_execution,
|
|
759
|
+
partition_queue_name: indexer.partition_queue_name,
|
|
760
|
+
partition_poll_interval_s: indexer.partition_poll_interval_s,
|
|
761
|
+
partition_timeout_s: indexer.partition_timeout_s,
|
|
762
|
+
partition_run_ttl_s: indexer.partition_run_ttl_s,
|
|
763
|
+
partition_run_store: indexer.partition_run_store,
|
|
740
764
|
estimate_progress: indexer.estimate_progress
|
|
741
765
|
}
|
|
742
766
|
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'date'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'securerandom'
|
|
7
|
+
require 'time'
|
|
8
|
+
|
|
9
|
+
module SearchEngine
  # Helpers for async partition indexing run metadata.
  #
  # Provides run ids, stable partition keys, partition serialization for
  # storage/logging, and aggregation of per-partition entries into a
  # lifecycle-compatible result.
  module IndexingRun
    # Partition statuses that mean a job has not reported a final outcome yet.
    IN_FLIGHT_STATUSES = %w[pending running].freeze
    private_constant :IN_FLIGHT_STATUSES

    module_function

    # Generate a unique run id suitable for operational metadata keys.
    # @return [String] "se-run-<UTC timestamp>-<16 hex chars>"
    def generate_id
      "se-run-#{Time.now.utc.strftime('%Y%m%d%H%M%S')}-#{SecureRandom.hex(8)}"
    end

    # Return a stable primitive key for a partition value.
    # Hash partitions yield the same key regardless of key order or Symbol/String keys.
    # @param partition [Object]
    # @return [String] "p-<sha256 hex>"
    def partition_key(partition)
      "p-#{Digest::SHA256.hexdigest(normalized_partition_json(partition))}"
    end

    # Convert a partition value into ActiveJob-friendly primitives when possible.
    # @param value [Object]
    # @return [Object]
    def serialize_partition(value)
      normalize(value)
    end

    # Return a compact partition string for logs and snapshots.
    # @param partition [Object]
    # @param max [Integer] maximum length of the returned string
    # @return [String]
    def partition_display(partition, max: 160)
      text = begin
        JSON.generate(serialize_partition(partition))
      rescue StandardError
        # Values JSON cannot encode (e.g. NaN) still get a readable label.
        partition.to_s
      end
      compact(text, max)
    end

    # Build lifecycle-compatible aggregate counters from a run snapshot.
    # @param snapshot [Hash, nil]
    # @return [Hash] result with :status, :docs_total, :success_total, :failed_total, and :sample_error
    def aggregate_result(snapshot)
      unless valid_snapshot?(snapshot)
        return {
          status: :failed,
          docs_total: 0,
          success_total: 0,
          failed_total: 1,
          sample_error: invalid_snapshot_error(snapshot)
        }
      end

      docs_total = 0
      success_total = 0
      failed_total = 0
      sample_error = nil
      statuses = []

      snapshot[:partitions].each_value do |entry|
        data = symbolize_keys(entry)
        statuses << data[:status].to_s
        docs_total += data[:docs_total].to_i
        success_total += data[:success_total].to_i
        failed_total += data[:failed_total].to_i
        # The first non-blank error wins so the sample stays stable across polls.
        sample_error ||= present_string(data[:sample_error] || data[:error])
      end

      {
        status: aggregate_status(statuses, success_total, failed_total),
        docs_total: docs_total,
        success_total: success_total,
        failed_total: failed_total,
        sample_error: sample_error
      }
    end

    # Build the stored metadata for one partition, starting in the pending state.
    # @param partition [Object]
    # @return [Hash]
    def partition_entry(partition)
      serialized = serialize_partition(partition)
      {
        partition: serialized,
        partition_display: partition_display(serialized),
        status: 'pending',
        docs_total: 0,
        success_total: 0,
        failed_total: 0,
        sample_error: nil,
        job_id: nil,
        updated_at: iso8601_now
      }
    end

    # @return [String] current UTC time in ISO 8601 with microsecond precision
    def iso8601_now
      Time.now.utc.iso8601(6)
    end

    # Canonical JSON for hashing; falls back to a tagged to_s when JSON generation fails.
    def normalized_partition_json(value)
      JSON.generate(serialize_partition(value))
    rescue StandardError
      JSON.generate('__search_engine_fallback__' => value.to_s)
    end

    # Recursively reduce a value to JSON-friendly primitives.
    def normalize(value)
      case value
      when NilClass, TrueClass, FalseClass, Numeric, String
        value
      when Symbol
        value.to_s
      when Time
        value.utc.iso8601(6)
      when Date
        value.iso8601
      when Array
        value.map { |item| normalize(item) }
      when Hash
        normalize_hash(value)
      else
        value.to_s
      end
    end

    # Stringify and sort hash keys in a single pass.
    # When two keys stringify to the same name, an exact String key wins;
    # otherwise the first key in insertion order wins.
    def normalize_hash(value)
      by_name = {}
      value.each do |key, item|
        name = key.to_s
        next if by_name.key?(name) && !key.is_a?(String)

        by_name[name] = item
      end
      by_name.keys.sort.each_with_object({}) { |name, hash| hash[name] = normalize(by_name[name]) }
    end

    # Derive the run status from partition statuses and counters.
    #
    # A partition marked 'failed' counts as a failure even when its failure
    # counter is zero, so a failed partition can never aggregate to :ok.
    # @return [Symbol] :failed, :partial, :running, or :ok
    def aggregate_status(statuses, success_total, failed_total)
      return :failed if statuses.empty?

      if failed_total.positive? || statuses.include?('failed')
        return success_total.zero? ? :failed : :partial
      end
      return :running if statuses.any? { |status| IN_FLIGHT_STATUSES.include?(status) }

      :ok
    end

    # A snapshot is usable when it has a partitions Hash whose size matches total_partitions.
    def valid_snapshot?(snapshot)
      return false unless snapshot.is_a?(Hash)

      partitions = snapshot[:partitions]
      return false unless partitions.is_a?(Hash)

      expected_total = snapshot[:total_partitions].to_i
      expected_total.positive? && partitions.size == expected_total
    end

    # Describe why a snapshot failed validation.
    def invalid_snapshot_error(snapshot)
      return 'async partition indexing run snapshot is missing' unless snapshot.is_a?(Hash)
      return 'async partition indexing run snapshot has no partition metadata' unless snapshot[:partitions].is_a?(Hash)

      expected_total = snapshot[:total_partitions].to_i
      actual_total = snapshot[:partitions].size
      "async partition indexing run snapshot has #{actual_total}/#{expected_total} partitions"
    end

    # @return [Hash] copy with Symbol keys; {} for non-Hash input
    def symbolize_keys(hash)
      return {} unless hash.is_a?(Hash)

      hash.transform_keys(&:to_sym)
    end

    # @return [String, nil] the value as a String, or nil when blank
    def present_string(value)
      text = value.to_s
      text.strip.empty? ? nil : text
    end

    # Truncate text to at most max characters, ending in "..." when there is room for it.
    def compact(text, max)
      return text if text.length <= max
      # Below three characters there is no room for the ellipsis; hard-cut instead.
      return text[0, [max, 0].max] if max < 3

      "#{text[0, max - 3]}..."
    end

    private_class_method :normalized_partition_json, :normalize, :normalize_hash, :aggregate_status,
                         :valid_snapshot?, :invalid_snapshot_error, :symbolize_keys, :present_string, :compact
  end
end
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'search_engine/indexing_run'
|
|
4
|
+
|
|
5
|
+
module SearchEngine
|
|
6
|
+
module IndexingRunStore
|
|
7
|
+
# Rails.cache-backed implementation for async partition indexing run metadata.
|
|
8
|
+
class RailsCache
|
|
9
|
+
# @param cache [Object, nil] cache object responding to read/write/delete
|
|
10
|
+
# Build a store on top of the given cache, defaulting to Rails.cache.
#
# @param cache [Object, nil] cache object responding to read/write/delete
# @raise [ArgumentError] when no cache can be resolved or the cache fails validation
def initialize(cache: nil)
  @cache = cache
  @cache ||= resolve_rails_cache!
  validate_cache!
end
|
|
14
|
+
|
|
15
|
+
# Create a run snapshot with pending partition entries.
|
|
16
|
+
# @return [Hash]
|
|
17
|
+
# Persist run metadata plus one pending entry per partition, then read the run back.
#
# @param run_id [String] run identifier
# @param collection [String] logical collection name
# @param collection_class_name [String] collection class name
# @param into [String] target physical collection
# @param partitions [Array<Object>] logical partition values
# @param ttl_s [Numeric, nil] expiry passed to the cache as expires_in
# @return [Hash, nil] snapshot of the freshly created run
def create_run(run_id:, collection:, collection_class_name:, into:, partitions:, ttl_s:)
  pending_entries = build_partitions(partitions)
  write_meta(
    {
      run_id: run_id.to_s,
      collection: collection.to_s,
      collection_class_name: collection_class_name.to_s,
      into: into.to_s,
      status: 'running',
      total_partitions: Array(partitions).size,
      partition_keys: pending_entries.keys,
      ttl_s: ttl_s,
      created_at: SearchEngine::IndexingRun.iso8601_now,
      updated_at: SearchEngine::IndexingRun.iso8601_now
    },
    ttl_s: ttl_s
  )
  # Each partition lives under its own cache key, separate from the run metadata.
  pending_entries.each_pair do |key, pending_entry|
    @cache.write(partition_cache_key(run_id, key), pending_entry, expires_in: ttl_s)
  end
  snapshot(run_id: run_id)
end
|
|
37
|
+
|
|
38
|
+
# Mark a partition as started.
|
|
39
|
+
# @return [Hash]
|
|
40
|
+
# Flag a partition as running and remember which job picked it up.
#
# @param run_id [String]
# @param partition_key [String]
# @param job_id [Object, nil] stored as a String; left untouched when nil
# @return [Object] whatever update_partition returns
def mark_started(run_id:, partition_key:, job_id: nil)
  update_partition(run_id, partition_key) do |record|
    record[:status] = 'running'
    record[:job_id] = job_id.to_s unless job_id.nil?
  end
end
|
|
46
|
+
|
|
47
|
+
# Mark a partition as succeeded with import counters.
|
|
48
|
+
# @return [Hash]
|
|
49
|
+
# Record a finished partition together with its import summary.
#
# @param run_id [String]
# @param partition_key [String]
# @param summary [Object] import summary handed to assign_summary!
# @return [Object] whatever update_partition returns
def mark_succeeded(run_id:, partition_key:, summary:)
  update_partition(run_id, partition_key) do |record|
    assign_summary!(record, summary)
    record[:status] = 'succeeded'
    # Cleared after the summary is applied so no sample error remains on a succeeded entry.
    record[:sample_error] = nil
  end
end
|
|
56
|
+
|
|
57
|
+
# Mark a partition as failed.
|
|
58
|
+
# @return [Hash]
|
|
59
|
+
# Record a failed partition and its error.
#
# @param run_id [String]
# @param partition_key [String]
# @param error [Exception, String] failure cause, rendered via error_message
# @return [Object] whatever update_partition returns
def mark_failed(run_id:, partition_key:, error:)
  update_partition(run_id, partition_key) do |record|
    record[:status] = 'failed'
    record[:sample_error] = error_message(error)
    # A failed partition always counts as at least one failure.
    record[:failed_total] = [record[:failed_total].to_i, 1].max
  end
end
|
|
66
|
+
|
|
67
|
+
# Read the current run snapshot.
|
|
68
|
+
# @return [Hash, nil]
|
|
69
|
+
# Assemble the current view of a run from its metadata and partition entries.
#
# @param run_id [String]
# @return [Hash, nil] run metadata with :partitions and a recomputed :status,
#   or nil when the run metadata is not in the cache
def snapshot(run_id:)
  meta = symbolize_meta(@cache.read(meta_cache_key(run_id)))
  return nil unless meta

  partitions = {}
  Array(meta[:partition_keys]).map(&:to_s).each do |key|
    cached = @cache.read(partition_cache_key(run_id, key))
    # A key listed in the metadata but absent from the cache gets a placeholder entry.
    partitions[key] = cached.is_a?(Hash) ? deep_symbolize(cached) : missing_partition_entry(key)
  end

  meta[:partitions] = partitions
  meta[:status] = run_status(partitions)
  meta
end
|
|
86
|
+
|
|
87
|
+
# Expire a run immediately.
|
|
88
|
+
# @return [Object]
|
|
89
|
+
def expire(run_id:)
  meta_key = meta_cache_key(run_id)
  run = symbolize_meta(@cache.read(meta_key))
  stored_keys = run ? run[:partition_keys] : nil

  # Partition entries are removed before the meta record that lists them.
  Array(stored_keys).each { |key| @cache.delete(partition_cache_key(run_id, key)) }
  @cache.delete(meta_key)
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Looks up Rails.cache without referencing the Rails constant directly.
# @return [Object] the cache object returned by Rails.cache
# @raise [ArgumentError] when Rails is not defined, has no usable cache, or the lookup raises NameError
def resolve_rails_cache!
  unavailable = 'SearchEngine async partition indexing requires Rails.cache or a custom partition_run_store'

  rails = Object.const_defined?(:Rails) ? Object.const_get(:Rails) : nil
  cache = rails.respond_to?(:cache) ? rails.cache : nil
  cache || raise(ArgumentError, unavailable)
rescue NameError
  # NoMethodError is a NameError, so failures inside the lookup also end up here.
  raise ArgumentError, unavailable
end
|
|
110
|
+
|
|
111
|
+
# Checks that @cache exposes read/write/delete and that a written value can be read back.
# @return [nil]
# @raise [ArgumentError] when a method is missing, the probe read is falsy, or the cache raises
def validate_cache!
  absent = %i[read write delete].select { |name| !@cache.respond_to?(name) }
  unless absent.empty?
    raise ArgumentError, "Rails.cache is unusable for indexing runs; missing: #{absent.join(', ')}"
  end

  # Round-trip a short-lived probe value through the cache.
  key = meta_cache_key("__probe__#{object_id}")
  @cache.write(key, { ok: true }, expires_in: 1)
  round_trip = @cache.read(key)
  @cache.delete(key)

  raise ArgumentError, 'Rails.cache is unusable for indexing runs; probe read returned nil' unless round_trip
rescue StandardError => error
  # Our own ArgumentErrors pass through unchanged; anything else is wrapped.
  raise if error.is_a?(ArgumentError)

  raise ArgumentError, "Rails.cache is unusable for indexing runs: #{error.class}: #{error.message}"
end
|
|
129
|
+
|
|
130
|
+
# Builds a hash of partition key => initial entry using the SearchEngine::IndexingRun helpers.
# @param partitions [Object] coerced with Array()
# @return [Hash]
def build_partitions(partitions)
  pairs = Array(partitions).map do |partition|
    [SearchEngine::IndexingRun.partition_key(partition), SearchEngine::IndexingRun.partition_entry(partition)]
  end
  pairs.to_h
end
|
|
135
|
+
|
|
136
|
+
# Writes the run's meta record under its meta cache key.
# @param run [Hash] must carry :run_id
# @param ttl_s [Object] passed to the cache as expires_in
def write_meta(run, ttl_s:)
  key = meta_cache_key(run[:run_id])
  @cache.write(key, run, expires_in: ttl_s)
end
|
|
139
|
+
|
|
140
|
+
# Read-modify-write helper shared by the mark_* methods.
#
# Loads the run meta and the partition entry, yields the symbolized entry to
# the block for mutation, stamps updated_at on both records, writes them back
# with the run's ttl_s, and returns a fresh snapshot.
#
# @param run_id [Object]
# @param partition_key [Object] compared and stored as a String
# @yieldparam entry [Hash] the partition entry to mutate in place
# @return [Hash, nil] result of snapshot(run_id:)
# @raise [KeyError] when the run, the partition key, or a usable partition entry is missing
def update_partition(run_id, partition_key)
  meta = symbolize_meta(@cache.read(meta_cache_key(run_id)))
  raise KeyError, "indexing run not found: #{run_id}" unless meta

  partition_key = partition_key.to_s
  unless Array(meta[:partition_keys]).map(&:to_s).include?(partition_key)
    raise KeyError, "indexing run partition not found: #{partition_key}"
  end

  entry_key = partition_cache_key(run_id, partition_key)
  entry = @cache.read(entry_key)
  # snapshot treats any non-Hash payload as missing; do the same here so a
  # corrupted entry raises KeyError instead of failing inside the caller's block.
  raise KeyError, "indexing run partition not found: #{partition_key}" unless entry.is_a?(Hash)

  entry = deep_symbolize(entry)
  yield entry

  now = SearchEngine::IndexingRun.iso8601_now
  entry[:updated_at] = now
  @cache.write(entry_key, entry, expires_in: meta[:ttl_s])
  meta[:updated_at] = now
  write_meta(meta, ttl_s: meta[:ttl_s])
  snapshot(run_id: run_id)
end
|
|
161
|
+
|
|
162
|
+
# Derives the run status from its partition entries.
# Any failed partition makes the run 'failed'; the run is 'succeeded' only when
# every partition is (which includes the empty case); otherwise it is 'running'.
# @param partitions [Hash] partition key => entry with a :status
# @return [String]
def run_status(partitions)
  states = partitions.each_value.map { |entry| entry[:status].to_s }

  if states.any? { |state| state == 'failed' }
    'failed'
  elsif states.none? { |state| state != 'succeeded' }
    'succeeded'
  else
    'running'
  end
end
|
|
169
|
+
|
|
170
|
+
# Copies the import counters and sample error from a summary onto a partition entry.
# Counters are coerced with to_i; sample_error is copied as-is.
def assign_summary!(entry, summary)
  %i[docs_total success_total failed_total].each do |counter|
    entry[counter] = summary_value(summary, counter).to_i
  end
  entry[:sample_error] = summary_value(summary, :sample_error)
end
|
|
176
|
+
|
|
177
|
+
# Reads one field from a summary that may be an object with reader methods
# or a Hash keyed by symbol or string.
# @return [Object, nil] nil when the field cannot be found
def summary_value(summary, key)
  return summary.public_send(key) if summary.respond_to?(key)
  return nil unless summary.is_a?(Hash)

  # Symbol key takes precedence over the string key.
  [key, key.to_s].each do |candidate|
    return summary[candidate] if summary.key?(candidate)
  end
  nil
end
|
|
184
|
+
|
|
185
|
+
# Normalizes an error into the string stored as a partition's sample_error.
#
# Strings are returned unchanged. Anything else is rendered as
# "<class>: <message>" with the message limited to 200 characters. Objects
# without a #message method (nil, symbols, ...) fall back to #to_s, so
# recording a failure never raises NoMethodError itself.
#
# @param error [String, Exception, Object]
# @return [String]
def error_message(error)
  return error if error.is_a?(String)

  detail = error.respond_to?(:message) ? error.message : error
  "#{error.class}: #{detail.to_s[0, 200]}"
end
|
|
190
|
+
|
|
191
|
+
# Cache key of a run's meta record.
# @return [String]
def meta_cache_key(run_id)
  namespace = 'search_engine:indexing_run'
  "#{namespace}:#{run_id}:meta"
end
|
|
194
|
+
|
|
195
|
+
# Cache key of a single partition entry within a run.
# @return [String]
def partition_cache_key(run_id, partition_key)
  namespace = 'search_engine:indexing_run'
  "#{namespace}:#{run_id}:partition:#{partition_key}"
end
|
|
198
|
+
|
|
199
|
+
# Placeholder entry for a partition that has no usable stored entry.
# It is reported as failed with a single failure and an explanatory sample_error.
# @return [Hash]
def missing_partition_entry(partition_key)
  placeholder = {
    partition: nil,
    partition_display: partition_key,
    status: 'failed',
    docs_total: 0,
    success_total: 0,
    failed_total: 1
  }
  placeholder[:sample_error] = "partition metadata missing for #{partition_key}"
  placeholder[:job_id] = nil
  placeholder[:updated_at] = SearchEngine::IndexingRun.iso8601_now
  placeholder
end
|
|
212
|
+
|
|
213
|
+
# Symbolizes a meta record read from the cache.
# @return [Hash, nil] nil for anything that is not a Hash
def symbolize_meta(meta)
  meta.is_a?(Hash) ? deep_symbolize(meta) : nil
end
|
|
218
|
+
|
|
219
|
+
# Recursively converts Hash keys to symbols, descending into nested Hashes and Arrays.
#
# Keys that cannot be symbolized (for example Integer keys) are kept unchanged
# instead of raising NoMethodError. Non-collection values are returned as-is.
#
# @param value [Object]
# @return [Object] a new structure for Hash/Array input, the value itself otherwise
def deep_symbolize(value)
  case value
  when Hash
    value.each_with_object({}) do |(key, item), result|
      normalized_key = key.respond_to?(:to_sym) ? key.to_sym : key
      result[normalized_key] = deep_symbolize(item)
    end
  when Array
    value.map { |item| deep_symbolize(item) }
  else
    value
  end
end
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Entry point for obtaining the store that tracks async partition indexing
  # runs, plus the duck-typed contract such a store has to satisfy.
  module IndexingRunStore
    module_function

    # Returns the store configured on the indexer, or the Rails.cache-backed
    # default when none is configured.
    # @param config [SearchEngine::Config, nil] falls back to SearchEngine.config
    # @return [Object]
    def resolve(config: nil)
      configured_store(config || SearchEngine.config) || default
    end

    # Instantiates the default store.
    # @return [SearchEngine::IndexingRunStore::RailsCache]
    def default
      require 'search_engine/indexing_run_store/rails_cache'
      SearchEngine::IndexingRunStore::RailsCache.new
    end

    # Ensures the given object responds to every method of the store contract.
    # @param store [Object]
    # @return [Object] the store itself
    # @raise [ArgumentError] listing the methods that are missing
    def validate!(store)
      missing = required_methods.select { |name| !store.respond_to?(name) }
      raise ArgumentError, "indexing run store missing methods: #{missing.join(', ')}" unless missing.empty?

      store
    end

    # Methods every run store must respond to.
    # @return [Array<Symbol>]
    def required_methods
      %i[create_run mark_started mark_succeeded mark_failed snapshot expire]
    end

    # Reads config.indexer.partition_run_store when both accessors exist and
    # validates a configured store; returns nil when nothing is configured.
    def configured_store(config)
      return nil unless config.respond_to?(:indexer)

      indexer = config.indexer
      return nil unless indexer.respond_to?(:partition_run_store)

      store = indexer.partition_run_store
      return nil unless store

      validate!(store)
    end
    private_class_method :configured_store
  end
end
|
data/lib/search_engine.rb
CHANGED
|
@@ -30,6 +30,10 @@ require 'search_engine/indexer/bulk_import'
|
|
|
30
30
|
require 'search_engine/mapper'
|
|
31
31
|
require 'search_engine/sources'
|
|
32
32
|
require 'search_engine/partitioner'
|
|
33
|
+
require 'search_engine/indexing_run'
|
|
34
|
+
require 'search_engine/indexing_run_store'
|
|
35
|
+
require 'search_engine/indexing_run_store/rails_cache'
|
|
36
|
+
require 'search_engine/async_partition_coordinator'
|
|
33
37
|
require 'search_engine/dispatcher'
|
|
34
38
|
require 'search_engine/joins/guard'
|
|
35
39
|
require 'search_engine/admin'
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: search-engine-for-typesense
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 30.1.
|
|
4
|
+
version: 30.1.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nikita Shkoda
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: concurrent-ruby
|
|
@@ -92,6 +92,7 @@ files:
|
|
|
92
92
|
- lib/search_engine/ast/prefix.rb
|
|
93
93
|
- lib/search_engine/ast/raw.rb
|
|
94
94
|
- lib/search_engine/ast/unary_op.rb
|
|
95
|
+
- lib/search_engine/async_partition_coordinator.rb
|
|
95
96
|
- lib/search_engine/base.rb
|
|
96
97
|
- lib/search_engine/base/creation.rb
|
|
97
98
|
- lib/search_engine/base/deletion.rb
|
|
@@ -150,6 +151,9 @@ files:
|
|
|
150
151
|
- lib/search_engine/indexer/import_dispatcher.rb
|
|
151
152
|
- lib/search_engine/indexer/import_response_parser.rb
|
|
152
153
|
- lib/search_engine/indexer/retry_policy.rb
|
|
154
|
+
- lib/search_engine/indexing_run.rb
|
|
155
|
+
- lib/search_engine/indexing_run_store.rb
|
|
156
|
+
- lib/search_engine/indexing_run_store/rails_cache.rb
|
|
153
157
|
- lib/search_engine/instrumentation.rb
|
|
154
158
|
- lib/search_engine/interruptible_pool.rb
|
|
155
159
|
- lib/search_engine/joins/guard.rb
|