search-engine-for-typesense 30.1.7.0 → 30.1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9eea6beb086957bbee9df77008a70750be3a74f6f39a53be361a3f1ae8295219
4
- data.tar.gz: 0eaf79b97f1c68b7bdab350f11cbfe09eecfb4944c76fd5c49528b68e63a5f09
3
+ metadata.gz: '0852da3905487de90a84633634760fba6d4806d236e6738a8404d70e9122eef4'
4
+ data.tar.gz: fe62daa0161f71c0b5bc24da4195708a96b1687bc096383a12056aa346178710
5
5
  SHA512:
6
- metadata.gz: ab9dd851d14c4a7019902afad59af281623576952ade57dd2a7df7d458caf68a3f432ac742873b52bc94fd8a88b14ef8715807d7ab3d7d7fd44cc05765424489
7
- data.tar.gz: 546d394202e194705c24f1c2584b57239bca5e1084d299a3c801fc2781c480d666e47e2906992e6885c788a12ec9162c57e18b1e7eb88e65e93d2a661a2d27f3
6
+ metadata.gz: 6c12c7c80626ac24ea4fd5515559d354632a42a0bcd2381d61ed577aa3e24e6e60448cdf810defdf00d819e8960c93a69ce5129e27225a76c6a476e0b62115c4
7
+ data.tar.gz: 405565a325c099df28ba1d54d85a308f5297299a59eb40cd9f03a24ebb8306f588becc7c31be5d82cb3be05216cb443dd39a1152d417e07da2b64b97162cbe2a
data/README.md CHANGED
@@ -164,6 +164,30 @@ You can control this explicitly with:
164
164
 
165
165
  If you set `SearchEngine.configure { |c| c.client = ... }`, the custom client is always used.
166
166
 
167
+ ## Async partition indexing
168
+
169
+ Async partition indexing is an advanced opt-in mode for partitioned full indexing. The default remains
170
+ inline execution. Apps with a real ActiveJob backend can use queue-backed partition execution so each
171
+ search/index partition imports into the same blue/green physical collection before the alias is swapped.
172
+ This mode does not require Sidekiq; use any ActiveJob backend that can run the partition jobs.
173
+
174
+ ```ruby
175
+ SearchEngine.configure do |c|
176
+ c.indexer.partition_execution = :active_job
177
+ c.indexer.partition_queue_name = "search_index_partitions"
178
+ c.indexer.partition_timeout_s = 7_200
179
+ end
180
+ ```
181
+
182
+ Async mode is partition-based, not app-domain based. The gem enqueues one
183
+ `SearchEngine::IndexPartitionJob` per configured partition, waits for every partition to finish, and
184
+ only then lets the schema lifecycle swap the alias. If any partition fails or times out, the previous
185
+ alias target remains active.
186
+
187
+ Use a shared `Rails.cache` backend, or provide `c.indexer.partition_run_store`, so worker processes and
188
+ the parent indexing process can see the same run metadata. Size the queue carefully: worker concurrency
189
+ multiplies with any per-partition `max_parallel` setting.
190
+
167
191
  ## Example app
168
192
 
169
193
  See `examples/demo_shop` — demonstrates single/multi search, JOINs, grouping, presets/curation, and DX/observability. Supports offline mode via the stub client (see [Testing](https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/testing)).
@@ -8,6 +8,8 @@ module SearchEngine
8
8
  # - partition [Object] (JSON-serializable)
9
9
  # - into [String, nil]
10
10
  # - metadata [Hash]
11
+ # - run_id [String, nil]
12
+ # - partition_key [String, nil]
11
13
  class IndexPartitionJob < ::ActiveJob::Base
12
14
  queue_as do
13
15
  cfg = SearchEngine.config.indexer
@@ -31,11 +33,16 @@ module SearchEngine
31
33
  # @param partition [Object]
32
34
  # @param into [String, nil]
33
35
  # @param metadata [Hash]
36
+ # @param run_id [String, nil]
37
+ # @param partition_key [String, nil]
34
38
  # @return [void]
35
- def perform(collection_class_name, partition, into: nil, metadata: {})
39
+ def perform(collection_class_name, partition, into: nil, metadata: {}, run_id: nil, partition_key: nil)
36
40
  payload = nil
37
41
  klass = constantize_collection!(collection_class_name)
38
42
  payload = base_payload(klass, partition: partition, into: into)
43
+ run_store = indexing_run_store(run_id)
44
+ partition_key ||= SearchEngine::IndexingRun.partition_key(partition) if run_id
45
+ run_store&.mark_started(run_id: run_id, partition_key: partition_key, job_id: job_id)
39
46
  instrument('search_engine.dispatcher.job_started',
40
47
  payload.merge(queue: queue_name, job_id: job_id, metadata: metadata)
41
48
  )
@@ -46,6 +53,7 @@ module SearchEngine
46
53
  summary = SearchEngine::Indexer.rebuild_partition!(klass, partition: partition, into: into)
47
54
  end
48
55
  duration = (monotonic_ms - started).round(1)
56
+ run_store&.mark_succeeded(run_id: run_id, partition_key: partition_key, summary: summary)
49
57
 
50
58
  instrument(
51
59
  'search_engine.dispatcher.job_finished',
@@ -56,6 +64,7 @@ module SearchEngine
56
64
  nil
57
65
  rescue StandardError => error
58
66
  safe_payload = payload || error_payload(error)
67
+ run_store&.mark_failed(run_id: run_id, partition_key: partition_key, error: error) unless retryable_error?(error)
59
68
  instrument_error(error, payload: safe_payload.merge(metadata: metadata || {}))
60
69
  raise
61
70
  end
@@ -99,6 +108,24 @@ module SearchEngine
99
108
  retry_job wait: wait_seconds
100
109
  end
101
110
 
111
# Look up the run store for a tracked run.
# @param run_id [String, nil] run identifier passed to the job
# @return [Object, nil] the resolved run store, or nil when the job has no run id
def indexing_run_store(run_id)
  SearchEngine::IndexingRunStore.resolve unless run_id.nil?
end
116
+
117
# Decide whether the error should be left to the retry machinery.
# @param error [Exception]
# @return [Boolean] truthy when the error is transient and the execution count
#   is still below the retry policy's attempt limit
def retryable_error?(error)
  transient = transient_error?(error)
  return transient unless transient

  executions.to_i < retry_policy.attempts
end
120
+
121
# Classify an error as transient.
# Timeouts and connection errors always count; API errors count only when
# the retry policy reports their HTTP status as transient.
# @param error [Exception]
# @return [Boolean]
def transient_error?(error)
  case error
  when SearchEngine::Errors::Timeout, SearchEngine::Errors::Connection
    true
  else
    error.is_a?(SearchEngine::Errors::Api) &&
      SearchEngine::Indexer::RetryPolicy.transient_status?(error.status.to_i)
  end
end
128
+
102
129
  def error_payload(error)
103
130
  {
104
131
  collection: arguments_dig_collection,
@@ -181,6 +181,24 @@ SearchEngine.configure do |c|
181
181
  # Queue name for ActiveJob dispatch. Default: 'search_index'
182
182
  # c.indexer.queue_name = 'search_index'
183
183
 
184
+ # Advanced partition execution for full indexing. Default: :inline
185
+ # Use :active_job with a real ActiveJob backend to enqueue one job per
186
+ # partition during blue/green indexing. Does not require Sidekiq.
187
+ # c.indexer.partition_execution = :inline
188
+
189
+ # Optional queue override for partition jobs. Falls back to c.indexer.queue_name.
190
+ # Size worker concurrency carefully: partition jobs can multiply partition max_parallel.
191
+ # c.indexer.partition_queue_name = nil
192
+
193
+ # Parent wait settings for async partition runs. Defaults shown.
194
+ # c.indexer.partition_poll_interval_s = 2
195
+ # c.indexer.partition_timeout_s = nil
196
+
197
+ # Operational metadata for async partition runs. Defaults shown.
198
+ # Use shared Rails.cache or provide a custom store visible to workers and parent.
199
+ # c.indexer.partition_run_store = nil
200
+ # c.indexer.partition_run_ttl_s = 86_400
201
+
184
202
  # --- Sources -------------------------------------------------------------
185
203
 
186
204
  # ActiveRecord source: default ORM batch size. Default: 2000
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Coordinates ActiveJob-backed partition indexing and waits for run completion.
  #
  # A run is registered in the run store, one `SearchEngine::IndexPartitionJob`
  # is enqueued per partition, and the calling process then polls the store
  # until the run reaches a terminal state or the wait budget runs out.
  module AsyncPartitionCoordinator
    # Partition statuses that this coordinator treats as finished.
    TERMINAL_STATUSES = %w[succeeded failed].freeze
    private_constant :TERMINAL_STATUSES

    module_function

    # Enqueue one partition job per partition and return a lifecycle-compatible aggregate.
    # @param klass [Class] SearchEngine collection class
    # @param partitions [Array<Object>] logical partition values
    # @param into [String] target physical collection name
    # @param queue [String, nil] optional ActiveJob queue override
    # @param timeout_s [Numeric, nil] maximum wait time in seconds (nil falls back to the indexer config)
    # @param poll_interval_s [Numeric, nil] polling interval in seconds (nil falls back to the indexer config)
    # @param store [Object, nil] optional run store
    # @param ttl_s [Numeric, nil] run metadata TTL in seconds
    # @return [Hash] lifecycle result with :status, :docs_total, :success_total, :failed_total, :sample_error
    # @raise [ArgumentError] when the run store does not implement the store contract
    def call(klass:, partitions:, into:, queue: nil, timeout_s: nil, poll_interval_s: nil, store: nil, ttl_s: nil)
      cfg = SearchEngine.config.indexer
      partition_list = Array(partitions)
      run_id = SearchEngine::IndexingRun.generate_id
      queue_name = resolve_queue_name(queue, cfg)
      run_store = SearchEngine::IndexingRunStore.validate!(store || SearchEngine::IndexingRunStore.resolve)
      ttl = ttl_s || cfg.partition_run_ttl_s

      snapshot = create_run(
        store: run_store,
        run_id: run_id,
        klass: klass,
        into: into,
        partitions: partition_list,
        ttl_s: ttl
      )
      instrument('search_engine.indexing_run.started', event_payload(snapshot, queue: queue_name))
      enqueue_partitions(klass, partition_list, into: into, queue_name: queue_name, run_id: run_id)
      instrument('search_engine.indexing_run.enqueued', event_payload(snapshot, queue: queue_name))

      wait_for_completion(
        store: run_store,
        run_id: run_id,
        timeout_s: timeout_s.nil? ? cfg.partition_timeout_s : timeout_s,
        poll_interval_s: poll_interval_s.nil? ? cfg.partition_poll_interval_s : poll_interval_s
      )
    end

    # Register the run and its partitions in the store.
    # @return [Object] whatever the store's `create_run` returns (a snapshot for the built-in store)
    def create_run(store:, run_id:, klass:, into:, partitions:, ttl_s:)
      store.create_run(
        run_id: run_id,
        collection: collection_name(klass),
        collection_class_name: klass.name,
        into: into,
        partitions: partitions,
        ttl_s: ttl_s
      )
    end
    private_class_method :create_run

    # Enqueue one IndexPartitionJob per partition on the given queue.
    def enqueue_partitions(klass, partitions, into:, queue_name:, run_id:)
      partitions.each do |partition|
        SearchEngine::IndexPartitionJob
          .set(queue: queue_name)
          .perform_later(
            klass.name,
            partition,
            into: into,
            metadata: {},
            run_id: run_id,
            partition_key: SearchEngine::IndexingRun.partition_key(partition)
          )
      end
    end
    private_class_method :enqueue_partitions

    # Poll the store until the run is finished, broken, or out of time.
    # @param timeout_s [Numeric, nil] nil disables the deadline
    # @return [Hash] lifecycle result
    def wait_for_completion(store:, run_id:, timeout_s:, poll_interval_s:)
      deadline = timeout_s.nil? ? nil : monotonic_seconds + timeout_s.to_f

      loop do
        snapshot = store.snapshot(run_id: run_id)
        result = result_for_snapshot(snapshot)
        return finish_failure(snapshot, result) if invalid_snapshot_result?(result)
        return finish_success(snapshot, result) if result[:status] == :ok
        return finish_failure(snapshot, result) if failure_terminal?(snapshot)
        # Every partition has finished but the aggregate is not :ok (e.g. a
        # partition succeeded with failed documents). Nothing will change on a
        # later poll, so stop here instead of waiting for a deadline that may
        # not exist.
        return finish_failure(snapshot, result) if all_terminal?(snapshot)
        return timeout_result(store, run_id, snapshot) if deadline && monotonic_seconds >= deadline

        sleep([poll_interval_s.to_f, 0].max)
      end
    end
    private_class_method :wait_for_completion

    def result_for_snapshot(snapshot)
      SearchEngine::IndexingRun.aggregate_result(snapshot)
    end
    private_class_method :result_for_snapshot

    # True when the aggregate reports a failed result whose error text marks
    # the snapshot itself as unusable (matched by message prefix).
    def invalid_snapshot_result?(result)
      result[:status] == :failed && result[:sample_error].to_s.start_with?('async partition indexing run snapshot')
    end
    private_class_method :invalid_snapshot_result?

    def finish_success(snapshot, result)
      instrument('search_engine.indexing_run.finished', event_payload(snapshot, result: result))
      result
    end
    private_class_method :finish_success

    def finish_failure(snapshot, result)
      result = failed_result(snapshot, result)
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end
    private_class_method :finish_failure

    # Mark unfinished partitions as failed, re-read the run, and report a timeout.
    def timeout_result(store, run_id, snapshot)
      mark_non_terminal_failed(store, run_id, snapshot)
      snapshot = store.snapshot(run_id: run_id) || snapshot
      result = failed_result(
        snapshot,
        SearchEngine::IndexingRun.aggregate_result(snapshot),
        sample_error: "SearchEngine async partition indexing timed out for run #{run_id}"
      )
      instrument('search_engine.indexing_run.failed', event_payload(snapshot, result: result))
      result
    end
    private_class_method :timeout_result

    # Record a failure for every partition that has not reached a terminal status.
    def mark_non_terminal_failed(store, run_id, snapshot)
      partitions = snapshot && snapshot[:partitions]
      return unless partitions.is_a?(Hash)

      partitions.each do |partition_key, entry|
        next if TERMINAL_STATUSES.include?(entry_status(entry))

        store.mark_failed(
          run_id: run_id,
          partition_key: partition_key,
          error: 'partition did not finish before timeout'
        )
      end
    end
    private_class_method :mark_non_terminal_failed

    # Turn an aggregate into a failure result: :partial when anything succeeded,
    # otherwise :failed, with failed_total at least the number of failed partitions.
    def failed_result(snapshot, result, sample_error: nil)
      statuses = partition_statuses(snapshot)
      success_total = result[:success_total].to_i
      status = success_total.positive? || statuses.include?('succeeded') ? :partial : :failed

      result.merge(
        status: status,
        failed_total: [result[:failed_total].to_i, failed_partition_count(statuses)].max,
        sample_error: sample_error || result[:sample_error] || 'async partition indexing failed'
      )
    end
    private_class_method :failed_result

    def failure_terminal?(snapshot)
      partition_statuses(snapshot).include?('failed')
    end
    private_class_method :failure_terminal?

    # True when the snapshot lists at least one partition and all of them are terminal.
    def all_terminal?(snapshot)
      statuses = partition_statuses(snapshot)
      !statuses.empty? && statuses.all? { |status| TERMINAL_STATUSES.include?(status) }
    end
    private_class_method :all_terminal?

    # @return [Array<String>] status of every partition entry in the snapshot
    def partition_statuses(snapshot)
      partitions = snapshot && snapshot[:partitions]
      return [] unless partitions.is_a?(Hash)

      partitions.values.map { |entry| entry_status(entry) }
    end
    private_class_method :partition_statuses

    # Read an entry's status, accepting symbol or string keys (as
    # IndexingRun.aggregate_result does) and tolerating non-Hash entries.
    def entry_status(entry)
      return '' unless entry.is_a?(Hash)

      (entry[:status] || entry['status']).to_s
    end
    private_class_method :entry_status

    def failed_partition_count(statuses)
      statuses.count { |status| status == 'failed' }
    end
    private_class_method :failed_partition_count

    # Queue precedence: explicit argument, partition queue, indexer queue, 'search_index'.
    def resolve_queue_name(queue, cfg)
      (queue || cfg.partition_queue_name || cfg.queue_name || 'search_index').to_s
    end
    private_class_method :resolve_queue_name

    def collection_name(klass)
      klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s
    end
    private_class_method :collection_name

    # Build the instrumentation payload; result keys are merged over the base fields.
    def event_payload(snapshot, queue: nil, result: nil)
      payload = {
        run_id: snapshot && snapshot[:run_id],
        collection: snapshot && snapshot[:collection],
        collection_class_name: snapshot && snapshot[:collection_class_name],
        into: snapshot && snapshot[:into],
        total_partitions: snapshot && snapshot[:total_partitions],
        queue: queue
      }
      payload.merge!(result) if result
      payload
    end
    private_class_method :event_payload

    def instrument(event, payload)
      SearchEngine::Instrumentation.instrument(event, payload) {}
    end
    private_class_method :instrument

    def monotonic_seconds
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
    private_class_method :monotonic_seconds
  end
end
@@ -281,6 +281,11 @@ module SearchEngine
281
281
  if compiled
282
282
  parts = Array(compiled.partitions)
283
283
  max_p = compiled.max_parallel.to_i
284
+ if __se_async_partition_execution_requested?(parts)
285
+ return __se_async_partition_result_unless_available unless __se_async_partition_execution_available?
286
+
287
+ return SearchEngine::AsyncPartitionCoordinator.call(klass: self, partitions: parts, into: into)
288
+ end
284
289
  return __se_index_partitions_seq!(parts, into, compiled) if max_p <= 1 || parts.size <= 1
285
290
 
286
291
  __se_index_partitions_parallel!(parts, into, max_p, compiled)
@@ -292,6 +297,32 @@ module SearchEngine
292
297
  sample_error: "#{error.class}: #{error.message.to_s[0, 200]}" }
293
298
  end
294
299
 
300
# Whether async (ActiveJob) partition execution applies to this run.
# Requires more than one partition and `indexer.partition_execution` set to
# :active_job (Symbol or String). A nil or otherwise unrecognized setting
# counts as "not requested" instead of raising.
# @param parts [Array<Object>] partitions to index
# @return [Boolean]
def __se_async_partition_execution_requested?(parts)
  parts.size > 1 && SearchEngine.config.indexer.partition_execution.to_s == 'active_job'
end
303
+
304
# Whether the constants needed for async partition execution are loaded.
# @return [String, nil] truthy only when both ::ActiveJob::Base and
#   SearchEngine::IndexPartitionJob are defined
def __se_async_partition_execution_available?
  return nil unless defined?(::ActiveJob::Base)

  defined?(SearchEngine::IndexPartitionJob)
end
307
+
308
# Build the failure result returned when async partition execution was
# requested but its constants are not loaded.
# @return [Hash] result with status :failed, zeroed counters, and a
#   :sample_error naming each missing constant
def __se_async_partition_result_unless_available
  absent = []
  absent.push('ActiveJob::Base') unless defined?(::ActiveJob::Base)
  absent.push('SearchEngine::IndexPartitionJob') unless defined?(SearchEngine::IndexPartitionJob)
  reason = "async partition indexing requires #{absent.join(' and ')}"

  { status: :failed, docs_total: 0, success_total: 0, failed_total: 0, sample_error: reason }
end
321
+
322
+ private :__se_async_partition_execution_requested?,
323
+ :__se_async_partition_execution_available?,
324
+ :__se_async_partition_result_unless_available
325
+
295
326
  def __se_index_single_with_renderer!(into)
296
327
  docs_estimate = __se_heuristic_docs_estimate(1)
297
328
  renderer = SearchEngine::Logging::LiveRenderer.new(
@@ -108,6 +108,18 @@ module SearchEngine
108
108
  attr_accessor :dispatch
109
109
  # @return [String] queue name for ActiveJob enqueues
110
110
  attr_accessor :queue_name
111
+ # @return [Symbol] partition execution mode: :inline or :active_job
112
+ attr_accessor :partition_execution
113
+ # @return [String, nil] optional queue override for async partition jobs
114
+ attr_accessor :partition_queue_name
115
+ # @return [Integer] parent wait polling interval for async partition runs (seconds)
116
+ attr_accessor :partition_poll_interval_s
117
+ # @return [Integer, nil] maximum parent wait budget for async partition runs (seconds)
118
+ attr_accessor :partition_timeout_s
119
+ # @return [Integer] TTL for async partition run metadata (seconds)
120
+ attr_accessor :partition_run_ttl_s
121
+ # @return [Object, nil] custom async partition run store
122
+ attr_accessor :partition_run_store
111
123
  # @return [Boolean] whether to run model.count for progress bar estimates (default true)
112
124
  attr_accessor :estimate_progress
113
125
  # @return [Integer, nil] graceful-shutdown timeout (seconds) for the parallel
@@ -121,6 +133,12 @@ module SearchEngine
121
133
  @gzip = false
122
134
  @dispatch = active_job_available? ? :active_job : :inline
123
135
  @queue_name = 'search_index'
136
+ @partition_execution = :inline
137
+ @partition_queue_name = nil
138
+ @partition_poll_interval_s = 2
139
+ @partition_timeout_s = nil
140
+ @partition_run_ttl_s = 86_400
141
+ @partition_run_store = nil
124
142
  @estimate_progress = true
125
143
  @pool_timeout = nil
126
144
  end
@@ -737,6 +755,12 @@ module SearchEngine
737
755
  gzip: indexer.gzip ? true : false,
738
756
  dispatch: indexer.dispatch,
739
757
  queue_name: indexer.queue_name,
758
+ partition_execution: indexer.partition_execution,
759
+ partition_queue_name: indexer.partition_queue_name,
760
+ partition_poll_interval_s: indexer.partition_poll_interval_s,
761
+ partition_timeout_s: indexer.partition_timeout_s,
762
+ partition_run_ttl_s: indexer.partition_run_ttl_s,
763
+ partition_run_store: indexer.partition_run_store,
740
764
  estimate_progress: indexer.estimate_progress
741
765
  }
742
766
  end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'date'
5
+ require 'json'
6
+ require 'securerandom'
7
+ require 'time'
8
+
9
module SearchEngine
  # Helpers for async partition indexing run metadata.
  module IndexingRun
    module_function

    # Generate a unique run id suitable for operational metadata keys.
    # @return [String] "se-run-" + UTC timestamp + 16 random hex characters
    def generate_id
      "se-run-#{Time.now.utc.strftime('%Y%m%d%H%M%S')}-#{SecureRandom.hex(8)}"
    end

    # Return a stable primitive key for a partition value.
    # The key is a SHA-256 digest of the normalized JSON form, so hashes that
    # differ only in key order or symbol/string keys map to the same key.
    # @param partition [Object]
    # @return [String]
    def partition_key(partition)
      "p-#{Digest::SHA256.hexdigest(normalized_partition_json(partition))}"
    end

    # Convert a partition value into ActiveJob-friendly primitives when possible.
    # @param value [Object]
    # @return [Object]
    def serialize_partition(value)
      normalize(value)
    end

    # Return a compact partition string for logs and snapshots.
    # @param partition [Object]
    # @param max [Integer] maximum length of the returned string
    # @return [String]
    def partition_display(partition, max: 160)
      text = begin
        JSON.generate(serialize_partition(partition))
      rescue StandardError
        # Values JSON cannot encode still get a readable label.
        partition.to_s
      end
      compact(text, max)
    end

    # Build lifecycle-compatible aggregate counters from a run snapshot.
    # An unusable snapshot yields a :failed result whose :sample_error explains why.
    # @param snapshot [Hash, nil]
    # @return [Hash] result with :status, :docs_total, :success_total, :failed_total, and :sample_error
    def aggregate_result(snapshot)
      unless valid_snapshot?(snapshot)
        return {
          status: :failed,
          docs_total: 0,
          success_total: 0,
          failed_total: 1,
          sample_error: invalid_snapshot_error(snapshot)
        }
      end

      docs_total = 0
      success_total = 0
      failed_total = 0
      sample_error = nil
      statuses = []

      # valid_snapshot? guarantees :partitions is a Hash here.
      snapshot[:partitions].each_value do |entry|
        data = symbolize_keys(entry)
        statuses << data[:status].to_s
        docs_total += data[:docs_total].to_i
        success_total += data[:success_total].to_i
        failed_total += data[:failed_total].to_i
        # Keep the first non-blank error seen.
        sample_error ||= present_string(data[:sample_error] || data[:error])
      end

      {
        status: aggregate_status(statuses, success_total, failed_total),
        docs_total: docs_total,
        success_total: success_total,
        failed_total: failed_total,
        sample_error: sample_error
      }
    end

    # Build the stored metadata for one partition.
    # @param partition [Object]
    # @return [Hash] entry in 'pending' status with zeroed counters
    def partition_entry(partition)
      serialized = serialize_partition(partition)
      {
        partition: serialized,
        partition_display: partition_display(serialized),
        status: 'pending',
        docs_total: 0,
        success_total: 0,
        failed_total: 0,
        sample_error: nil,
        job_id: nil,
        updated_at: iso8601_now
      }
    end

    # @return [String] current UTC time in ISO 8601 with microseconds
    def iso8601_now
      Time.now.utc.iso8601(6)
    end

    # JSON text used for hashing; falls back to a tagged `to_s` when the value
    # cannot be encoded.
    def normalized_partition_json(value)
      JSON.generate(serialize_partition(value))
    rescue StandardError
      JSON.generate('__search_engine_fallback__' => value.to_s)
    end

    # Reduce a value to JSON-friendly primitives: symbols and unknown objects
    # become strings, times and dates become ISO 8601 strings, arrays and
    # hashes are normalized recursively.
    def normalize(value)
      case value
      when NilClass, TrueClass, FalseClass, Numeric, String
        value
      when Symbol
        value.to_s
      when Time
        value.utc.iso8601(6)
      when Date
        value.iso8601
      when Array
        value.map { |item| normalize(item) }
      when Hash
        normalize_hash(value)
      else
        value.to_s
      end
    end

    # Stringify and sort hash keys in a single pass over the entries.
    # When several keys stringify to the same name, a String key wins;
    # otherwise the first such key in insertion order is kept.
    def normalize_hash(hash)
      picked = hash.each_with_object({}) do |(key, item), acc|
        name = key.to_s
        acc[name] = item if key.is_a?(String) || !acc.key?(name)
      end
      picked.keys.sort.each_with_object({}) do |name, out|
        out[name] = normalize(picked[name])
      end
    end
    private_class_method :normalized_partition_json, :normalize, :normalize_hash

    # Status precedence: no partitions or only failures => :failed; any failed
    # documents => :partial; unfinished partitions => :running; otherwise :ok.
    def aggregate_status(statuses, success_total, failed_total)
      return :failed if statuses.empty?
      return :failed if failed_total.positive? && success_total.zero?
      return :partial if failed_total.positive?
      return :running if statuses.any? { |status| %w[pending running].include?(status) }

      :ok
    end
    private_class_method :aggregate_status

    # A snapshot is usable when it is a Hash whose :partitions Hash has exactly
    # :total_partitions (> 0) entries.
    def valid_snapshot?(snapshot)
      return false unless snapshot.is_a?(Hash)

      partitions = snapshot[:partitions]
      return false unless partitions.is_a?(Hash)

      expected_total = snapshot[:total_partitions].to_i
      expected_total.positive? && partitions.size == expected_total
    end
    private_class_method :valid_snapshot?

    def invalid_snapshot_error(snapshot)
      return 'async partition indexing run snapshot is missing' unless snapshot.is_a?(Hash)
      return 'async partition indexing run snapshot has no partition metadata' unless snapshot[:partitions].is_a?(Hash)

      expected_total = snapshot[:total_partitions].to_i
      actual_total = snapshot[:partitions].size
      "async partition indexing run snapshot has #{actual_total}/#{expected_total} partitions"
    end
    private_class_method :invalid_snapshot_error

    def symbolize_keys(hash)
      return {} unless hash.is_a?(Hash)

      hash.transform_keys(&:to_sym)
    end
    private_class_method :symbolize_keys

    # @return [String, nil] the value as text, or nil when it is blank
    def present_string(value)
      text = value.to_s
      text.strip.empty? ? nil : text
    end
    private_class_method :present_string

    # Truncate text to at most `max` characters, ending in "..." when there is
    # room for the ellipsis. Limits below three characters cut the text with no
    # ellipsis so the result never exceeds the limit.
    def compact(text, max)
      limit = [max.to_i, 0].max
      return text if text.length <= limit
      return text[0, limit] if limit < 3

      "#{text[0, limit - 3]}..."
    end
    private_class_method :compact
  end
end
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'search_engine/indexing_run'
4
+
5
module SearchEngine
  module IndexingRunStore
    # Rails.cache-backed implementation for async partition indexing run metadata.
    #
    # Run metadata lives under one cache key and every partition entry under
    # its own key, all written with the run's TTL.
    class RailsCache
      MISSING_CACHE_MESSAGE =
        'SearchEngine async partition indexing requires Rails.cache or a custom partition_run_store'
      private_constant :MISSING_CACHE_MESSAGE

      # @param cache [Object, nil] cache object responding to read/write/delete
      # @raise [ArgumentError] when no usable cache is available
      def initialize(cache: nil)
        @cache = cache || resolve_rails_cache!
        validate_cache!
      end

      # Create a run snapshot with pending partition entries.
      # @return [Hash]
      def create_run(run_id:, collection:, collection_class_name:, into:, partitions:, ttl_s:)
        entries = build_partitions(partitions)
        run = {
          run_id: run_id.to_s,
          collection: collection.to_s,
          collection_class_name: collection_class_name.to_s,
          into: into.to_s,
          status: 'running',
          total_partitions: Array(partitions).size,
          partition_keys: entries.keys,
          ttl_s: ttl_s,
          created_at: SearchEngine::IndexingRun.iso8601_now,
          updated_at: SearchEngine::IndexingRun.iso8601_now
        }
        write_meta(run, ttl_s: ttl_s)
        entries.each do |partition_key, entry|
          @cache.write(partition_cache_key(run_id, partition_key), entry, expires_in: ttl_s)
        end
        snapshot(run_id: run_id)
      end

      # Mark a partition as started.
      # @return [Hash]
      # @raise [KeyError] when the run or partition is unknown
      def mark_started(run_id:, partition_key:, job_id: nil)
        update_partition(run_id, partition_key) do |entry|
          entry[:status] = 'running'
          entry[:job_id] = job_id.to_s unless job_id.nil?
        end
      end

      # Mark a partition as succeeded with import counters.
      # The summary's sample error is kept only when the summary reports failed
      # documents; a clean success stores no error.
      # @return [Hash]
      # @raise [KeyError] when the run or partition is unknown
      def mark_succeeded(run_id:, partition_key:, summary:)
        update_partition(run_id, partition_key) do |entry|
          assign_summary!(entry, summary)
          entry[:status] = 'succeeded'
          entry[:sample_error] = nil unless entry[:failed_total].positive?
        end
      end

      # Mark a partition as failed.
      # @param error [Exception, String, Object, nil]
      # @return [Hash]
      # @raise [KeyError] when the run or partition is unknown
      def mark_failed(run_id:, partition_key:, error:)
        update_partition(run_id, partition_key) do |entry|
          entry[:status] = 'failed'
          entry[:sample_error] = error_message(error)
          entry[:failed_total] = [entry[:failed_total].to_i, 1].max
        end
      end

      # Read the current run snapshot.
      # Partition entries that are no longer in the cache are reported as failed.
      # @return [Hash, nil] nil when the run metadata is not in the cache
      def snapshot(run_id:)
        meta = symbolize_meta(@cache.read(meta_cache_key(run_id)))
        return nil unless meta

        partition_keys = Array(meta[:partition_keys]).map(&:to_s)
        partitions = partition_keys.each_with_object({}) do |partition_key, hash|
          entry = @cache.read(partition_cache_key(run_id, partition_key))
          hash[partition_key] = if entry.is_a?(Hash)
                                  deep_symbolize(entry)
                                else
                                  missing_partition_entry(partition_key)
                                end
        end
        meta[:partitions] = partitions
        meta[:status] = run_status(partitions)
        meta
      end

      # Expire a run immediately.
      # @return [Object] result of deleting the metadata key
      def expire(run_id:)
        meta = symbolize_meta(@cache.read(meta_cache_key(run_id)))
        Array(meta && meta[:partition_keys]).each do |partition_key|
          @cache.delete(partition_cache_key(run_id, partition_key))
        end
        @cache.delete(meta_cache_key(run_id))
      end

      private

      # Fetch Rails.cache, translating "Rails not loaded / no cache" into ArgumentError.
      def resolve_rails_cache!
        rails = Object.const_get(:Rails) if Object.const_defined?(:Rails)
        cache = rails.cache if rails.respond_to?(:cache)
        return cache if cache

        raise ArgumentError, MISSING_CACHE_MESSAGE
      rescue NameError
        raise ArgumentError, MISSING_CACHE_MESSAGE
      end

      # Check the cache interface, then round-trip a probe value through it.
      def validate_cache!
        missing = %i[read write delete].reject { |method_name| @cache.respond_to?(method_name) }
        unless missing.empty?
          raise ArgumentError, "Rails.cache is unusable for indexing runs; missing: #{missing.join(', ')}"
        end

        probe_key = meta_cache_key("__probe__#{object_id}")
        @cache.write(probe_key, { ok: true }, expires_in: 1)
        value = @cache.read(probe_key)
        @cache.delete(probe_key)
        return if value

        raise ArgumentError, 'Rails.cache is unusable for indexing runs; probe read returned nil'
      rescue ArgumentError
        # Our own diagnostics pass through untouched.
        raise
      rescue StandardError => error
        raise ArgumentError, "Rails.cache is unusable for indexing runs: #{error.class}: #{error.message}"
      end

      # @return [Hash{String => Hash}] pending entries keyed by partition key
      def build_partitions(partitions)
        Array(partitions).each_with_object({}) do |partition, hash|
          hash[SearchEngine::IndexingRun.partition_key(partition)] = SearchEngine::IndexingRun.partition_entry(partition)
        end
      end

      def write_meta(run, ttl_s:)
        @cache.write(meta_cache_key(run[:run_id]), run, expires_in: ttl_s)
      end

      # Read one partition entry, let the block change it, and write it back
      # together with refreshed run metadata.
      # @yieldparam entry [Hash] symbol-keyed partition entry
      # @return [Hash] fresh snapshot of the run
      def update_partition(run_id, partition_key)
        meta = symbolize_meta(@cache.read(meta_cache_key(run_id)))
        raise KeyError, "indexing run not found: #{run_id}" unless meta

        partition_key = partition_key.to_s
        unless Array(meta[:partition_keys]).map(&:to_s).include?(partition_key)
          raise KeyError, "indexing run partition not found: #{partition_key}"
        end

        entry = @cache.read(partition_cache_key(run_id, partition_key))
        raise KeyError, "indexing run partition not found: #{partition_key}" unless entry

        entry = deep_symbolize(entry)
        yield entry
        now = SearchEngine::IndexingRun.iso8601_now
        entry[:updated_at] = now
        @cache.write(partition_cache_key(run_id, partition_key), entry, expires_in: meta[:ttl_s])
        meta[:updated_at] = now
        write_meta(meta, ttl_s: meta[:ttl_s])
        snapshot(run_id: run_id)
      end

      # 'failed' if any partition failed, 'succeeded' if all succeeded, else 'running'.
      def run_status(partitions)
        statuses = partitions.values.map { |entry| entry[:status].to_s }
        return 'failed' if statuses.include?('failed')
        return 'succeeded' if statuses.all? { |status| status == 'succeeded' }

        'running'
      end

      # Copy counters and sample error from the job summary onto the entry.
      def assign_summary!(entry, summary)
        entry[:docs_total] = summary_value(summary, :docs_total).to_i
        entry[:success_total] = summary_value(summary, :success_total).to_i
        entry[:failed_total] = summary_value(summary, :failed_total).to_i
        entry[:sample_error] = summary_value(summary, :sample_error)
      end

      # Read a field from a summary that may be an object with readers or a
      # Hash with symbol or string keys.
      def summary_value(summary, key)
        return summary.public_send(key) if summary.respond_to?(key)
        return summary[key] if summary.is_a?(Hash) && summary.key?(key)
        return summary[key.to_s] if summary.is_a?(Hash) && summary.key?(key.to_s)

        nil
      end

      # Render an error for storage. Strings pass through, exception-like
      # objects become "Class: message" (message capped at 200 characters),
      # nil becomes 'unknown error', anything else its `to_s`.
      def error_message(error)
        return error if error.is_a?(String)
        return 'unknown error' if error.nil?
        return error.to_s unless error.respond_to?(:message)

        "#{error.class}: #{error.message.to_s[0, 200]}"
      end

      def meta_cache_key(run_id)
        "search_engine:indexing_run:#{run_id}:meta"
      end

      def partition_cache_key(run_id, partition_key)
        "search_engine:indexing_run:#{run_id}:partition:#{partition_key}"
      end

      # Placeholder for a partition whose cache entry cannot be read.
      def missing_partition_entry(partition_key)
        {
          partition: nil,
          partition_display: partition_key,
          status: 'failed',
          docs_total: 0,
          success_total: 0,
          failed_total: 1,
          sample_error: "partition metadata missing for #{partition_key}",
          job_id: nil,
          updated_at: SearchEngine::IndexingRun.iso8601_now
        }
      end

      def symbolize_meta(meta)
        return nil unless meta.is_a?(Hash)

        deep_symbolize(meta)
      end

      # Recursively convert hash keys to symbols, descending into arrays.
      def deep_symbolize(value)
        case value
        when Hash
          value.each_with_object({}) { |(key, item), hash| hash[key.to_sym] = deep_symbolize(item) }
        when Array
          value.map { |item| deep_symbolize(item) }
        else
          value
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Lookup and contract check for the store that tracks async partition
  # indexing runs.
  module IndexingRunStore
    module_function

    # Return the store configured on the indexer, or the default store when
    # none is configured.
    # @param config [SearchEngine::Config, nil] falls back to +SearchEngine.config+
    # @return [Object]
    # @raise [ArgumentError] when the configured store fails {validate!}
    def resolve(config: nil)
      configured_store(config || SearchEngine.config) || default
    end

    # Instantiate the default run store.
    # The implementation file is required here, on first use.
    # @return [SearchEngine::IndexingRunStore::RailsCache]
    def default
      require 'search_engine/indexing_run_store/rails_cache'
      SearchEngine::IndexingRunStore::RailsCache.new
    end

    # Check that +store+ responds to every method listed in {required_methods}.
    # @param store [Object]
    # @return [Object] the same +store+ when it satisfies the contract
    # @raise [ArgumentError] listing the methods the store does not respond to
    def validate!(store)
      absent = required_methods.reject { |name| store.respond_to?(name) }
      raise ArgumentError, "indexing run store missing methods: #{absent.join(', ')}" if absent.any?

      store
    end

    # Method names a run store must respond to.
    # @return [Array<Symbol>]
    def required_methods
      %i[create_run mark_started mark_succeeded mark_failed snapshot expire]
    end

    # Read +config.indexer.partition_run_store+ when both accessors exist and
    # validate the result.
    # @param config [Object]
    # @return [Object, nil] the validated store, or nil when nothing is configured
    # @raise [ArgumentError] when a configured store fails {validate!}
    def configured_store(config)
      return nil unless config.respond_to?(:indexer)

      indexer = config.indexer
      return nil unless indexer.respond_to?(:partition_run_store)

      store = indexer.partition_run_store
      validate!(store) if store
    end
    private_class_method :configured_store
  end
end
@@ -3,5 +3,5 @@
3
3
  module SearchEngine
4
4
  # Current gem version.
5
5
  # @return [String]
6
- VERSION = '30.1.7.0'
6
+ VERSION = '30.1.8.0'
7
7
  end
data/lib/search_engine.rb CHANGED
@@ -30,6 +30,10 @@ require 'search_engine/indexer/bulk_import'
30
30
  require 'search_engine/mapper'
31
31
  require 'search_engine/sources'
32
32
  require 'search_engine/partitioner'
33
+ require 'search_engine/indexing_run'
34
+ require 'search_engine/indexing_run_store'
35
+ require 'search_engine/indexing_run_store/rails_cache'
36
+ require 'search_engine/async_partition_coordinator'
33
37
  require 'search_engine/dispatcher'
34
38
  require 'search_engine/joins/guard'
35
39
  require 'search_engine/admin'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: search-engine-for-typesense
3
3
  version: !ruby/object:Gem::Version
4
- version: 30.1.7.0
4
+ version: 30.1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Shkoda
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-13 00:00:00.000000000 Z
11
+ date: 2026-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -92,6 +92,7 @@ files:
92
92
  - lib/search_engine/ast/prefix.rb
93
93
  - lib/search_engine/ast/raw.rb
94
94
  - lib/search_engine/ast/unary_op.rb
95
+ - lib/search_engine/async_partition_coordinator.rb
95
96
  - lib/search_engine/base.rb
96
97
  - lib/search_engine/base/creation.rb
97
98
  - lib/search_engine/base/deletion.rb
@@ -150,6 +151,9 @@ files:
150
151
  - lib/search_engine/indexer/import_dispatcher.rb
151
152
  - lib/search_engine/indexer/import_response_parser.rb
152
153
  - lib/search_engine/indexer/retry_policy.rb
154
+ - lib/search_engine/indexing_run.rb
155
+ - lib/search_engine/indexing_run_store.rb
156
+ - lib/search_engine/indexing_run_store/rails_cache.rb
153
157
  - lib/search_engine/instrumentation.rb
154
158
  - lib/search_engine/interruptible_pool.rb
155
159
  - lib/search_engine/joins/guard.rb