search-engine-for-typesense 30.1.8.2 → 30.1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +154 -0
- data/app/search_engine/search_engine/postgres_outbox/drain_job.rb +24 -0
- data/lib/generators/search_engine/postgres_outbox/install_generator.rb +35 -0
- data/lib/generators/search_engine/postgres_outbox/templates/add_outbox_triggers.rb.tt +41 -0
- data/lib/generators/search_engine/postgres_outbox/templates/create_outbox_events.rb.tt +9 -0
- data/lib/search_engine/active_record_syncable.rb +29 -6
- data/lib/search_engine/bulk.rb +3 -96
- data/lib/search_engine/config.rb +80 -0
- data/lib/search_engine/dependency_planner.rb +241 -0
- data/lib/search_engine/indexer/bulk_import.rb +94 -39
- data/lib/search_engine/indexer.rb +36 -2
- data/lib/search_engine/indexing_run_store/rails_cache.rb +5 -0
- data/lib/search_engine/postgres_outbox/drainer.rb +176 -0
- data/lib/search_engine/postgres_outbox/event.rb +59 -0
- data/lib/search_engine/postgres_outbox/event_processor.rb +65 -0
- data/lib/search_engine/postgres_outbox/listener.rb +243 -0
- data/lib/search_engine/postgres_outbox/migration_helpers.rb +229 -0
- data/lib/search_engine/postgres_outbox/processor_result.rb +37 -0
- data/lib/search_engine/postgres_outbox/repository.rb +197 -0
- data/lib/search_engine/postgres_outbox.rb +15 -0
- data/lib/search_engine/version.rb +1 -1
- data/lib/search_engine.rb +2 -0
- metadata +15 -2
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'

module SearchEngine
  # Public dependency-ordering helpers for Typesense collection reference graphs.
  #
  # Every helper operates on a "reverse graph": a Hash mapping a target
  # collection name (String) to the list of edges that point at it. After
  # normalization each edge is a Hash with String values under +:referrer+,
  # +:local_key+ and +:foreign_key+. A missing edge attribute normalizes to an
  # empty String; edges with a blank referrer are never reported as collections.
  module DependencyPlanner
    class << self
      # Build a normalized reverse dependency graph.
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil] falls back to SearchEngine.client
      #   for the :typesense and :auto sources
      # @return [Hash{String=>Array<Hash>}] target collection => reference edges
      # @raise [ArgumentError] when +source+ is not one of the supported values
      def reverse_graph(source: :registry, client: nil)
        graph = case source.to_sym
                when :registry
                  SearchEngine::Cascade.send(:build_from_registry)
                when :typesense
                  SearchEngine::Cascade.send(:build_from_typesense, client || SearchEngine.client)
                when :auto
                  SearchEngine::Cascade.build_reverse_graph(client: client || SearchEngine.client)
                else
                  raise ArgumentError, 'source must be :registry, :typesense, or :auto'
                end

        normalize_reverse_graph(graph)
      end

      # Order collection names so referenced collections precede referrers.
      # Collections that are ready at the same time are emitted alphabetically.
      # @param collections [Array<Symbol, String, Class>] collections or model classes
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil]
      # @param reverse_graph [Hash, nil] prebuilt reverse graph (skips graph building)
      # @return [Array<String>]
      def order_collections(collections, source: :registry, client: nil, reverse_graph: nil)
        graph = graph_or_build(reverse_graph, source: source, client: client)
        topo_sort_subset(graph, normalize_collections(collections))
      end

      # Order events by their collection dependency order while preserving per-collection event order.
      # Events whose collection is blank or cannot be read are appended at the end,
      # in their original relative order.
      # @param events [Array<Object, Hash>]
      # @param collection_method [Symbol, String] event reader or hash key for collection name
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil]
      # @param reverse_graph [Hash, nil] prebuilt reverse graph
      # @return [Array<Object, Hash>]
      def order_events(events, collection_method: :collection, source: :registry, client: nil, reverse_graph: nil)
        graph = graph_or_build(reverse_graph, source: source, client: client)
        grouped = Hash.new { |h, k| h[k] = [] }
        without_collection = []

        Array(events).each do |event|
          collection = collection_from_event(event, collection_method)
          if collection.nil? || collection.empty?
            without_collection << event
          else
            grouped[collection] << event
          end
        end

        ordered_collections = topo_sort_subset(graph, grouped.keys)
        ordered_collections.flat_map { |collection| grouped[collection] } + without_collection
      end

      # Return collections that directly reference the given collection.
      # Edges without a referrer are ignored.
      # @param collection [Symbol, String, Class]
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil]
      # @param reverse_graph [Hash, nil] prebuilt reverse graph
      # @return [Array<String>]
      def referencers_for(collection, source: :registry, client: nil, reverse_graph: nil)
        graph = graph_or_build(reverse_graph, source: source, client: client)
        name = normalize_collection(collection)
        Array(graph[name]).filter_map { |edge| referrer_of(edge) }.uniq
      end

      # Return collections directly referenced by the given collection.
      # A blank collection name never has dependencies.
      # @param collection [Symbol, String, Class]
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil]
      # @param reverse_graph [Hash, nil] prebuilt reverse graph
      # @return [Array<String>]
      def dependencies_for(collection, source: :registry, client: nil, reverse_graph: nil)
        graph = graph_or_build(reverse_graph, source: source, client: client)
        name = normalize_collection(collection)
        # A blank name would otherwise match every edge whose referrer is missing.
        return [] if name.empty?

        graph.filter_map do |target, edges|
          target if Array(edges).any? { |edge| edge[:referrer] == name }
        end
      end

      # Build the two Bulk stages from collection dependencies.
      #
      # +:stage_1+ holds the inputs that do not reference another input;
      # +:cascade+ holds every direct referencer of any input, dependency-ordered.
      # @param collections [Array<Symbol, String, Class>] collections or model classes
      # @param source [Symbol] :registry, :typesense, or :auto
      # @param client [SearchEngine::Client, nil]
      # @param reverse_graph [Hash, nil] prebuilt reverse graph
      # @return [Hash{Symbol=>Array<String>}] :stage_1 and :cascade collection names
      def bulk_stages(collections, source: :auto, client: nil, reverse_graph: nil)
        inputs = normalize_collections(collections)
        graph = graph_or_build(reverse_graph, source: source, client: client)
        input_set = inputs.to_h { |name| [name, true] }
        internal_referrers = internal_referrers_within_inputs(graph, input_set)
        cascade_candidates = unique_referencers_of_inputs(graph, inputs)

        {
          stage_1: inputs.reject { |name| internal_referrers.include?(name) },
          cascade: topo_sort_subset(graph, cascade_candidates)
        }
      end

      private

      # Use the caller-supplied graph (normalized) when given, otherwise build one.
      def graph_or_build(graph, source:, client:)
        graph ? normalize_reverse_graph(graph) : reverse_graph(source: source, client: client)
      end

      # Coerce targets to Strings and edges to the canonical three-key Hash.
      # Targets whose name is blank are dropped.
      def normalize_reverse_graph(graph)
        normalized = Hash.new { |h, k| h[k] = [] }
        Hash(graph).each do |target, edges|
          target_name = target.to_s
          next if target_name.empty?

          Array(edges).each do |edge|
            normalized[target_name] << normalize_edge(edge)
          end
        end
        normalized
      end

      # @return [Hash{Symbol=>String}] edge with stringified attributes ('' when absent)
      def normalize_edge(edge)
        {
          referrer: edge_value(edge, :referrer).to_s,
          local_key: edge_value(edge, :local_key).to_s,
          foreign_key: edge_value(edge, :foreign_key).to_s
        }
      end

      # Read an edge attribute from a reader method, a Symbol key, or a String key.
      def edge_value(edge, key)
        return edge.public_send(key) if edge.respond_to?(key)

        edge[key] || edge[key.to_s]
      end

      # @return [String, nil] the edge's referrer, or nil when it is blank
      def referrer_of(edge)
        referrer = edge[:referrer].to_s
        referrer unless referrer.empty?
      end

      # Flatten, drop nils/blank names and de-duplicate, keeping first-seen order.
      def normalize_collections(collections)
        Array(collections).flatten.compact.filter_map do |collection|
          name = normalize_collection(collection)
          name unless name.empty?
        end.uniq
      end

      # Classes resolve through their +collection+ reader when they have one,
      # otherwise through their class name; anything else is stringified.
      def normalize_collection(collection)
        if collection.is_a?(Class)
          collection.respond_to?(:collection) ? collection.collection.to_s : collection.name.to_s
        else
          collection.to_s
        end
      end

      # @return [String] collection name read from the event ('' when unavailable)
      def collection_from_event(event, collection_method)
        if event.respond_to?(collection_method)
          event.public_send(collection_method).to_s
        elsif event.respond_to?(:[])
          (event[collection_method.to_sym] || event[collection_method.to_s]).to_s
        else
          ''
        end
      end

      # @return [Set<String>] inputs that reference at least one other input
      def internal_referrers_within_inputs(reverse_graph, input_set)
        refs = Set.new
        reverse_graph.each do |target, edges|
          next unless input_set[target]

          Array(edges).each do |edge|
            referrer = edge[:referrer].to_s
            refs.add(referrer) if input_set[referrer]
          end
        end
        refs
      end

      # @return [Array<String>] distinct non-blank referrers of any input, first-seen order
      def unique_referencers_of_inputs(reverse_graph, inputs)
        seen = Set.new
        inputs.each do |name|
          Array(reverse_graph[name]).each do |edge|
            referrer = referrer_of(edge)
            seen.add(referrer) if referrer
          end
        end
        seen.to_a
      end

      # Kahn-style topological sort restricted to +subset+.
      #
      # Only edges whose target and referrer are both in the subset count.
      # Ready nodes are emitted alphabetically so the result is deterministic.
      # Nodes that can never become ready (a cycle, or anything waiting on one)
      # are reported via {#instrument_cycle!} and appended alphabetically.
      def topo_sort_subset(reverse_graph, subset)
        nodes = Array(subset).uniq
        node_set = nodes.to_h { |name| [name, true] }
        adj = Hash.new { |h, k| h[k] = Set.new }
        indeg = Hash.new(0)

        nodes.each { |name| indeg[name] = 0 }

        reverse_graph.each do |target, edges|
          Array(edges).each do |edge|
            referrer = edge[:referrer].to_s
            next unless node_set[referrer] && node_set[target]
            # Several edges between the same pair must only count once.
            next if adj[target].include?(referrer)

            adj[target] << referrer
            indeg[referrer] += 1
          end
        end

        queue = nodes.select { |name| indeg[name] <= 0 }.sort
        order = []
        until queue.empty?
          name = queue.shift
          order << name
          adj[name].each do |dependent|
            indeg[dependent] -= 1
            queue << dependent if indeg[dependent] <= 0
          end
          queue.sort!
        end

        remaining = nodes - order
        instrument_cycle!(remaining) if remaining.any?
        order + remaining.sort
      end

      # Best-effort reporting of unorderable collections; never raises.
      # Instrumentation and logging are isolated so a failure in the former
      # does not suppress the warning.
      def instrument_cycle!(collections)
        payload = { collections: collections.sort }
        begin
          SearchEngine::Instrumentation.instrument('search_engine.dependency_planner.cycle', payload) {}
        rescue StandardError
          nil
        end
        SearchEngine.config.logger&.warn(
          "search_engine dependency planner cycle detected: #{payload[:collections].join(', ')}"
        )
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -66,42 +66,25 @@ module SearchEngine
|
|
|
66
66
|
client = SearchEngine.client
|
|
67
67
|
buffer = +''
|
|
68
68
|
next_index = sequence_generator
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
69
|
+
import_context = {
|
|
70
|
+
client: client,
|
|
71
|
+
collection: into,
|
|
72
|
+
action: action,
|
|
73
|
+
retry_policy: retry_policy,
|
|
74
|
+
buffer: buffer,
|
|
75
|
+
next_index: next_index
|
|
76
|
+
}
|
|
77
|
+
state = initialize_sequential_state
|
|
77
78
|
started_at = monotonic_ms
|
|
78
79
|
|
|
79
80
|
docs_enum.each do |raw_batch|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
collection: into,
|
|
81
|
+
process_single_batch_sequential(
|
|
82
|
+
import_context,
|
|
83
83
|
raw_batch: raw_batch,
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
)
|
|
89
|
-
|
|
90
|
-
stats_list.each do |stats|
|
|
91
|
-
docs_total += stats[:docs_count].to_i
|
|
92
|
-
success_total += stats[:success_count].to_i
|
|
93
|
-
failed_total += stats[:failure_count].to_i
|
|
94
|
-
failed_batches_total += 1 if stats[:failure_count].to_i.positive?
|
|
95
|
-
batches_total += 1
|
|
96
|
-
batches << stats
|
|
97
|
-
validate_soft_batch_size!(batch_size, stats[:docs_count])
|
|
98
|
-
log_batch(stats, batches_total) if log_batches
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
source_batches_done += 1
|
|
102
|
-
on_batch&.call(
|
|
103
|
-
batches_done: source_batches_done, docs_total: docs_total,
|
|
104
|
-
success_total: success_total, failed_total: failed_total
|
|
84
|
+
batch_size: batch_size,
|
|
85
|
+
log_batches: log_batches,
|
|
86
|
+
on_batch: on_batch,
|
|
87
|
+
state: state
|
|
105
88
|
)
|
|
106
89
|
end
|
|
107
90
|
|
|
@@ -110,17 +93,56 @@ module SearchEngine
|
|
|
110
93
|
|
|
111
94
|
Summary.new(
|
|
112
95
|
collection: klass.respond_to?(:collection) ? klass.collection : klass.name.to_s,
|
|
113
|
-
status: status_from_counts(success_total, failed_total),
|
|
114
|
-
batches_total: batches_total,
|
|
115
|
-
docs_total: docs_total,
|
|
116
|
-
success_total: success_total,
|
|
117
|
-
failed_total: failed_total,
|
|
118
|
-
failed_batches_total: failed_batches_total,
|
|
96
|
+
status: status_from_counts(state[:success_total], state[:failed_total]),
|
|
97
|
+
batches_total: state[:batches_total],
|
|
98
|
+
docs_total: state[:docs_total],
|
|
99
|
+
success_total: state[:success_total],
|
|
100
|
+
failed_total: state[:failed_total],
|
|
101
|
+
failed_batches_total: state[:failed_batches_total],
|
|
119
102
|
duration_ms_total: total_duration_ms,
|
|
120
|
-
|
|
103
|
+
source_duration_ms_total: state[:source_duration_ms_total].round(1),
|
|
104
|
+
map_duration_ms_total: state[:map_duration_ms_total].round(1),
|
|
105
|
+
jsonl_duration_ms_total: state[:jsonl_duration_ms_total].round(1),
|
|
106
|
+
import_duration_ms_total: state[:import_duration_ms_total].round(1),
|
|
107
|
+
batches: state[:batches]
|
|
121
108
|
)
|
|
122
109
|
end
|
|
123
110
|
|
|
111
|
+
# Build a fresh accumulator for a sequential import run.
#
# Integer counters start at 0, stage timings start at 0.0 and +:batches+ is a
# new empty Array on every call.
# @return [Hash] sequential import counters using the same keys as parallel aggregation
def initialize_sequential_state
  counters = %i[docs_total success_total failed_total failed_batches_total batches_total]
  timings = %i[
    source_duration_ms_total map_duration_ms_total jsonl_duration_ms_total import_duration_ms_total
  ]

  { batches: [] }.merge(
    counters.to_h { |key| [key, 0] },
    timings.to_h { |key| [key, 0.0] },
    source_batches_done: 0
  )
end
|
|
127
|
+
|
|
128
|
+
# Import one source batch on the sequential path and fold its results into +state+.
#
# @param import_context [Hash] must contain :client, :collection, :action,
#   :retry_policy, :buffer and :next_index (a missing key raises KeyError)
# @param raw_batch [Object] batch handed to import_batch_with_handling
# @param batch_size [Object] forwarded to aggregate_stats
# @param log_batches [Object] forwarded to aggregate_stats
# @param on_batch [#call, nil] optional progress callback; receives
#   progress_snapshot(state) as keyword arguments
# @param state [Hash] accumulator; :source_batches_done is incremented here
# @return [Object, nil] the callback's result, or nil when no callback is given
def process_single_batch_sequential(import_context, raw_batch:, batch_size:, log_batches:, on_batch:, state:)
  # Read the per-batch stage timings before importing.
  metrics = stage_metrics_for(raw_batch)
  client, collection, action, retry_policy, buffer, next_index =
    import_context.fetch_values(:client, :collection, :action, :retry_policy, :buffer, :next_index)

  results = import_batch_with_handling(
    client: client,
    collection: collection,
    raw_batch: raw_batch,
    action: action,
    retry_policy: retry_policy,
    buffer: buffer,
    next_index: next_index
  )

  aggregate_stats(results, state, batch_size, log_batches)
  aggregate_stage_metrics(metrics, results, state)
  state[:source_batches_done] += 1
  return if on_batch.nil?

  on_batch.call(**progress_snapshot(state))
end
|
|
145
|
+
|
|
124
146
|
# Process batches in parallel using a thread pool.
|
|
125
147
|
#
|
|
126
148
|
# Materializes all batches upfront and processes them concurrently using
|
|
@@ -202,6 +224,10 @@ module SearchEngine
|
|
|
202
224
|
failed_total: 0,
|
|
203
225
|
failed_batches_total: 0,
|
|
204
226
|
batches_total: 0,
|
|
227
|
+
source_duration_ms_total: 0.0,
|
|
228
|
+
map_duration_ms_total: 0.0,
|
|
229
|
+
jsonl_duration_ms_total: 0.0,
|
|
230
|
+
import_duration_ms_total: 0.0,
|
|
205
231
|
source_batches_done: 0,
|
|
206
232
|
idx_counter: -1,
|
|
207
233
|
started_at: monotonic_ms,
|
|
@@ -325,6 +351,7 @@ module SearchEngine
|
|
|
325
351
|
thread_client = SearchEngine.client
|
|
326
352
|
thread_buffer = +''
|
|
327
353
|
thread_idx = shared_state[:mtx].synchronize { shared_state[:idx_counter] += 1 }
|
|
354
|
+
stage_metrics = stage_metrics_for(raw_batch)
|
|
328
355
|
|
|
329
356
|
snapshot = begin
|
|
330
357
|
stats_list = import_batch_with_handling(
|
|
@@ -339,6 +366,7 @@ module SearchEngine
|
|
|
339
366
|
|
|
340
367
|
shared_state[:mtx].synchronize do
|
|
341
368
|
aggregate_stats(stats_list, shared_state, batch_size, log_batches)
|
|
369
|
+
aggregate_stage_metrics(stage_metrics, stats_list, shared_state)
|
|
342
370
|
shared_state[:source_batches_done] += 1
|
|
343
371
|
progress_snapshot(shared_state)
|
|
344
372
|
end
|
|
@@ -355,6 +383,7 @@ module SearchEngine
|
|
|
355
383
|
err_msg = " batch_index=#{thread_idx} → error=#{error.class}: #{error.message.to_s[0, 200]}"
|
|
356
384
|
warn(SearchEngine::Logging::Color.apply(err_msg, :red))
|
|
357
385
|
aggregate_stats([failure_stat], shared_state, batch_size, log_batches)
|
|
386
|
+
aggregate_stage_metrics(stage_metrics, [failure_stat], shared_state)
|
|
358
387
|
shared_state[:source_batches_done] += 1
|
|
359
388
|
progress_snapshot(shared_state)
|
|
360
389
|
end
|
|
@@ -420,6 +449,10 @@ module SearchEngine
|
|
|
420
449
|
failed_total: shared_state[:failed_total],
|
|
421
450
|
failed_batches_total: shared_state[:failed_batches_total],
|
|
422
451
|
duration_ms_total: total_duration_ms,
|
|
452
|
+
source_duration_ms_total: shared_state[:source_duration_ms_total].round(1),
|
|
453
|
+
map_duration_ms_total: shared_state[:map_duration_ms_total].round(1),
|
|
454
|
+
jsonl_duration_ms_total: shared_state[:jsonl_duration_ms_total].round(1),
|
|
455
|
+
import_duration_ms_total: shared_state[:import_duration_ms_total].round(1),
|
|
423
456
|
batches: shared_state[:batches]
|
|
424
457
|
)
|
|
425
458
|
end
|
|
@@ -586,8 +619,10 @@ module SearchEngine
|
|
|
586
619
|
docs = BatchPlanner.to_array(raw_batch)
|
|
587
620
|
return [] if docs.empty?
|
|
588
621
|
|
|
622
|
+
jsonl_started_at = monotonic_ms
|
|
589
623
|
docs_count, bytes_sent = BatchPlanner.encode_jsonl!(docs, buffer)
|
|
590
624
|
jsonl = buffer.dup
|
|
625
|
+
jsonl_duration_ms = (monotonic_ms - jsonl_started_at).round(1)
|
|
591
626
|
# Use provided batch_index if available (for recursive splits), otherwise compute from next_index
|
|
592
627
|
idx = batch_index || (next_index.is_a?(Proc) ? next_index.call : next_index)
|
|
593
628
|
|
|
@@ -605,6 +640,8 @@ module SearchEngine
|
|
|
605
640
|
dry_run: false
|
|
606
641
|
)
|
|
607
642
|
stats[:duration_ms] = (monotonic_ms - started_at).round(1)
|
|
643
|
+
stats[:jsonl_duration_ms] = jsonl_duration_ms
|
|
644
|
+
stats[:import_duration_ms] = stats[:duration_ms]
|
|
608
645
|
stats[:index] = idx
|
|
609
646
|
[stats]
|
|
610
647
|
rescue Errors::Api => error
|
|
@@ -646,11 +683,29 @@ module SearchEngine
|
|
|
646
683
|
attempts: 1,
|
|
647
684
|
http_status: error&.status.to_i,
|
|
648
685
|
duration_ms: 0.0,
|
|
686
|
+
jsonl_duration_ms: 0.0,
|
|
687
|
+
import_duration_ms: 0.0,
|
|
649
688
|
bytes_sent: bytes_sent,
|
|
650
689
|
errors_sample: [safe_error_excerpt(error)]
|
|
651
690
|
}
|
|
652
691
|
end
|
|
653
692
|
|
|
693
|
+
# Read the stage-timing Hash stored on a batch under the
# +@__search_engine_stage_metrics__+ instance variable.
#
# @param raw_batch [Object] batch that may carry attached metrics
# @return [Hash] the attached metrics, or an empty Hash when nothing usable is
#   attached (missing, not a Hash, or the lookup raised)
def stage_metrics_for(raw_batch)
  attached = raw_batch.instance_variable_get(:@__search_engine_stage_metrics__)
  attached.is_a?(Hash) ? attached : {}
rescue StandardError
  {}
end
|
|
701
|
+
|
|
702
|
+
# Add one batch's stage timings to the running totals in +shared_state+.
#
# Source and map timings come from +stage_metrics+ (one value per batch);
# JSONL and import timings are summed over every entry of +stats_list+.
# Missing values count as 0.0.
# @param stage_metrics [Hash] may contain :source_duration_ms and :map_duration_ms
# @param stats_list [Array<Hash>] entries may contain :jsonl_duration_ms and :import_duration_ms
# @param shared_state [Hash] accumulator holding the four *_duration_ms_total keys
# @return [Float] the updated :import_duration_ms_total
def aggregate_stage_metrics(stage_metrics, stats_list, shared_state)
  per_batch = {
    source_duration_ms_total: :source_duration_ms,
    map_duration_ms_total: :map_duration_ms
  }
  per_request = {
    jsonl_duration_ms_total: :jsonl_duration_ms,
    import_duration_ms_total: :import_duration_ms
  }

  per_batch.each do |total_key, metric_key|
    shared_state[total_key] += stage_metrics[metric_key].to_f
  end
  per_request.each do |total_key, stat_key|
    shared_state[total_key] += stats_list.sum { |stats| stats[stat_key].to_f }
  end

  shared_state[:import_duration_ms_total]
end
|
|
708
|
+
|
|
654
709
|
def safe_error_excerpt(error)
|
|
655
710
|
cls = error&.class&.name
|
|
656
711
|
msg = error&.message.to_s
|
|
@@ -26,6 +26,10 @@ module SearchEngine
|
|
|
26
26
|
:failed_total,
|
|
27
27
|
:failed_batches_total,
|
|
28
28
|
:duration_ms_total,
|
|
29
|
+
:source_duration_ms_total,
|
|
30
|
+
:map_duration_ms_total,
|
|
31
|
+
:jsonl_duration_ms_total,
|
|
32
|
+
:import_duration_ms_total,
|
|
29
33
|
:batches,
|
|
30
34
|
keyword_init: true
|
|
31
35
|
)
|
|
@@ -489,6 +493,7 @@ module SearchEngine
|
|
|
489
493
|
end
|
|
490
494
|
|
|
491
495
|
def instrument_partition_finish(klass, target_into, pfields, summary, started_at)
|
|
496
|
+
duration_ms = (monotonic_ms - started_at).round(1)
|
|
492
497
|
SearchEngine::Instrumentation.instrument(
|
|
493
498
|
'search_engine.indexer.partition_finish',
|
|
494
499
|
{
|
|
@@ -501,8 +506,12 @@ module SearchEngine
|
|
|
501
506
|
success_total: summary.success_total,
|
|
502
507
|
failed_total: summary.failed_total,
|
|
503
508
|
status: summary.status,
|
|
504
|
-
duration_ms:
|
|
505
|
-
|
|
509
|
+
duration_ms: duration_ms,
|
|
510
|
+
source_duration_ms_total: summary_metric(summary, :source_duration_ms_total),
|
|
511
|
+
map_duration_ms_total: summary_metric(summary, :map_duration_ms_total),
|
|
512
|
+
jsonl_duration_ms_total: summary_metric(summary, :jsonl_duration_ms_total),
|
|
513
|
+
import_duration_ms_total: summary_metric(summary, :import_duration_ms_total)
|
|
514
|
+
}.compact
|
|
506
515
|
) {}
|
|
507
516
|
end
|
|
508
517
|
|
|
@@ -510,12 +519,37 @@ module SearchEngine
|
|
|
510
519
|
Enumerator.new do |y|
|
|
511
520
|
idx = 0
|
|
512
521
|
rows_enum.each do |rows|
|
|
522
|
+
source_started_at = monotonic_ms
|
|
523
|
+
rows = BatchPlanner.to_array(rows)
|
|
524
|
+
source_duration_ms = (monotonic_ms - source_started_at).round(1)
|
|
525
|
+
|
|
526
|
+
map_started_at = monotonic_ms
|
|
513
527
|
docs, _report = mapper.map_batch!(rows, batch_index: idx)
|
|
528
|
+
map_duration_ms = (monotonic_ms - map_started_at).round(1)
|
|
529
|
+
attach_stage_metrics!(
|
|
530
|
+
docs,
|
|
531
|
+
source_duration_ms: source_duration_ms,
|
|
532
|
+
map_duration_ms: map_duration_ms,
|
|
533
|
+
source_rows_count: rows.size
|
|
534
|
+
)
|
|
514
535
|
y << docs
|
|
515
536
|
idx += 1
|
|
516
537
|
end
|
|
517
538
|
end
|
|
518
539
|
end
|
|
540
|
+
|
|
541
|
+
# Store stage timings on a mapped batch under the
# +@__search_engine_stage_metrics__+ instance variable.
#
# Best-effort: any StandardError raised while setting the variable is swallowed.
# @param docs [Object] mapped batch that will carry the metrics
# @param metrics [Hash] stage timings to attach
# @return [Hash, nil] +metrics+ on success, nil when attaching raised
def attach_stage_metrics!(docs, metrics)
  ivar_name = :@__search_engine_stage_metrics__
  docs.instance_variable_set(ivar_name, metrics)
rescue StandardError
  nil
end
|
|
546
|
+
|
|
547
|
+
# Read a numeric metric from a summary object, rounded to one decimal place.
#
# @param summary [Object] object that may expose +key+ as a public reader
# @param key [Symbol] reader name
# @return [Float, nil] nil when the reader is absent or returns nil
def summary_metric(summary, key)
  return nil unless summary.respond_to?(key)

  raw = summary.public_send(key)
  raw.nil? ? nil : raw.to_f.round(1)
end
|
|
519
553
|
end
|
|
520
554
|
end
|
|
521
555
|
end
|
|
@@ -172,6 +172,11 @@ module SearchEngine
|
|
|
172
172
|
entry[:success_total] = summary_value(summary, :success_total).to_i
|
|
173
173
|
entry[:failed_total] = summary_value(summary, :failed_total).to_i
|
|
174
174
|
entry[:sample_error] = summary_value(summary, :sample_error)
|
|
175
|
+
entry[:duration_ms_total] = summary_value(summary, :duration_ms_total)
|
|
176
|
+
entry[:source_duration_ms_total] = summary_value(summary, :source_duration_ms_total)
|
|
177
|
+
entry[:map_duration_ms_total] = summary_value(summary, :map_duration_ms_total)
|
|
178
|
+
entry[:jsonl_duration_ms_total] = summary_value(summary, :jsonl_duration_ms_total)
|
|
179
|
+
entry[:import_duration_ms_total] = summary_value(summary, :import_duration_ms_total)
|
|
175
180
|
end
|
|
176
181
|
|
|
177
182
|
def summary_value(summary, key)
|