search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Lightweight wrapper over ActiveSupport::Notifications that standardizes
|
|
5
|
+
# event names, payload shaping, and timing helpers. Provides a thread-local
|
|
6
|
+
# context to propagate dispatch metadata (e.g., mode, job_id) from the
|
|
7
|
+
# dispatcher/job into lower layers such as the indexer.
|
|
8
|
+
#
|
|
9
|
+
# Public API is intentionally small:
|
|
10
|
+
# - {.instrument(event, payload = {}) { |ctx| }}: emit an event (yields mutable ctx)
|
|
11
|
+
# - {.time(event, base_payload = {}) { |payload| }}: measure duration_ms
|
|
12
|
+
# - {.with_context(hash) { }}: set per-thread context for nested calls
|
|
13
|
+
# - {.context}: current shallow context Hash (dup)
|
|
14
|
+
#
|
|
15
|
+
# All payloads are duped, nil values are pruned, and keys are symbolized.
|
|
16
|
+
module Instrumentation
|
|
17
|
+
THREAD_KEY = :__search_engine_context__
|
|
18
|
+
THREAD_CORRELATION_KEY = :__search_engine_correlation_id__
|
|
19
|
+
|
|
20
|
+
CATALOG = {
|
|
21
|
+
'search_engine.search' => {
|
|
22
|
+
required: %i[collection],
|
|
23
|
+
optional: %i[
|
|
24
|
+
labels status duration_ms correlation_id params_preview http_status url_opts
|
|
25
|
+
error_class error_message selection_include_count selection_exclude_count
|
|
26
|
+
selection_nested_assoc_count preset_name preset_mode preset_pruned_keys
|
|
27
|
+
preset_pruned_keys_count preset_locked_domains_count curation_pinned_count
|
|
28
|
+
curation_hidden_count curation_has_override_tags curation_filter_flag
|
|
29
|
+
curation_conflict_type curation_conflict_count
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
'search_engine.multi_search' => {
|
|
33
|
+
required: %i[searches_count],
|
|
34
|
+
optional: %i[labels status duration_ms correlation_id url_opts http_status]
|
|
35
|
+
},
|
|
36
|
+
'search_engine.compile' => {
|
|
37
|
+
required: %i[collection klass node_count source],
|
|
38
|
+
optional: %i[duration_ms]
|
|
39
|
+
},
|
|
40
|
+
'search_engine.grouping.compile' => {
|
|
41
|
+
required: %i[field],
|
|
42
|
+
optional: %i[collection limit missing_values duration_ms]
|
|
43
|
+
},
|
|
44
|
+
'search_engine.joins.compile' => {
|
|
45
|
+
required: %i[collection],
|
|
46
|
+
optional: %i[join_count assocs used_in include_len filter_len sort_len duration_ms has_joins]
|
|
47
|
+
},
|
|
48
|
+
'search_engine.selection.compile' => {
|
|
49
|
+
required: %i[],
|
|
50
|
+
optional: %i[include_count exclude_count nested_assoc_count]
|
|
51
|
+
},
|
|
52
|
+
'search_engine.preset.apply' => {
|
|
53
|
+
required: %i[preset_name],
|
|
54
|
+
optional: %i[preset_mode preset_pruned_keys preset_pruned_keys_count preset_locked_domains_count]
|
|
55
|
+
},
|
|
56
|
+
'search_engine.preset.conflict' => {
|
|
57
|
+
required: %i[type count],
|
|
58
|
+
optional: %i[limit]
|
|
59
|
+
},
|
|
60
|
+
'search_engine.curation.compile' => {
|
|
61
|
+
required: %i[],
|
|
62
|
+
optional: %i[pinned_count hidden_count has_override_tags filter_curated_hits]
|
|
63
|
+
},
|
|
64
|
+
'search_engine.curation.conflict' => {
|
|
65
|
+
required: %i[type count],
|
|
66
|
+
optional: %i[limit]
|
|
67
|
+
},
|
|
68
|
+
'search_engine.schema.diff' => {
|
|
69
|
+
required: %i[collection],
|
|
70
|
+
optional: %i[fields_changed_count added_count removed_count in_sync duration_ms]
|
|
71
|
+
},
|
|
72
|
+
'search_engine.schema.apply' => {
|
|
73
|
+
required: %i[collection],
|
|
74
|
+
optional: %i[physical_new new_physical alias_swapped dropped_count retention_deleted_count status duration_ms]
|
|
75
|
+
},
|
|
76
|
+
'search_engine.indexer.partition_start' => {
|
|
77
|
+
required: %i[collection into partition],
|
|
78
|
+
optional: %i[dispatch_mode job_id timestamp]
|
|
79
|
+
},
|
|
80
|
+
'search_engine.indexer.partition_finish' => {
|
|
81
|
+
required: %i[collection into partition batches_total docs_total success_total failed_total status],
|
|
82
|
+
optional: %i[duration_ms]
|
|
83
|
+
},
|
|
84
|
+
'search_engine.indexer.batch_import' => {
|
|
85
|
+
required: %i[collection into batch_index docs_count],
|
|
86
|
+
optional: %i[
|
|
87
|
+
success_count failure_count attempts http_status bytes_sent duration_ms
|
|
88
|
+
transient_retry error_sample
|
|
89
|
+
]
|
|
90
|
+
},
|
|
91
|
+
'search_engine.indexer.delete_stale' => {
|
|
92
|
+
required: %i[collection into partition filter_hash status],
|
|
93
|
+
optional: %i[deleted_count duration_ms reason]
|
|
94
|
+
},
|
|
95
|
+
'search_engine.result.grouped_parsed' => { required: %i[collection groups_count], optional: %i[] },
|
|
96
|
+
'search_engine.joins.declared' => { required: %i[model name collection], optional: %i[] },
|
|
97
|
+
'search_engine.relation.group_by_updated' => {
|
|
98
|
+
required: %i[collection field],
|
|
99
|
+
optional: %i[limit missing_values]
|
|
100
|
+
},
|
|
101
|
+
'search_engine.facet.compile' => {
|
|
102
|
+
required: %i[],
|
|
103
|
+
optional: %i[collection fields_count queries_count max_facet_values sort_flags conflicts duration_ms]
|
|
104
|
+
},
|
|
105
|
+
'search_engine.highlight.compile' => {
|
|
106
|
+
required: %i[],
|
|
107
|
+
optional: %i[collection fields_count full_fields_count affix_tokens snippet_threshold tag_kind duration_ms]
|
|
108
|
+
},
|
|
109
|
+
'search_engine.synonyms.apply' => {
|
|
110
|
+
required: %i[],
|
|
111
|
+
optional: %i[collection use_synonyms use_stopwords source duration_ms]
|
|
112
|
+
},
|
|
113
|
+
'search_engine.geo.compile' => {
|
|
114
|
+
required: %i[],
|
|
115
|
+
optional: %i[collection filters_count shapes sort_mode radius_bucket duration_ms]
|
|
116
|
+
},
|
|
117
|
+
'search_engine.vector.compile' => {
|
|
118
|
+
required: %i[],
|
|
119
|
+
optional: %i[collection query_vector_present dims hybrid_weight ann_params_present duration_ms]
|
|
120
|
+
},
|
|
121
|
+
'search_engine.hits.limit' => {
|
|
122
|
+
required: %i[],
|
|
123
|
+
optional: %i[collection early_limit validate_max applied_strategy triggered total_hits duration_ms]
|
|
124
|
+
}
|
|
125
|
+
}.freeze
|
|
126
|
+
|
|
127
|
+
def self.catalog
|
|
128
|
+
CATALOG
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# Emit +event+ with a shaped payload, yielding that payload to the block as a
# mutable context.
#
# The context is stamped with :correlation_id (reusing, or seeding, the
# thread-local id), :status (:ok unless the block already set one),
# error details when the block raises a StandardError (which is re-raised),
# and :duration_ms measured on the monotonic clock. When
# ActiveSupport::Notifications is not loaded the block still runs and the
# context is still stamped; no event is published.
#
# @param event [String]
# @param payload [Hash]
# @yieldparam ctx [Hash]
# @return [Object] block result when provided
def self.instrument(event, payload = {})
  t0 = monotonic_ms
  ctx = shape_payload(payload)
  ctx[:correlation_id] ||= (Thread.current[THREAD_CORRELATION_KEY] ||= generate_correlation_id)

  # Shared body for both paths; `yield` here targets the block given to
  # `instrument` itself.
  run = lambda do
    outcome = block_given? ? yield(ctx) : nil
    ctx[:status] = :ok unless ctx.key?(:status)
    outcome
  rescue StandardError => e
    fill_error_context!(ctx, e)
    raise
  ensure
    ctx[:duration_ms] = (monotonic_ms - t0).round(1)
  end

  # Fallback path when AS::N is unavailable
  return run.call unless defined?(ActiveSupport::Notifications)

  ActiveSupport::Notifications.instrument(event, ctx) { run.call }
end
|
|
169
|
+
|
|
170
|
+
# Known events
|
|
171
|
+
#
|
|
172
|
+
# - "search_engine.joins.compile": emitted once per relation compile summarizing JOIN usage.
|
|
173
|
+
# Payload keys (nil/empty omitted):
|
|
174
|
+
# - :collection [String]
|
|
175
|
+
# - :join_count [Integer]
|
|
176
|
+
# - :assocs [Array<String>]
|
|
177
|
+
# - :used_in [Hash{Symbol=>Array<String>}] include/filter/sort association usage
|
|
178
|
+
# - :include_len [Integer]
|
|
179
|
+
# - :filter_len [Integer]
|
|
180
|
+
# - :sort_len [Integer]
|
|
181
|
+
# - :duration_ms [Float]
|
|
182
|
+
# - :has_joins [Boolean]
|
|
183
|
+
# - "search_engine.selection.compile": emitted once per relation compile summarizing field selection counts.
|
|
184
|
+
# Payload keys:
|
|
185
|
+
# - :include_count [Integer] total effective include fields (root + nested after precedence)
|
|
186
|
+
# - :exclude_count [Integer] total excluded fields (root + nested)
|
|
187
|
+
# - :nested_assoc_count [Integer] associations with any selection state (include or exclude)
|
|
188
|
+
# - "search_engine.preset.apply": emitted once per relation compile when a preset is present.
|
|
189
|
+
# Payload keys (keys-only; values redacted elsewhere):
|
|
190
|
+
# - :preset_name [String] effective namespaced preset
|
|
191
|
+
# - :mode [Symbol] one of :merge, :only, :lock
|
|
192
|
+
# - :locked_domains [Array<Symbol>] configured locked domains for :lock mode
|
|
193
|
+
# - :pruned_keys [Array<Symbol>] keys removed by the chosen mode
|
|
194
|
+
# - "search_engine.curation.compile": emitted once per relation compile when curation state is present.
|
|
195
|
+
# Payload keys:
|
|
196
|
+
# - :pinned_count [Integer]
|
|
197
|
+
# - :hidden_count [Integer]
|
|
198
|
+
# - :has_override_tags [Boolean]
|
|
199
|
+
# - :filter_curated_hits [true,false,nil]
|
|
200
|
+
# - "search_engine.curation.conflict": emitted when overlaps or limits are detected; at most once per compile.
|
|
201
|
+
# Payload keys:
|
|
202
|
+
# - :type [Symbol] one of :overlap, :limit_exceeded
|
|
203
|
+
# - :count [Integer]
|
|
204
|
+
# - :limit [Integer, optional] present when type==:limit_exceeded
|
|
205
|
+
# See also: https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/presets#observability
|
|
206
|
+
# Measure a block: runs it inside {.instrument}, which stamps duration_ms on
# the emitted payload, and additionally records :started_at_ms (monotonic
# clock reading taken before the block runs).
#
# NOTE: ActiveSupport::Notifications already attaches duration; callers
# should prefer event.duration on the subscriber side. We still provide
# started_at_ms in the payload for completeness.
#
# @param event [String]
# @param base_payload [Hash, nil] nil is treated as an empty payload, matching {.instrument}
# @yieldparam payload [Hash] the caller-supplied payload (not the shaped context)
# @yieldreturn [Object] result of the block
# @return [Object] result of the block, or nil when no block is given
def self.time(event, base_payload = {})
  payload = base_payload || {}
  # `merge` builds a new Hash, so the caller's payload is never mutated.
  instrument(event, payload.merge(started_at_ms: monotonic_ms)) do
    yield(payload) if block_given?
  end
end
|
|
223
|
+
|
|
224
|
+
# Run the block with +ctx+ shallow-merged over the current thread-local
# context; the previous context is put back afterwards, even if the block
# raises.
# @param ctx [Hash]
# @yield block executed with context applied
# @return [Object] result of the block
def self.with_context(ctx)
  saved = Thread.current[THREAD_KEY]
  base = saved || {}
  extra = ctx || {}
  Thread.current[THREAD_KEY] = base.merge(extra)
  yield
ensure
  Thread.current[THREAD_KEY] = saved
end
|
|
235
|
+
|
|
236
|
+
# Shallow copy of the current thread-local context (empty Hash when none is set).
# @return [Hash]
def self.context
  current = Thread.current[THREAD_KEY]
  current ? current.dup : {}
end
|
|
241
|
+
|
|
242
|
+
# Run the block with a correlation id set for the current execution
# (thread/fiber-local), then put back whatever id was set before.
# Precedence: explicit +id+, then the id already in effect, then a freshly
# generated one.
# @param id [String, nil]
# @yieldreturn [Object]
def self.with_correlation_id(id = nil)
  saved = Thread.current[THREAD_CORRELATION_KEY]
  Thread.current[THREAD_CORRELATION_KEY] = id || saved || generate_correlation_id
  yield
ensure
  Thread.current[THREAD_CORRELATION_KEY] = saved
end
|
|
257
|
+
|
|
258
|
+
# @return [String, nil]
|
|
259
|
+
def self.current_correlation_id
|
|
260
|
+
Thread.current[THREAD_CORRELATION_KEY]
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# Redact a payload-like structure (delegates to Observability)
|
|
264
|
+
def self.redact(value)
|
|
265
|
+
SearchEngine::Observability.redact(value)
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
# Monotonic clock in milliseconds.
|
|
269
|
+
# @return [Float]
|
|
270
|
+
def self.monotonic_ms
|
|
271
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
|
|
272
|
+
rescue StandardError
|
|
273
|
+
(Time.now.to_f * 1000.0)
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
# Internal: normalize and prune a payload hash.
#
# Explicit payload entries and entries taken from the current thread-local
# context are treated alike: nil values are dropped and keys are symbolized.
# Explicit payload keys always win over context keys.
#
# @param payload [Hash, nil]
# @return [Hash{Symbol=>Object}] a new Hash; the input is not mutated
def self.shape_payload(payload)
  shaped = {}
  (payload || {}).each do |key, value|
    shaped[key.to_sym] = value unless value.nil?
  end

  # Attach current context values without overriding explicit keys. Keys are
  # symbolized first so a string-keyed context entry cannot shadow or
  # duplicate an explicit symbol key.
  context.each do |key, value|
    next if value.nil?

    name = key.to_sym
    shaped[name] = value unless shaped.key?(name)
  end
  shaped
end
|
|
291
|
+
private_class_method :shape_payload
|
|
292
|
+
|
|
293
|
+
def self.generate_correlation_id
|
|
294
|
+
require 'securerandom'
|
|
295
|
+
SecureRandom.urlsafe_base64(8)
|
|
296
|
+
end
|
|
297
|
+
private_class_method :generate_correlation_id
|
|
298
|
+
|
|
299
|
+
# Internal: record failure details on +ctx+ without overwriting values the
# caller already set (:status, :error_class, :error_message).
# The message is passed through Observability.truncate_message using the
# configured max_message_length, or 200 when that is not set.
def self.fill_error_context!(ctx, error)
  ctx[:status] = :error unless ctx.key?(:status)
  ctx[:error_class] = error.class.name unless ctx[:error_class]
  return ctx[:error_message] if ctx[:error_message]

  text = error.message
  limit = SearchEngine.config.observability&.max_message_length || 200
  ctx[:error_message] = SearchEngine::Observability.truncate_message(text, limit)
end
|
|
307
|
+
end
|
|
308
|
+
end
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
module Joins
|
|
5
|
+
# Stateless guard for validating join usage across Relation and Parser.
|
|
6
|
+
#
|
|
7
|
+
# Public API is module functions that raise {SearchEngine::Errors::*}
|
|
8
|
+
# on invalid inputs; successful validations return nil.
|
|
9
|
+
module Guard
|
|
10
|
+
module_function
|
|
11
|
+
|
|
12
|
+
# Verify that +assoc+ is declared in the joins config of +klass+.
# The raised error carries close-match suggestions (when any) in its
# message and hint, and the list of known associations in its details.
#
# @param klass [Class] model class
# @param assoc [Symbol, String]
# @raise [SearchEngine::Errors::InvalidJoin]
# @return [void]
def ensure_assoc_exists!(klass, assoc)
  key = assoc.to_sym
  return if safe_joins_config(klass)[key]

  suggestions = suggest(key, safe_joins_config(klass).keys)
  message = [
    "association :#{key} is not declared on #{safe_class_name(klass)} ",
    "(declare with `join :#{key}, ...`)",
    suggestion_suffix(suggestions)
  ].join
  hint = ("Did you mean #{suggestions.map { |s| ":#{s}" }.join(', ')}?" if suggestions&.any?)

  raise SearchEngine::Errors::InvalidJoin.new(
    message,
    hint: hint,
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
    details: { assoc: key, known: safe_joins_config(klass).keys }
  )
end
|
|
35
|
+
|
|
36
|
+
# Verify that the join config for +assoc+ carries non-blank :local_key and
# :foreign_key. An association that is not declared at all is reported via
# ensure_assoc_exists! instead.
#
# @param klass [Class]
# @param assoc [Symbol, String]
# @raise [SearchEngine::Errors::InvalidJoinConfig]
# @return [void]
def ensure_config_complete!(klass, assoc)
  key = assoc.to_sym
  cfg = safe_joins_config(klass)[key]
  unless cfg
    # If missing entirely, surface presence error via ensure_assoc_exists!
    ensure_assoc_exists!(klass, key)
    return
  end

  missing = %i[local_key foreign_key].select { |name| blank?(cfg[name]) }
  return if missing.empty?

  listed = missing.map { |name| ":#{name}" }.join(' and ')
  message = "join :#{key} on #{safe_class_name(klass)} is missing #{listed}" \
            " (declare with `join :#{key}, collection: ..., local_key: ..., foreign_key: ...`)"
  raise SearchEngine::Errors::InvalidJoinConfig.new(
    message,
    hint: 'Declare local_key and foreign_key in join config.',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
    details: { assoc: key, missing: missing }
  )
end
|
|
68
|
+
|
|
69
|
+
# Verify that +assoc+ is among the associations already applied to the
# relation via .joins(:assoc).
#
# @param joins [Array<Symbol>, nil]
# @param assoc [Symbol, String]
# @param context [String] optional human-friendly action (e.g., "filtering/sorting")
# @raise [SearchEngine::Errors::JoinNotApplied]
# @return [void]
def ensure_join_applied!(joins, assoc, context: 'filtering/sorting')
  key = assoc.to_sym
  applied = Array(joins).include?(key)
  unless applied
    raise SearchEngine::Errors::JoinNotApplied.new(
      "Call .joins(:#{key}) before #{context} on #{key} fields",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
      details: { assoc: key, context: context }
    )
  end
end
|
|
87
|
+
|
|
88
|
+
# Validate that a joined field exists on the target collection when the
# target model is registered and exposes attributes.
#
# Best-effort: when the config or collection is missing, the target model
# cannot be resolved, or it declares no attributes, no exception is raised
# (prefer low-noise behavior on missing metadata). `id` is always accepted
# on a target that does declare attributes.
#
# @param assoc_cfg [Hash] normalized association config
# @param field [Symbol, String]
# @param source_klass [Class, nil] optional source model class for error messages
# @raise [SearchEngine::Errors::UnknownJoinField]
# @return [void]
def validate_joined_field!(assoc_cfg, field, source_klass: nil)
  return if assoc_cfg.nil?

  collection = assoc_cfg[:collection]
  return if blank?(collection)

  target_klass = begin
    SearchEngine.collection_for(collection)
  rescue StandardError
    nil
  end
  return unless target_klass.respond_to?(:attributes)

  declared = Array(target_klass.attributes).map { |name, _| name.to_s }
  # Bail out BEFORE adding the implicit id: with no declared attributes
  # there is nothing to validate against, and every field would be rejected.
  return if declared.empty?

  known = declared | ['id'] # allow id implicitly on joined collections
  fname = field.to_s
  return if known.include?(fname)

  suggestions = suggest(fname, known)
  # Prefer the source model name for message clarity when provided
  model_name = safe_class_name(source_klass || target_klass)
  # Best effort: fall back to the collection name when the config has no :name
  assoc_name = assoc_cfg[:name] || collection.to_s
  msg = "UnknownJoinField: :#{fname} is not declared on association :#{assoc_name} for #{model_name}"
  msg += suggestion_suffix(suggestions)
  raise SearchEngine::Errors::UnknownJoinField.new(
    msg,
    details: { assoc: assoc_name, field: fname }
  )
end
|
|
133
|
+
|
|
134
|
+
# Reject multi-hop paths like $authors.publisher.name: a path may contain
# at most one dot.
#
# @param path [String]
# @raise [SearchEngine::Errors::UnsupportedJoinNesting]
# @return [void]
def ensure_single_hop_path!(path)
  dots = path.to_s.count('.')
  return unless dots > 1

  raise SearchEngine::Errors::UnsupportedJoinNesting.new(
    'Only one join hop is supported: `$assoc.field`. Use a separate pipeline step to denormalize deeper paths.',
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
    details: { path: path }
  )
end
|
|
148
|
+
|
|
149
|
+
# --- internals -------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
# Internal: up to three closest candidates from +known+ for +input+, by
# Levenshtein distance (via did_you_mean). Only candidates at the minimum
# distance are returned, and only when that distance is at most 2; ties are
# ordered alphabetically. Returns [] when +known+ is nil/empty or when
# did_you_mean cannot be loaded.
#
# @param input [Symbol, String]
# @param known [Array<Symbol, String>, nil]
# @return [Array<Symbol>]
def suggest(input, known)
  return [] if known.nil? || known.empty?

  begin
    require 'did_you_mean'
    require 'did_you_mean/levenshtein'
  rescue LoadError, StandardError
    # LoadError is a ScriptError, not a StandardError, so it must be named
    # explicitly for the "library unavailable" fallback to take effect.
    return []
  end

  candidates = Array(known).map(&:to_s)
  str = input.to_s
  distances = candidates.each_with_object({}) do |cand, acc|
    acc[cand] = DidYouMean::Levenshtein.distance(str, cand)
  end
  min = distances.values.min
  return [] if min.nil? || min > 2

  best = distances.select { |_, d| d == min }.keys.sort
  best.first(3).map(&:to_sym)
end
|
|
172
|
+
private_class_method :suggest
|
|
173
|
+
|
|
174
|
+
# Internal: render suggestions as a " (did you mean :a, :b?)" message tail;
# empty string when there is nothing to suggest.
def suggestion_suffix(suggestions)
  if suggestions.nil? || suggestions.empty?
    ''
  else
    listed = suggestions.map { |name| ":#{name}" }.join(', ')
    " (did you mean #{listed}?)"
  end
end
|
|
180
|
+
private_class_method :suggestion_suffix
|
|
181
|
+
|
|
182
|
+
# Internal: joins config of +klass+, or an empty Hash when the class does
# not respond to joins_config or returns nil/false from it.
def safe_joins_config(klass)
  return {} unless klass.respond_to?(:joins_config)

  klass.joins_config || {}
end
|
|
189
|
+
private_class_method :safe_joins_config
|
|
190
|
+
|
|
191
|
+
# Internal: printable name for +klass+; falls back to to_s when the object
# has no #name or its name is nil (e.g. an anonymous class).
def safe_class_name(klass)
  label = klass.name if klass.respond_to?(:name)
  label || klass.to_s
end
|
|
194
|
+
private_class_method :safe_class_name
|
|
195
|
+
|
|
196
|
+
# Internal: true for nil and for values whose string form is empty or
# whitespace-only.
def blank?(value)
  return true if value.nil?

  value.to_s.strip.empty?
end
|
|
199
|
+
private_class_method :blank?
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
module Joins
|
|
5
|
+
# Key resolver for join associations.
|
|
6
|
+
#
|
|
7
|
+
# Determines { local_key, foreign_key } to use for an association, honoring
|
|
8
|
+
# explicitly provided keys and auto-inferring when absent. Auto-inference
|
|
9
|
+
# prefers a single shared attribute name between the base and target models
|
|
10
|
+
# that ends with `_id`. When ambiguity is detected, an error is raised with
|
|
11
|
+
# short suggestions.
|
|
12
|
+
module Resolver
|
|
13
|
+
module_function
|
|
14
|
+
|
|
15
|
+
# Resolve join keys for an association.
#
# When both keys are given in the config they are validated and returned as
# symbols. Otherwise a single key is inferred from the attribute names the
# base and target models share, preferring names ending in `_id`; anything
# other than exactly one candidate is reported as ambiguous.
#
# @param base_klass [Class] the model on which the join is declared
# @param assoc_cfg [Hash] normalized join config (from joins_config)
# @return [Hash] { local_key: Symbol, foreign_key: Symbol }
# @raise [SearchEngine::Errors::InvalidJoinConfig]
def resolve_keys(base_klass, assoc_cfg)
  local_key = assoc_cfg[:local_key]
  foreign_key = assoc_cfg[:foreign_key]
  explicit = present?(local_key) && present?(foreign_key)

  if explicit
    validate_key_known!(base_klass, local_key, side: :local)
    validate_foreign_key_known!(assoc_cfg, foreign_key)
    return { local_key: local_key.to_sym, foreign_key: foreign_key.to_sym }
  end

  # Infer when any of the keys is missing
  base_names = safe_attributes(base_klass).keys.map(&:to_s)
  target_names = safe_attributes(safe_target_klass(assoc_cfg[:collection])).keys.map(&:to_s)
  shared = base_names & target_names
  id_like = shared.select { |name| name.end_with?('_id') }
  candidates = id_like.empty? ? shared : id_like

  raise_ambiguous_keys!(base_klass, assoc_cfg, candidates) unless candidates.length == 1

  inferred = candidates.first.to_sym
  { local_key: inferred, foreign_key: inferred }
end
|
|
45
|
+
|
|
46
|
+
# --- internals --------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
def validate_key_known!(klass, key, side:)
|
|
49
|
+
attrs = safe_attributes(klass)
|
|
50
|
+
return if key.to_sym == :id || attrs.key?(key.to_sym)
|
|
51
|
+
|
|
52
|
+
model_name = klass.respond_to?(:name) && klass.name ? klass.name : klass.to_s
|
|
53
|
+
raise SearchEngine::Errors::InvalidJoin.new(
|
|
54
|
+
"Unknown #{side} key :#{key} for #{model_name}. Declare it via `attribute :#{key}, ...`.",
|
|
55
|
+
doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#troubleshooting',
|
|
56
|
+
details: { side: side, key: key, model: model_name }
|
|
57
|
+
)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def validate_foreign_key_known!(assoc_cfg, key)
|
|
61
|
+
tklass = safe_target_klass(assoc_cfg[:collection])
|
|
62
|
+
validate_key_known!(tklass, key, side: :foreign)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def safe_target_klass(collection_name)
|
|
66
|
+
SearchEngine.collection_for(collection_name)
|
|
67
|
+
rescue StandardError
|
|
68
|
+
nil
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def safe_attributes(klass)
|
|
72
|
+
return {} unless klass.respond_to?(:attributes)
|
|
73
|
+
|
|
74
|
+
klass.attributes || {}
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def raise_ambiguous_keys!(base_klass, assoc_cfg, candidates)
|
|
78
|
+
base_name = base_klass.respond_to?(:name) && base_klass.name ? base_klass.name : base_klass.to_s
|
|
79
|
+
assoc_name = assoc_cfg[:name] || assoc_cfg[:collection] || :unknown
|
|
80
|
+
sugg = candidates.map { |n| ":#{n}" }.join(', ')
|
|
81
|
+
msg = "Ambiguous join keys for :#{assoc_name} on #{base_name}. " \
|
|
82
|
+
"Could not infer a unique shared key. Candidates: #{sugg}"
|
|
83
|
+
raise SearchEngine::Errors::InvalidJoinConfig.new(
|
|
84
|
+
msg,
|
|
85
|
+
doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#client-side-fallback',
|
|
86
|
+
details: { assoc: assoc_name, candidates: candidates }
|
|
87
|
+
)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def present?(v)
|
|
91
|
+
!(v.nil? || v.to_s.strip.empty?)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  module Logging
    # ANSI color helpers for CLI output.
    #
    # Applies colors only when $stdout is a TTY and NO_COLOR is unset or empty.
    # Intended for short substring coloring inside existing log lines.
    #
    # @since M8
    module Color
      module_function

      # SGR foreground codes for the supported color names.
      SGR_CODES = { 'green' => 32, 'yellow' => 33, 'red' => 31 }.freeze

      # Wrap a string in the ANSI escape sequence for a color.
      #
      # The string is returned unchanged when coloring is disabled or when
      # the color is nil or not one of the supported names.
      #
      # @param str [String]
      # @param color [Symbol, String, nil] one of :green, :yellow, :red
      # @return [String]
      def apply(str, color)
        return str unless enabled?

        # Look up by string form so a nil color falls through like any other
        # unknown color instead of raising.
        code = SGR_CODES[color.to_s]
        return str unless code

        "\e[#{code}m#{str}\e[0m"
      end

      # Apply bold styling to a string.
      #
      # @param str [String]
      # @return [String] the string unchanged when coloring is disabled
      def bold(str)
        return str unless enabled?

        "\e[1m#{str}\e[0m"
      end

      # Map indexation status to a color.
      #
      # @param status [#to_s]
      # @return [Symbol] :green for 'ok', :red for 'failed', :yellow for
      #   'partial' and any other value
      def for_status(status)
        case status.to_s
        when 'ok' then :green
        when 'failed' then :red
        else :yellow
        end
      end

      # Determine color for partition/status based on success/failure counts.
      #
      # @param failed_total [Integer] number of failed documents
      # @param success_total [Integer] number of successful documents
      # @return [Symbol] color name (:green, :yellow, or :red)
      def for_partition_status(failed_total, success_total)
        if failed_total.to_i.zero?
          :green
        elsif success_total.to_i.positive?
          :yellow # partial success
        else
          :red # all failed
        end
      end

      # NO_COLOR disables coloring only when it is present and non-empty.
      # Any error raised while probing $stdout is treated as "not a TTY".
      #
      # @return [Boolean] whether coloring is active
      def enabled?
        return false unless ENV['NO_COLOR'].to_s.empty?

        $stdout.isatty
      rescue StandardError
        false
      end
    end
  end
end
|