search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,1083 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Schema utilities to compile model DSL into a Typesense-compatible schema
|
|
5
|
+
# hash and to diff it against a live collection.
|
|
6
|
+
#
|
|
7
|
+
# Public API:
|
|
8
|
+
# - {.compile(klass)} => Hash
|
|
9
|
+
# - {.diff(klass, client: SearchEngine.client)} => { diff: Hash, pretty: String }
|
|
10
|
+
module Schema
|
|
11
|
+
# Deterministic mapping from DSL types to Typesense field types.
|
|
12
|
+
#
|
|
13
|
+
# Policy:
|
|
14
|
+
# - :integer -> "int64" (consistent; prefer wider range)
|
|
15
|
+
# - :float/:decimal -> "float"
|
|
16
|
+
# - :string -> "string"
|
|
17
|
+
# - :boolean -> "bool"
|
|
18
|
+
# - :time/:datetime -> "int64" (epoch seconds)
|
|
19
|
+
# - :time_string/:datetime_string -> "string" (ISO8601 timestamps)
|
|
20
|
+
# - Array types (e.g. [:string]) -> "string[]" (when present)
|
|
21
|
+
# DSL type name (Symbol) => Typesense field type (String).
# Time-like types compile to int64; the *_string variants stay strings.
TYPE_MAPPING = {
  string: 'string',
  integer: 'int64',
  float: 'float',
  decimal: 'float',
  boolean: 'bool',
  time: 'int64',
  datetime: 'int64',
  time_string: 'string',
  datetime_string: 'string'
}.freeze

# Field attribute keys listed for field comparison.
FIELD_COMPARE_KEYS = [
  :type, :reference, :async_reference, :locale, :sort, :optional, :infix, :facet
].freeze

# Trailing "_YYYYMMDD_HHMMSS_###" suffix carried by physical collection names.
PHYSICAL_SUFFIX_RE = %r{_\d{8}_\d{6}_\d{3}\z}
|
|
35
|
+
|
|
36
|
+
class << self
|
|
37
|
+
# Build a Typesense-compatible schema hash from a model class DSL.
|
|
38
|
+
#
|
|
39
|
+
# The output includes only keys that are supported and declared via the DSL.
|
|
40
|
+
# Fields explicitly marked with `index: false` are intentionally omitted
|
|
41
|
+
# from the compiled schema (they can still be sent in documents and will
|
|
42
|
+
# be hydrated/displayed, but are not indexed in memory).
|
|
43
|
+
#
|
|
44
|
+
# @param klass [Class] model class inheriting from {SearchEngine::Base}
# @param client [SearchEngine::Client, nil] optional client, forwarded to field compilation
|
|
45
|
+
# @return [Hash] frozen schema hash with symbol keys
|
|
46
|
+
# @raise [ArgumentError] if the class has no collection name defined
|
|
47
|
+
# @note Automatically sets `enable_nested_fields: true` at collection level when
|
|
48
|
+
# any attribute is declared with type `:object` or `[:object]`.
|
|
49
|
+
def compile(klass, client: nil)
  # `client` is not used here directly; it is only handed to the field compiler.
  name = collection_name_for!(klass)
  fields, nested = compile_fields_for(klass, client: client)

  # The implicit `id` is deliberately left out of the compiled schema:
  # Typesense treats `id` as a special string identifier that is not declared
  # in the collection schema, so including it would only produce confusing
  # diffs against the live schema.
  coerce_doc_updated_at_type!(fields)

  # Freeze the whole structure so callers cannot mutate the compiled schema.
  deep_freeze(build_schema_hash(name, fields, nested))
end
|
|
61
|
+
|
|
62
|
+
# No longer used: id is not included in compiled schema.
|
|
63
|
+
def infer_id_field_type(_klass)
  # The argument is ignored: the answer is always the string type.
  'string'
end
|
|
66
|
+
|
|
67
|
+
# Diff the compiled schema for +klass+ against the live physical collection
|
|
68
|
+
# in Typesense, resolving aliases when present. Returns both a structured
|
|
69
|
+
# diff Hash and a compact human-readable summary string.
|
|
70
|
+
#
|
|
71
|
+
# @param klass [Class] model class inheriting from {SearchEngine::Base}
|
|
72
|
+
# @param client [SearchEngine::Client] optional client wrapper (for tests)
|
|
73
|
+
# @return [Hash] { diff: Hash, pretty: String }
|
|
74
|
+
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema-indexer-e2e`
|
|
75
|
+
# @see `https://typesense.org/docs/latest/api/collections.html`
|
|
76
|
+
def diff(klass, client: nil)
  # Fall back to the shared client when none is injected (e.g. outside tests).
  client ||= SearchEngine.client
  compiled = compile(klass, client: client)
  logical_name = compiled[:name]

  # The logical name may be an alias; when it does not resolve, the logical
  # name itself is used as the physical collection name.
  physical_name = client.resolve_alias(logical_name) || logical_name
  live_schema = client.retrieve_collection_schema(physical_name)
  stale_refs = live_schema ? detect_stale_references(live_schema, client: client) : []

  if live_schema.nil?
    # No live collection: every compiled field is reported as added (the full
    # list, not a truncated sample) so counts and the pretty output are accurate.
    diff_hash = {
      collection: { name: logical_name, physical: physical_name },
      added_fields: compiled[:fields].dup,
      removed_fields: [],
      changed_fields: {},
      collection_options: { live: :missing },
      stale_references: stale_refs
    }
    payload = {
      collection: klass.name.to_s,
      logical: logical_name,
      physical_current: nil,
      fields_changed_count: 0,
      added_count: diff_hash[:added_fields].size,
      removed_count: 0,
      stale_references_count: stale_refs.size,
      in_sync: false
    }
    SearchEngine::Instrumentation.instrument('search_engine.schema.diff', payload) {}
    return { diff: diff_hash, pretty: pretty_print(diff_hash) }
  end

  # Normalize both sides to the same shape before comparing.
  normalized_compiled = normalize_schema(compiled)
  normalized_live = normalize_schema(live_schema)

  added, removed, changed = diff_fields(normalized_compiled[:fields], normalized_live[:fields])
  collection_opts_changes = diff_collection_options(normalized_compiled, normalized_live)

  diff_hash = {
    collection: { name: logical_name, physical: physical_name },
    added_fields: added,
    removed_fields: removed,
    changed_fields: changed,
    collection_options: collection_opts_changes,
    stale_references: stale_refs
  }

  # In sync only when nothing differs AND no stale references were detected.
  in_sync = added.empty? && removed.empty? && changed.empty? &&
            collection_opts_changes.empty? && stale_refs.empty?

  payload = {
    collection: klass.name.to_s,
    logical: logical_name,
    physical_current: physical_name,
    fields_changed_count: changed.size,
    added_count: added.size,
    removed_count: removed.size,
    stale_references_count: stale_refs.size,
    in_sync: in_sync
  }
  SearchEngine::Instrumentation.instrument('search_engine.schema.diff', payload) {}

  { diff: diff_hash, pretty: pretty_print(diff_hash) }
end
|
|
140
|
+
|
|
141
|
+
# Apply schema lifecycle: create a new physical collection, reindex data into it,
|
|
142
|
+
# atomically point the alias (logical name) to it, and enforce retention.
|
|
143
|
+
#
|
|
144
|
+
# The reindexing step can be provided via an optional block (yielded with the new
|
|
145
|
+
# physical name). If no block is given, and the klass responds to
|
|
146
|
+
# `reindex_all_to(physical_name)`, that method will be called. If neither is available,
|
|
147
|
+
# an ArgumentError is raised and no alias swap occurs. If reindexing fails, the
|
|
148
|
+
# newly created physical is left intact for inspection; retention cleanup only runs
|
|
149
|
+
# after a successful alias swap.
|
|
150
|
+
#
|
|
151
|
+
# @param klass [Class] model class inheriting from {SearchEngine::Base}
|
|
152
|
+
# @param client [SearchEngine::Client] optional client wrapper (for tests)
|
|
153
|
+
# @param force_rebuild [Boolean] if true, skips in-place update check and forces full Blue/Green rebuild (default: false)
|
|
154
|
+
# @yieldparam physical_name [String] the newly created physical collection name
|
|
155
|
+
# @return [Hash] { logical: String, new_physical: String, previous_physical: String, alias_target: String, dropped_physicals: Array<String> }
|
|
156
|
+
# @raise [SearchEngine::Errors::Api, ArgumentError]
|
|
157
|
+
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema#lifecycle`
|
|
158
|
+
# @see `https://typesense.org/docs/latest/api/collections.html`
|
|
159
|
+
def apply!(klass, client: nil, force_rebuild: false)
  client ||= SearchEngine.client

  # Fast path: unless a rebuild is forced, try an in-place update first. A
  # truthy result from update! is treated as "schema is synced", so the
  # rebuild below is skipped entirely.
  patched_in_place = !force_rebuild && update!(klass, client: client)
  if patched_in_place
    logical = compile(klass)[:name]
    # Report the currently resolved physical so the result stays consistent.
    return update_result_payload(logical, client.resolve_alias(logical) || logical)
  end

  compiled = compile(klass, client: client)
  logical = compiled[:name]

  cleanup_logical_collection_conflict!(logical, client: client)

  started_at = monotonic_ms
  previous_physical = client.resolve_alias(logical)
  new_physical = generate_physical_name(logical, client: client)

  # Fields are dup'ed because the compiled schema is frozen.
  create_schema = { name: new_physical, fields: compiled[:fields].map(&:dup) }
  create_schema[:enable_nested_fields] = true if compiled[:enable_nested_fields]

  # Referenced collections are validated BEFORE anything is created; failures
  # are re-raised with the logical collection name prepended.
  begin
    validate_referenced_collections!(create_schema[:fields], client: client)
  rescue ArgumentError => error
    raise ArgumentError,
          "Schema validation failed for collection '#{logical}': #{error.message}"
  end

  client.create_collection(create_schema)

  # Reindex step: an explicit block wins, then klass.reindex_all_to; without
  # either the method raises before the alias is touched.
  unless block_given? || klass.respond_to?(:reindex_all_to)
    raise ArgumentError, 'reindex step is required: provide a block or implement klass.reindex_all_to(name)'
  end

  if block_given?
    yield new_physical
  else
    klass.reindex_all_to(new_physical)
  end

  # Idempotent swap: skip the upsert when the alias already points at the new physical.
  alias_swapped = client.resolve_alias(logical) != new_physical
  client.upsert_alias(logical, new_physical) if alias_swapped

  # Retention cleanup runs only after the alias step above.
  _kept, dropped_physicals = enforce_retention!(
    logical, new_physical, client: client, keep_last: effective_keep_last(klass)
  )

  if defined?(ActiveSupport::Notifications)
    # Legacy keys first, followed by the canonical keys expected by the subscriber.
    event_payload = {
      logical: logical,
      new_physical: new_physical,
      previous_physical: previous_physical,
      dropped_count: dropped_physicals.size,
      collection: klass.name.to_s,
      physical_new: new_physical,
      alias_swapped: alias_swapped,
      retention_deleted_count: dropped_physicals.size,
      status: :ok,
      duration_ms: (monotonic_ms - started_at)
    }
    SearchEngine::Instrumentation.instrument('search_engine.schema.apply', **event_payload) {}
  end

  {
    logical: logical,
    new_physical: new_physical,
    previous_physical: previous_physical,
    alias_target: new_physical,
    dropped_physicals: dropped_physicals,
    action: :rebuild
  }
end
|
|
237
|
+
|
|
238
|
+
# Roll back the alias for the given klass to the previous retained physical collection.
|
|
239
|
+
#
|
|
240
|
+
# Chooses the most recent retained physical behind the current alias target. If none
|
|
241
|
+
# is available, raises an ArgumentError explaining that retention may be set to 0.
|
|
242
|
+
# The method is idempotent: if the alias already points to the chosen target, no-op.
|
|
243
|
+
#
|
|
244
|
+
# @param klass [Class]
|
|
245
|
+
# @param client [SearchEngine::Client]
|
|
246
|
+
# @return [Hash] { logical: String, new_target: String, previous_target: String }
|
|
247
|
+
# @raise [ArgumentError] when no previous physical exists
|
|
248
|
+
# @see `https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/schema#retention`
|
|
249
|
+
def rollback(klass, client: nil)
  client ||= SearchEngine.client
  logical = compile(klass)[:name]

  started_at = monotonic_ms
  alias_target = client.resolve_alias(logical)

  # Newest-first list of physicals; the first one that is not the current
  # alias target becomes the rollback target.
  newest_first = order_physicals_desc(logical, list_physicals(logical, client: client))
  rollback_target = newest_first.find { |candidate| candidate != alias_target }

  if rollback_target.nil?
    raise ArgumentError,
          'No previous physical available for rollback; retention keep_last may be 0'
  end

  # Idempotent swap: only touch the alias when it would actually change.
  client.upsert_alias(logical, rollback_target) unless alias_target == rollback_target

  if defined?(ActiveSupport::Notifications)
    event_payload = {
      logical: logical,
      new_target: rollback_target,
      previous_target: alias_target,
      duration_ms: (monotonic_ms - started_at)
    }
    SearchEngine::Instrumentation.instrument('search_engine.schema.rollback', **event_payload) {}
  end

  { logical: logical, new_target: rollback_target, previous_target: alias_target }
end
|
|
279
|
+
|
|
280
|
+
# Attempt to update the collection schema in-place (PATCH) if the changes are compatible.
|
|
281
|
+
#
|
|
282
|
+
# Compatible changes are:
|
|
283
|
+
# - Adding new fields
|
|
284
|
+
# - Removing fields (drop: true)
|
|
285
|
+
#
|
|
286
|
+
# Incompatible changes (triggering return false) are:
|
|
287
|
+
# - Modifying existing fields (type/facet/etc changes)
|
|
288
|
+
# - Changing collection-level options
|
|
289
|
+
#
|
|
290
|
+
# @param klass [Class]
|
|
291
|
+
# @param client [SearchEngine::Client]
|
|
292
|
+
# @return [Boolean] true if the live schema is in sync afterwards (no changes needed, or patched in-place); false if a full rebuild is required
|
|
293
|
+
def update!(klass, client: nil)
  client ||= SearchEngine.client
  changes = diff(klass, client: client)[:diff]
  physical = changes[:collection][:physical]

  # No physical collection implies missing; the caller should create it.
  return false if physical.nil?

  # Changed fields or changed collection-level options cannot be patched.
  return false if changes[:changed_fields].any? || changes[:collection_options].any?

  additions = Array(changes[:added_fields])

  # Reference-bearing fields require a full rebuild (Typesense limitation for PATCH).
  needs_rebuild = additions.any? do |field|
    ref = field[:reference] || field['reference']
    async = field[:async_reference] || field['async_reference']
    (ref && !ref.to_s.strip.empty?) || async
  end
  return false if needs_rebuild

  # Nothing to add or drop: report success without calling the client.
  removals = changes[:removed_fields]
  return true if additions.empty? && removals.empty?

  # Patch payload: drops first, then the new field definitions as-is.
  drops = Array(removals).map { |field| { name: field[:name], drop: true } }
  client.update_collection(physical, { fields: drops + additions })
  true
end
|
|
330
|
+
|
|
331
|
+
private
|
|
332
|
+
|
|
333
|
+
# Generate a new physical name using UTC timestamp + 3-digit sequence.
|
|
334
|
+
# Example: "products_20250131_235959_001"
|
|
335
|
+
def generate_physical_name(logical, client:)
  stamp = Time.now.utc.strftime('%Y%m%d_%H%M%S')
  prefix = "#{logical}_#{stamp}_"

  # Sequence numbers already taken by physicals sharing this timestamp prefix.
  taken = list_physicals_starting_with(prefix, client: client).map { |name| name.split('_').last.to_i }

  # Lowest free sequence starting at 1; capped at 999 even when that is taken.
  seq = (1..998).find { |candidate| !taken.include?(candidate) } || 999
  format('%<prefix>s%<seq>03d', prefix: prefix, seq: seq)
end
|
|
347
|
+
|
|
348
|
+
# Return current alias target or nil.
|
|
349
|
+
def current_alias_target(logical, client:)
  # Thin wrapper: returns whatever the client resolves for the logical name.
  target = client.resolve_alias(logical)
  target
end
|
|
352
|
+
|
|
353
|
+
# Atomically swap alias to the provided physical.
|
|
354
|
+
def swap_alias!(logical, physical, client:)
  # Delegates to the client's alias upsert and returns its result unchanged.
  upsert_result = client.upsert_alias(logical, physical)
  upsert_result
end
|
|
357
|
+
|
|
358
|
+
# Enumerate all physicals that match the naming pattern for the logical name.
|
|
359
|
+
def list_physicals(logical, client:)
  # Metadata listing gets at least 10s, regardless of the configured timeout.
  # Any failure while reading the config also falls back to 10s.
  meta_timeout = begin
    [SearchEngine.config.timeout_ms.to_i, 10_000].max
  rescue StandardError
    10_000
  end

  collections = Array(client.list_collections(timeout_ms: meta_timeout))
  names = collections.map { |c| (c[:name] || c['name']).to_s }

  # Whole-string anchors (\A / \z): a physical name must be exactly
  # "<logical>_YYYYMMDD_HHMMSS_###". Line anchors (^ / $) would also accept
  # names that merely contain a matching line.
  pattern = /\A#{Regexp.escape(logical)}_\d{8}_\d{6}_\d{3}\z/
  names.select { |n| pattern.match?(n) }
rescue StandardError
  # Best-effort: any listing failure yields an empty list instead of raising.
  []
end
|
|
374
|
+
|
|
375
|
+
# Internal: list physicals that share the same timestamp prefix (for sequence calculation)
|
|
376
|
+
def list_physicals_starting_with(prefix, client:)
  # Listing timeout is the configured timeout, floored at 10s; a failure while
  # reading the config falls back to 10s as well.
  listing_timeout = begin
    [SearchEngine.config.timeout_ms.to_i, 10_000].max
  rescue StandardError
    10_000
  end

  listed = Array(client.list_collections(timeout_ms: listing_timeout))
  # Entries may carry the name under a symbol or a string key.
  listed
    .map { |entry| (entry[:name] || entry['name']).to_s }
    .select { |collection_name| collection_name.start_with?(prefix) }
rescue StandardError
  # Best-effort: any failure yields an empty list.
  []
end
|
|
390
|
+
|
|
391
|
+
# Delete retained physical collections beyond the keep_last limit.
#
# @param logical [String] logical collection name
# @param new_target [String] physical that is never kept-counted nor dropped here
# @param client [SearchEngine::Client] client used to list and delete collections
# @param keep_last [Integer, nil] number of older physicals to keep (nil/negative => 0)
# @return [Array(Array<String>, Array<String>)] [kept names, dropped names]
def enforce_retention!(logical, new_target, client:, keep_last:)
  keep = [Integer(keep_last || 0), 0].max

  newest_first = order_physicals_desc(logical, list_physicals(logical, client: client))
  older = newest_first.reject { |name| name == new_target }

  kept = older.first(keep)
  dropped = older.drop(keep)

  # Errors from the client are not rescued here; any tolerance for an
  # already-missing collection is presumably handled inside the client.
  dropped.each { |name| client.delete_collection(name) }

  [kept, dropped]
end
|
|
408
|
+
|
|
409
|
+
# Sort physical names newest first: by timestamp descending, then by
# sequence descending.
#
# @param logical [String] logical collection name
# @param names [Array<String>] physical collection names
# @return [Array<String>] a new, sorted array
def order_physicals_desc(logical, names)
  names.sort_by do |physical|
    stamp = extract_timestamp(logical, physical).to_i
    sequence = extract_sequence(logical, physical)
    # Negated keys turn the ascending sort into a descending one.
    [-stamp, -sequence]
  end
end
|
|
412
|
+
|
|
413
|
+
# Extract the numeric timestamp from a physical name shaped like
# "<logical>_YYYYMMDD_HHMMSS_###".
#
# @param logical [String] logical collection name
# @param name [String] physical collection name
# @return [Integer] YYYYMMDDHHMMSS as an integer, or 0 when the remainder
#   after the logical prefix does not split into exactly three parts
def extract_timestamp(logical, name)
  date, time, *rest = name.delete_prefix("#{logical}_").split('_')
  # Exactly three underscore-separated parts means exactly one trailing part.
  return 0 unless rest.size == 1

  "#{date}#{time}".to_i
end
|
|
421
|
+
|
|
422
|
+
# Extract the trailing sequence number from a physical name.
#
# @param _logical [String] unused
# @param name [String] physical collection name
# @return [Integer] integer value of the last underscore-separated part
def extract_sequence(_logical, name)
  *, tail = name.split('_')
  tail.to_i
end
|
|
425
|
+
|
|
426
|
+
# Resolve the retention keep_last value for a model class.
#
# @param klass [Class] model class; may expose `schema_retention`
# @return [Object] the per-class `schema_retention[:keep_last]` when it is
#   non-nil, otherwise the value from `SearchEngine.config.schema.retention`
def effective_keep_last(klass)
  if klass.respond_to?(:schema_retention) && klass.schema_retention
    override = klass.schema_retention[:keep_last]
    # Only nil falls through to the global setting; 0 is a valid override.
    return override unless override.nil?
  end

  SearchEngine.config.schema.retention.keep_last
end
|
|
432
|
+
|
|
433
|
+
# Shortcut for the instrumentation module's millisecond clock reading.
#
# @return [Object] whatever SearchEngine::Instrumentation.monotonic_ms returns
def monotonic_ms
  clock = SearchEngine::Instrumentation
  clock.monotonic_ms
end
|
|
436
|
+
|
|
437
|
+
# Extract and validate collection name as a non-empty string.
|
|
438
|
+
def collection_name_for!(klass)
  # Classes without a `collection` reader are treated like a missing name.
  declared = klass.collection if klass.respond_to?(:collection)
  name = declared.to_s

  # nil, empty and whitespace-only names are all rejected.
  raise ArgumentError, 'klass must define a collection name' if declared.nil? || name.strip.empty?

  name
end
|
|
446
|
+
|
|
447
|
+
# Compile attributes from the model DSL into a fields array and detect nested fields requirement.
|
|
448
|
+
def compile_fields_for(klass, client: nil)
  # Classes that do not expose the DSL readers compile to an empty field list.
  declared = klass.respond_to?(:attributes) ? klass.attributes : {}
  options_by_name = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
  references = build_references_by_local_key(klass, client: client)
  async_references = build_async_reference_by_local_key(klass)

  fields = []
  nested = false

  declared.each do |attribute_name, type_descriptor|
    validate_attribute_type!(attribute_name, type_descriptor)

    key = attribute_name.to_sym
    opts = options_by_name[key] || {}

    # Skip attributes declared with `index: false` ...
    next if opts[:index] == false

    # ... and dotted (nested) attributes whose base attribute is `index: false`.
    base, dot, = attribute_name.to_s.partition('.')
    next if !dot.empty? && (options_by_name[base.to_sym] || {})[:index] == false

    ts_type = typesense_type_for(type_descriptor)

    # Reference fields must be typed as string/string[] in the DSL.
    if references.key?(key) && !%w[string string[]].include?(ts_type.to_s)
      raise SearchEngine::Errors::InvalidOption.new(
        "Reference field :#{attribute_name} must be declared as :string or [:string] " \
        "(got #{type_descriptor.inspect}).",
        hint: "Declare attribute :#{attribute_name}, :string in #{klass.name} to match " \
              'Typesense reference requirements.',
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/joins#declaring-references',
        details: {
          field: attribute_name.to_s,
          declared_type: type_descriptor,
          compiled_type: ts_type,
          reference: references[key]
        }
      )
    end

    # Once one nested type is seen, the flag stays true.
    nested ||= nested_type?(ts_type)

    fields << build_field_entry(attribute_name, ts_type, references, async_references, opts)

    # Hidden companion flags (<name>_empty / <name>_blank) are appended by the helper.
    append_hidden_empty_field(fields, attribute_name, type_descriptor, opts)
  end

  [fields, nested]
end
|
|
507
|
+
|
|
508
|
+
# Validate unsupported or unsafe type descriptors early.
|
|
509
|
+
def validate_attribute_type!(attribute_name, type_descriptor)
  # Only the :auto type (case-insensitive) is restricted: it is accepted
  # solely for regex-style attribute names.
  auto_typed = type_descriptor.to_s.downcase == 'auto'
  return unless auto_typed && !regex_attribute_name?(attribute_name)

  raise SearchEngine::Errors::InvalidOption,
        "Attribute #{attribute_name.inspect} must use a regex-style name (e.g. /.*_facet/) to declare type :auto."
end
|
|
517
|
+
|
|
518
|
+
# Whether the attribute name contains any of the characters . * + ? [ ] ( ) | { }.
# Note that a plain dot counts, so dotted names match as well.
#
# @param attribute_name [#to_s]
# @return [Boolean]
def regex_attribute_name?(attribute_name)
  /[.*+?\[\]()|{}]/.match?(attribute_name.to_s)
end
|
|
522
|
+
|
|
523
|
+
# Whether the compiled type is "object" or "object[]".
#
# @param ts_type [String] compiled Typesense type
# @return [Boolean]
def nested_type?(ts_type)
  case ts_type
  when 'object', 'object[]' then true
  else false
  end
end
|
|
526
|
+
|
|
527
|
+
# Build one schema field hash.
#
# @param attribute_name [Symbol, String]
# @param ts_type [String] compiled Typesense type
# @param references_by_local_key [Hash] reference per attribute symbol
# @param async_reference_by_local_key [Hash] async-reference flag per attribute symbol
# @param opts [Hash] attribute options (:locale, :sort, :optional, :infix, :facet are read)
# @return [Hash] :name and :type, plus every optional key whose value is not nil
def build_field_entry(attribute_name, ts_type, references_by_local_key, async_reference_by_local_key, opts)
  key = attribute_name.to_sym

  optional_settings = {
    locale: opts[:locale],
    sort: opts[:sort],
    optional: opts[:optional],
    infix: opts[:infix],
    facet: opts[:facet],
    reference: references_by_local_key[key],
    async_reference: async_reference_by_local_key[key]
  }

  # Only nil values are dropped; explicit `false` settings are kept.
  { name: attribute_name.to_s, type: ts_type }.merge(optional_settings.compact)
end
|
|
542
|
+
|
|
543
|
+
# Force the `doc_updated_at` system field to type "int64" when developers declare it.
|
|
544
|
+
def coerce_doc_updated_at_type!(fields_array)
|
|
545
|
+
fields_array.each do |f|
|
|
546
|
+
fname = (f[:name] || f['name']).to_s
|
|
547
|
+
next unless fname == 'doc_updated_at'
|
|
548
|
+
|
|
549
|
+
if f.key?(:type)
|
|
550
|
+
f[:type] = 'int64'
|
|
551
|
+
elsif f.key?('type')
|
|
552
|
+
f['type'] = 'int64'
|
|
553
|
+
else
|
|
554
|
+
f[:type] = 'int64'
|
|
555
|
+
end
|
|
556
|
+
break
|
|
557
|
+
end
|
|
558
|
+
end
|
|
559
|
+
|
|
560
|
+
# Assemble the schema hash from a collection name and its fields.
#
# @param collection_name [#to_s]
# @param fields_array [Array<Hash>] stored as-is under :fields
# @param needs_nested_fields [Boolean] when truthy, :enable_nested_fields is set to true
# @return [Hash]
def build_schema_hash(collection_name, fields_array, needs_nested_fields)
  nested_flag = needs_nested_fields ? { enable_nested_fields: true } : {}
  { name: collection_name.to_s, fields: fields_array }.merge(nested_flag)
end
|
|
566
|
+
|
|
567
|
+
# Translate a type descriptor into a type string.
#
# A one-element Array (e.g. [:string]) maps its element and appends "[]".
# Anything else is mapped directly. Descriptors missing from TYPE_MAPPING
# fall back to their own +to_s+.
#
# @param type_descriptor [Array, #to_s]
# @return [String]
def typesense_type_for(type_descriptor)
  lookup = ->(descriptor) { TYPE_MAPPING[descriptor.to_s.downcase.to_sym] || descriptor.to_s }
  single_element_array = type_descriptor.is_a?(Array) && type_descriptor.size == 1

  single_element_array ? "#{lookup.call(type_descriptor.first)}[]" : lookup.call(type_descriptor)
end
|
|
577
|
+
|
|
578
|
+
# Normalize a compiled or live schema into a symbol-keyed comparison shape.
#
# Both Symbol and String keys are accepted on the schema and on each field.
# Field flags and collection options prefer the Symbol key and fall back to
# the String key only when the Symbol key yields nil, so an explicit +false+
# (e.g. +facet: false+) is preserved instead of being dropped.
#
# @param schema [Hash] schema with :name/"name" and :fields/"fields"
# @return [Hash] { name:, fields: { field_name => entry }, default_sorting_field:,
#   token_separators:, symbols_to_index:, enable_nested_fields: }
def normalize_schema(schema)
  # `hash[key] || hash[key.to_s]` would discard a Symbol-keyed false; test for nil instead.
  read = lambda do |hash, key|
    value = hash[key]
    value.nil? ? hash[key.to_s] : value
  end

  name = (schema[:name] || schema['name']).to_s
  fields = Array(schema[:fields] || schema['fields'])

  normalized_fields = {}
  fields.each do |field|
    fname = (field[:name] || field['name']).to_s
    ftype = (field[:type] || field['type']).to_s
    fref = field[:reference] || field['reference']

    entry = { name: fname, type: normalize_type(ftype) }
    entry[:reference] = fref.to_s unless fref.nil? || fref.to_s.strip.empty?

    # Preserve attribute-level flags from either compiled or live schemas.
    %i[locale sort optional infix facet async_reference].each do |flag|
      value = read.call(field, flag)
      entry[flag] = value unless value.nil?
    end

    normalized_fields[fname] = entry
  end

  {
    name: name,
    fields: normalized_fields,
    default_sorting_field: read.call(schema, :default_sorting_field),
    token_separators: read.call(schema, :token_separators),
    symbols_to_index: read.call(schema, :symbols_to_index),
    enable_nested_fields: read.call(schema, :enable_nested_fields)
  }
end
|
|
608
|
+
|
|
609
|
+
# Collect reference fields whose target collection name carries a physical suffix.
#
# For each such field the reason is:
# - "alias_mismatch" when the alias of the suffix-stripped name resolves to a
#   non-blank name different from the referenced one;
# - "missing_physical" when the referenced collection's schema cannot be retrieved;
# - "physical_reference" otherwise.
# Errors from the client calls are treated as nil results. Any other error
# makes the whole method return an empty array.
#
# @param schema [Hash] schema with :fields/"fields"
# @param client [#resolve_alias, #retrieve_collection_schema]
# @return [Array<Hash>] entries with :field, :logical, :physical and :reason
def detect_stale_references(schema, client:)
  Array(schema[:fields] || schema['fields']).each_with_object([]) do |field, stale|
    reference = field[:reference] || field['reference']
    next if reference.nil? || reference.to_s.strip.empty?

    physical = reference.to_s.split('.', 2).first.to_s
    next if physical.empty? || !physical_reference_name?(physical)

    field_name = (field[:name] || field['name']).to_s
    logical = strip_physical_suffix(physical)

    current_target = begin
      client.resolve_alias(logical)
    rescue StandardError
      nil
    end
    alias_moved = current_target && !current_target.to_s.strip.empty? && current_target.to_s != physical

    reason =
      if alias_moved
        'alias_mismatch'
      else
        physical_schema = begin
          client.retrieve_collection_schema(physical)
        rescue StandardError
          nil
        end
        physical_schema.nil? ? 'missing_physical' : 'physical_reference'
      end

    stale << { field: field_name, logical: logical, physical: physical, reason: reason }
  end
rescue StandardError
  []
end
|
|
654
|
+
private :detect_stale_references
|
|
655
|
+
|
|
656
|
+
# Tell whether the stringified name matches PHYSICAL_SUFFIX_RE.
#
# @param name [#to_s]
# @return [Boolean]
def physical_reference_name?(name)
  candidate = name.to_s
  candidate.match?(PHYSICAL_SUFFIX_RE)
end
|
|
659
|
+
private :physical_reference_name?
|
|
660
|
+
|
|
661
|
+
# Remove the first PHYSICAL_SUFFIX_RE match from the stringified name.
#
# @param name [#to_s]
# @return [String]
def strip_physical_suffix(name)
  candidate = name.to_s
  candidate.sub(PHYSICAL_SUFFIX_RE, '')
end
|
|
664
|
+
private :strip_physical_suffix
|
|
665
|
+
|
|
666
|
+
# Canonicalize the spelling of a handful of known type names.
#
# Matching ignores letter case. "boolean" is folded into "bool".
# Unrecognized values are returned as their +to_s+ without modification.
#
# @param type_string [#to_s]
# @return [String]
def normalize_type(type_string)
  raw = type_string.to_s
  canonical = {
    'string[]' => 'string[]',
    'int64' => 'int64',
    'int32' => 'int32',
    'float' => 'float',
    'bool' => 'bool',
    'boolean' => 'bool',
    'string' => 'string'
  }

  # Fallback: return as-is
  canonical.fetch(raw.downcase, raw)
end
|
|
678
|
+
|
|
679
|
+
# Compare compiled and live fields, both keyed by field name.
#
# For fields present on both sides, :type and :reference are always compared.
# The other FIELD_COMPARE_KEYS are compared only when the compiled field
# declares them. Equality is decided by +values_equal?+.
#
# @param compiled_fields_by_name [Hash{String=>Hash}]
# @param live_fields_by_name [Hash{String=>Hash}]
# @return [Array] [added, removed, changed]: compiled-only field entries,
#   live-only field entries, and { name => { key_string => [compiled, live] } }
def diff_fields(compiled_fields_by_name, live_fields_by_name)
  compiled_names = compiled_fields_by_name.keys
  live_names = live_fields_by_name.keys

  added = (compiled_names - live_names).map { |field_name| compiled_fields_by_name[field_name] }
  removed = (live_names - compiled_names).map { |field_name| live_fields_by_name[field_name] }

  always_compared = %i[type reference]
  changed = (compiled_names & live_names).each_with_object({}) do |field_name, by_field|
    compiled_field = compiled_fields_by_name[field_name]
    live_field = live_fields_by_name[field_name]

    deltas = FIELD_COMPARE_KEYS.each_with_object({}) do |key, found|
      # Only compare attribute-level flags when declared in compiled schema.
      next unless always_compared.include?(key) || compiled_field.key?(key)

      declared = compiled_field[key]
      actual = live_field[key]
      found[key.to_s] = [declared, actual] unless values_equal?(declared, actual)
    end

    by_field[field_name] = deltas unless deltas.empty?
  end

  [added, removed, changed]
end
|
|
712
|
+
|
|
713
|
+
# Compare collection-level options between a compiled and a live schema.
#
# Only options with a non-nil compiled value are compared, which keeps the
# diff quiet when the compiled side does not declare them.
#
# @param compiled [Hash] read for the four option keys as Symbols
# @param live [Hash] read for the same keys
# @return [Hash{Symbol=>Array}] option => [compiled_value, live_value] for each mismatch
def diff_collection_options(compiled, live)
  option_keys = %i[default_sorting_field token_separators symbols_to_index enable_nested_fields]

  option_keys.each_with_object({}) do |option, differences|
    declared = compiled[option]
    next if declared.nil?

    actual = live[option]
    differences[option] = [declared, actual] unless values_equal?(declared, actual)
  end
end
|
|
729
|
+
|
|
730
|
+
# Loose equality used when diffing schema values.
#
# Two Arrays are compared with ==. Every other pair is compared by string
# form, so e.g. nil and "" or true and "true" count as equal.
#
# @param a [Object]
# @param b [Object]
# @return [Boolean]
def values_equal?(a, b)
  return a == b if a.is_a?(Array) && b.is_a?(Array)

  a.to_s == b.to_s
end
|
|
737
|
+
|
|
738
|
+
# Recursively freeze an object graph made of Hashes and Arrays.
#
# Hash values and Array elements are frozen depth-first, then the object
# itself. Hash keys are not visited.
#
# @param object [Object]
# @return [Object] the same object, frozen
def deep_freeze(object)
  children = []
  children = object.values if object.is_a?(Hash)
  children = object if object.is_a?(Array)

  children.each { |child| deep_freeze(child) }
  object.freeze
end
|
|
747
|
+
|
|
748
|
+
# Render a schema diff as newline-joined text.
#
# The first line is the collection header. When nothing differs the second
# line is "No changes". Otherwise the non-empty sections follow in this order:
# added fields, removed fields, changed fields, stale references, collection options.
#
# @param diff [Hash] read for :collection, :added_fields, :removed_fields,
#   :changed_fields, :collection_options and :stale_references
# @return [String]
def pretty_print(diff)
  output = [format_header(diff[:collection] || {})]

  added = diff[:added_fields] || []
  removed = diff[:removed_fields] || []
  changed = diff[:changed_fields] || {}
  coll_opts = diff[:collection_options] || {}
  stale_refs = diff[:stale_references] || []

  sections = []
  sections << format_added_fields(added) unless added.empty?
  sections << format_removed_fields(removed) unless removed.empty?
  sections << format_changed_fields(changed) unless changed.empty?
  sections << format_stale_references(stale_refs) unless stale_refs.empty?
  sections << format_collection_options(coll_opts) unless coll_opts.empty?

  return (output << 'No changes').join("\n") if sections.empty?

  sections.each { |section| output.concat(section) }
  output.join("\n")
end
|
|
772
|
+
|
|
773
|
+
# Build the header line of the diff output.
#
# The physical name is appended after " -> " only when it is truthy and
# differs from the logical name.
#
# @param collection [Hash] read for :name and :physical
# @return [String]
def format_header(collection)
  logical = collection[:name]
  physical = collection[:physical]
  suffix = physical && physical != logical ? " -> #{physical}" : ''

  "Collection: #{logical}#{suffix}"
end
|
|
782
|
+
private :format_header
|
|
783
|
+
|
|
784
|
+
# Render the "added fields" section: a title line plus one "name:type" line per field.
#
# @param list [Array<Hash>] fields read for :name and :type
# @return [Array<String>]
def format_added_fields(list)
  list.each_with_object(['+ Added fields:']) do |field, lines|
    lines << " - #{field[:name]}:#{field[:type]}"
  end
end
|
|
791
|
+
private :format_added_fields
|
|
792
|
+
|
|
793
|
+
# Render the "removed fields" section: a title line plus one "name:type" line per field.
#
# @param list [Array<Hash>] fields read for :name and :type
# @return [Array<String>]
def format_removed_fields(list)
  list.each_with_object(['- Removed fields:']) do |field, lines|
    lines << " - #{field[:name]}:#{field[:type]}"
  end
end
|
|
800
|
+
private :format_removed_fields
|
|
801
|
+
|
|
802
|
+
# Render the "changed fields" section, with field names in sorted order.
#
# @param map [Hash{String=>Hash}] field name => { attribute => [compiled, live] }
# @return [Array<String>] title line followed by one line per changed attribute
def format_changed_fields(map)
  rows = map.keys.sort.flat_map do |fname|
    map[fname].map { |attr, (cval, lval)| " - #{fname}.#{attr}: #{cval} -> #{lval}" }
  end

  ['~ Changed fields:'] + rows
end
|
|
812
|
+
private :format_changed_fields
|
|
813
|
+
|
|
814
|
+
# Render the "stale references" section.
#
# Each entry may use Symbol or String keys. The reason is printed only when present.
#
# @param stale_refs [Array<Hash>] entries with field, logical, physical and optional reason
# @return [Array<String>]
def format_stale_references(stale_refs)
  read = ->(ref, key) { ref[key] || ref[key.to_s] }

  stale_refs.each_with_object(['~ Stale references:']) do |ref, lines|
    reason = read.call(ref, :reason)
    details = "logical=#{read.call(ref, :logical)}, physical=#{read.call(ref, :physical)}"
    details += ", reason=#{reason}" if reason

    lines << " - #{read.call(ref, :field)} (#{details})"
  end
end
|
|
829
|
+
private :format_stale_references
|
|
830
|
+
|
|
831
|
+
# Render the "collection options" section.
#
# A :live entry whose two values are both nil is skipped. A :live entry where
# either value is :missing prints only the first truthy value. Every other
# entry prints "key: compiled -> live".
#
# @param opts [Hash] option => [compiled_value, live_value]
# @return [Array<String>]
def format_collection_options(opts)
  opts.each_with_object(['~ Collection options:']) do |(key, (cval, lval)), lines|
    about_live = key == :live
    next if about_live && cval.nil? && lval.nil?

    missing_live = about_live && [cval, lval].include?(:missing)
    lines << (missing_live ? " - live: #{cval || lval}" : " - #{key}: #{cval} -> #{lval}")
  end
end
|
|
844
|
+
private :format_collection_options
|
|
845
|
+
|
|
846
|
+
# Build the result payload for an update action.
#
# The same physical name is reported as new, previous and alias target, and
# no physical collections are listed as dropped.
#
# @param logical [Object] stored under :logical
# @param physical [Object] stored under :new_physical, :previous_physical and :alias_target
# @return [Hash]
def update_result_payload(logical, physical)
  payload = { logical: logical }
  %i[new_physical previous_physical alias_target].each { |key| payload[key] = physical }
  payload[:dropped_physicals] = []
  payload[:action] = :update
  payload
end
|
|
856
|
+
|
|
857
|
+
# Map local attribute names to "collection.foreign_key" reference strings.
#
# Join configs of kind has_one/has_many are ignored; a missing kind is treated
# as belongs_to. Configs without a local key, or with a blank collection or
# foreign key, are skipped. The first config seen for a local key wins.
#
# @param klass [Class] object that may respond to +joins_config+
# @param client [Object, nil] forwarded to +resolve_reference_collection_name+
# @return [Hash{Symbol=>String}]
def build_references_by_local_key(klass, client: nil)
  return {} unless klass.respond_to?(:joins_config)

  (klass.joins_config || {}).each_value.with_object({}) do |cfg, refs|
    # Only belongs_to/belongs_to_many contribute references to schema
    next if %i[has_one has_many].include?((cfg[:kind] || :belongs_to).to_sym)

    local_key = cfg[:local_key]
    next if local_key.nil?

    target = resolve_reference_collection_name(cfg[:collection].to_s, client)
    foreign_key = cfg[:foreign_key].to_s
    next if target.strip.empty? || foreign_key.strip.empty?

    refs[local_key.to_sym] ||= "#{target}.#{foreign_key}"
  end
end
|
|
884
|
+
|
|
885
|
+
# Return the name to use as a reference target for a logical collection.
#
# @param logical [#to_s] logical collection name
# @param _client [Object] unused
# @return [String] the logical name as a String
def resolve_reference_collection_name(logical, _client)
  # Keep reference targets stable across blue/green swaps by using
  # logical collection names (aliases) instead of physical names.
  stable_name = logical.to_s
  stable_name
end
|
|
890
|
+
|
|
891
|
+
# Map local attribute names to +true+ for joins declared with a truthy :async_ref.
#
# Join configs of kind has_one/has_many are ignored; a missing kind is treated
# as belongs_to. Configs without a local key are skipped.
#
# @param klass [Class] object that may respond to +joins_config+
# @return [Hash{Symbol=>Boolean}]
def build_async_reference_by_local_key(klass)
  return {} unless klass.respond_to?(:joins_config)

  (klass.joins_config || {}).each_value.with_object({}) do |cfg, flags|
    next if %i[has_one has_many].include?((cfg[:kind] || :belongs_to).to_sym)

    local_key = cfg[:local_key]
    next if local_key.nil? || !cfg[:async_ref]

    flags[local_key.to_sym] = true
  end
end
|
|
910
|
+
|
|
911
|
+
# Append hidden boolean companion fields for an attribute:
# - <name>_empty when opts[:empty_filtering] is truthy and the type descriptor
#   is a one-element Array
# - <name>_blank when opts[:optional] is truthy
#
# @param fields_array [Array<Hash>] mutated in place
# @param attribute_name [#to_s]
# @param type_descriptor [Object]
# @param opts [Hash] read for :empty_filtering and :optional
# @return [Array<Hash>, nil] the array when a _blank field was appended, otherwise nil
def append_hidden_empty_field(fields_array, attribute_name, type_descriptor, opts)
  single_element_array = type_descriptor.is_a?(Array) && type_descriptor.size == 1
  wants_empty = opts[:empty_filtering] && single_element_array
  wants_blank = opts[:optional]
  return unless wants_empty || wants_blank

  fields_array.push({ name: "#{attribute_name}_empty", type: 'bool' }) if wants_empty
  fields_array.push({ name: "#{attribute_name}_blank", type: 'bool' }) if wants_blank
end
|
|
923
|
+
|
|
924
|
+
# Validate every field that declares a reference of the form
# "logical_collection_name.field_name".
#
# Fields without a reference, and references missing either part, are skipped.
# Each remaining reference is handed to +validate_single_reference!+.
#
# @param fields [Array<Hash>] field definitions; :reference or "reference" is read
# @param client [SearchEngine::Client] forwarded to +validate_single_reference!+
# @return [Array<Hash>] the given fields
# @raise [SearchEngine::Errors::Api, ArgumentError] if referenced collections are invalid
def validate_referenced_collections!(fields, client:)
  fields.each do |field|
    raw_reference = field[:reference] || field['reference']
    next if raw_reference.to_s.strip.empty?

    # Parse reference format: "logical_collection_name.field_name"
    collection_part, field_part = raw_reference.to_s.split('.', 2)
    logical_coll = collection_part.to_s
    next if logical_coll.empty? || field_part.nil? || field_part.empty?

    validate_single_reference!(logical_coll, field_part, raw_reference, client)
  end
end
|
|
945
|
+
|
|
946
|
+
# Validate a single reference field.
#
# Resolves the logical collection, loads its schema, and checks that the
# referenced field exists and is not declared with +index: false+. Schema
# fields may use Symbol or String keys.
#
# @param logical_coll [String] logical collection name
# @param field_name [String] field name to validate
# @param ref [String] original reference string for error messages
# @param client [SearchEngine::Client] client to resolve aliases and retrieve schemas
# @return [nil] when the reference is valid
# @raise [ArgumentError] if validation fails; any other StandardError raised
#   while validating is re-raised as ArgumentError
def validate_single_reference!(logical_coll, field_name, ref, client)
  physical_coll = resolve_referenced_collection(logical_coll, client)
  referenced_schema = retrieve_referenced_schema(logical_coll, physical_coll, client)
  schema_fields = Array(referenced_schema[:fields] || referenced_schema['fields'])
  target_field = schema_fields.find { |f| (f[:name] || f['name']).to_s == field_name }

  if target_field.nil?
    raise ArgumentError,
          build_field_not_found_error(logical_coll, field_name, schema_fields, physical_coll: physical_coll)
  end

  # Typesense requires referenced fields to be indexed (for JOIN filtering).
  # Fall back to the String key only on nil: `a || b` would turn a Symbol-keyed
  # `index: false` into nil and skip the check below.
  index_val = target_field[:index]
  index_val = target_field['index'] if index_val.nil?
  if index_val == false
    raise ArgumentError,
          build_field_not_indexed_error(logical_coll, field_name, physical_coll: physical_coll)
  end
rescue ArgumentError
  raise
rescue StandardError => error
  raise ArgumentError, "Failed to validate reference '#{ref}': #{error.class}: #{error.message}"
end
|
|
974
|
+
|
|
975
|
+
# Resolve a logical collection name to the collection name to inspect.
#
# When the alias lookup yields nil or a blank value, the logical name itself
# is used.
#
# @param logical_coll [String] logical collection name
# @param client [SearchEngine::Client] client to resolve aliases
# @return [String] resolved alias target, or the logical name as a fallback
# @raise [ArgumentError] if both the alias target and the logical name are blank
def resolve_referenced_collection(logical_coll, client)
  blank = ->(value) { value.to_s.strip.empty? }

  resolved = client.resolve_alias(logical_coll)
  resolved = logical_coll if blank.call(resolved)
  return resolved unless blank.call(resolved)

  raise ArgumentError,
        "Referenced collection alias '#{logical_coll}' does not exist or points to no physical collection"
end
|
|
991
|
+
|
|
992
|
+
# Drop a physical collection that shadows an alias with the same name.
# Typesense allows alias/collection name collisions; when both exist,
# references to the alias resolve to the physical collection, which
# breaks joins after blue/green swaps. If an alias exists and points to
# a different physical, remove the conflicting logical-named collection.
#
# Nothing happens when the alias is missing or blank, when it points at the
# logical name itself, or when no collection with the logical name exists.
# After deleting, an instrumentation event is emitted if
# ActiveSupport::Notifications is defined.
#
# @param logical [#to_s] logical collection name
# @param client [#resolve_alias, #retrieve_collection_schema, #delete_collection]
def cleanup_logical_collection_conflict!(logical, client:)
  logical_name = logical.to_s
  target_name = client.resolve_alias(logical_name).to_s
  return if target_name.strip.empty? || target_name == logical_name
  return unless client.retrieve_collection_schema(logical_name)

  client.delete_collection(logical_name, timeout_ms: 60_000)
  return unless defined?(ActiveSupport::Notifications)

  payload = { logical: logical_name, alias_target: target_name, dropped: logical_name }
  SearchEngine::Instrumentation.instrument('search_engine.schema.cleanup_conflict', **payload) {}
end
|
|
1016
|
+
|
|
1017
|
+
# Fetch the schema of a referenced collection by its physical name.
#
# @param logical_coll [String] logical collection name (used in the error message)
# @param physical_coll [String] physical collection name passed to the client
# @param client [SearchEngine::Client] client to retrieve schemas
# @return [Hash] collection schema
# @raise [ArgumentError] if the client returns nil
def retrieve_referenced_schema(logical_coll, physical_coll, client)
  schema = client.retrieve_collection_schema(physical_coll)
  return schema unless schema.nil?

  raise ArgumentError,
        "Referenced collection '#{logical_coll}' (physical: '#{physical_coll}') " \
        'schema could not be retrieved'
end
|
|
1032
|
+
|
|
1033
|
+
# Compose the error message for a referenced field that is missing from the
# referenced collection's schema.
#
# When a model can be resolved for the logical collection, its compiled schema
# is consulted to add a hint saying whether the field is declared there.
# Errors raised while resolving the model are ignored and no hint is added.
#
# @param logical_coll [String] logical collection name
# @param field_name [String] field name that was not found
# @param schema_fields [Array<Hash>] fields from the referenced collection schema
# @param physical_coll [String, nil] physical collection name (optional)
# @return [String] error message
def build_field_not_found_error(logical_coll, field_name, schema_fields, physical_coll: nil)
  available = schema_fields.map { |f| (f[:name] || f['name']).to_s }.sort

  shown_collection = logical_coll
  if physical_coll && physical_coll != logical_coll
    shown_collection = "#{logical_coll} (physical: #{physical_coll})"
  end

  parts = ["Referenced field '#{field_name}' not found in collection '#{shown_collection}'. "]

  model = begin
    SearchEngine::CollectionResolver.model_for_logical(logical_coll)
  rescue StandardError
    nil
  end

  if model
    declared = Array(compile(model)[:fields]).map { |f| (f[:name] || f['name']).to_s }
    parts << if declared.include?(field_name)
               "Field exists in model '#{model.name}' but is not indexed " \
                 '(possibly marked with `index: false`). '
             else
               "Field is not declared in model '#{model.name}'. "
             end
  end

  parts << "Available fields in collection: #{available.join(', ')}. " \
           'Ensure the field is declared and indexed in the referenced collection.'
  parts.join
end
|
|
1066
|
+
|
|
1067
|
+
# Compose the error message for a referenced field declared with index: false.
#
# The physical name is shown alongside the logical one only when it is given
# and differs from it.
#
# @param logical_coll [String] logical collection name
# @param field_name [String] field name that is not indexed
# @param physical_coll [String, nil] physical collection name (optional)
# @return [String] error message
def build_field_not_indexed_error(logical_coll, field_name, physical_coll: nil)
  shown_collection = logical_coll
  if physical_coll && physical_coll != logical_coll
    shown_collection = "#{logical_coll} (physical: #{physical_coll})"
  end

  [
    "Referenced field '#{field_name}' in collection '#{shown_collection}' has index: false. ",
    'Typesense requires referenced fields to be indexed for JOIN operations. ',
    "Reindex the '#{logical_coll}' collection to update its schema, ensuring the " \
    "'#{field_name}' field does not have `index: false` in the model declaration."
  ].join
end
|
|
1081
|
+
end
|
|
1082
|
+
end
|
|
1083
|
+
end
|