search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,727 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'active_support/concern'
|
|
4
|
+
require 'set'
|
|
5
|
+
require 'json'
|
|
6
|
+
require 'search_engine/indexer/batch_planner'
|
|
7
|
+
|
|
8
|
+
module SearchEngine
|
|
9
|
+
class Base
|
|
10
|
+
# Creation helpers for inserting a single document into a collection.
|
|
11
|
+
#
|
|
12
|
+
# Provides ActiveRecord-like `.create(attrs)` that validates and normalizes
|
|
13
|
+
# attributes against the compiled schema, computes hidden flags and forces
|
|
14
|
+
# the `doc_updated_at` timestamp. Returns a hydrated model instance.
|
|
15
|
+
module Creation
|
|
16
|
+
extend ActiveSupport::Concern
|
|
17
|
+
|
|
18
|
+
# Internal helpers extracted to keep the public API lean and within style limits.
|
|
19
|
+
module Helpers
|
|
20
|
+
module_function
|
|
21
|
+
|
|
22
|
+
# Copies +attrs+ into a new Hash whose keys are all Strings.
#
# @param attrs [#each] key/value pairs (typically a Hash or keyword args)
# @return [Hash{String=>Object}] a fresh hash; the input is not mutated
def normalize_attrs_to_document(attrs)
  attrs.each_with_object({}) do |(name, value), document|
    document[name.to_s] = value
  end
end
|
|
27
|
+
|
|
28
|
+
# Resolves a document id from +attrs+ when no explicit id was supplied.
#
# For an ActiveRecord-backed source the fallback "*_id" attribute names are
# tried first (String key, then Symbol key); the first non-blank value wins.
# For every source type, an explicitly defined identify_by strategy is used
# as the last resort.
#
# @param klass [Class] model class
# @param attrs [Hash] incoming attributes (String and/or Symbol keys)
# @return [String, nil] resolved id, or nil when nothing could be resolved
def compute_id_for_create(klass, attrs)
  if source_type_for(klass) == :active_record
    fallback_id_field_names_for(klass).each do |field|
      symbol_key = field.to_sym
      next unless attrs.key?(field) || attrs.key?(symbol_key)

      candidate = attrs[field] || attrs[symbol_key]
      next if candidate.nil?

      text = candidate.to_s
      return text unless text.strip.empty?
    end
  end

  # Both the AR and non-AR paths end the same way: identify_by or unresolved.
  identify_by_defined?(klass) ? compute_from_identify_by(klass, attrs) : nil
end
|
|
54
|
+
|
|
55
|
+
# Tells whether +klass+ itself carries an @identify_by_proc instance variable.
#
# Only the presence of the variable on this exact object is checked, not
# its value.
#
# @param klass [Object] model class
# @return [Boolean]
def identify_by_defined?(klass)
  klass.instance_variable_defined?(:@identify_by_proc)
end
|
|
58
|
+
|
|
59
|
+
# Computes a document id by handing an OpenStruct built from +attrs+ to
# klass.compute_document_id.
#
# Any StandardError raised along the way is swallowed and reported as nil.
#
# @param klass [#compute_document_id] model class
# @param attrs [Hash] attributes exposed to the id strategy as reader methods
# @return [String, nil] the id as a String, or nil when blank or on error
def compute_from_identify_by(klass, attrs)
  require 'ostruct'
  record_like = OpenStruct.new(attrs)
  id_text = klass.compute_document_id(record_like).to_s
  id_text.strip.empty? ? nil : id_text
rescue StandardError
  nil
end
|
|
68
|
+
|
|
69
|
+
# Reads the source type recorded under [:source][:type] in the class-level
# @__mapper_dsl__ structure.
#
# @param klass [Object] model class
# @return [Symbol, nil] the type as a Symbol; nil when absent or on any error
def source_type_for(klass)
  dsl = klass.instance_variable_get(:@__mapper_dsl__)
  type = dsl&.dig(:source, :type)
  type.nil? ? nil : type.to_sym
rescue StandardError
  nil
end
|
|
75
|
+
|
|
76
|
+
# Lists candidate "<name>_id" attribute names for id resolution.
#
# The name derived from the source model ([:source][:options][:model] in
# @__mapper_dsl__, given as an object responding to #name or as a String)
# comes first, followed by the name derived from +klass+ itself. Namespaces
# are dropped and names are underscored.
#
# @param klass [#name] model class
# @return [Array<String>] unique candidate names, in preference order
def fallback_id_field_names_for(klass)
  model = klass.instance_variable_get(:@__mapper_dsl__)&.dig(:source, :options, :model)
  qualified_name =
    if model.respond_to?(:name)
      model.name.to_s
    elsif model.is_a?(String)
      model.to_s
    end
  model_base = qualified_name&.split('::')&.last

  candidates = []
  candidates << "#{ActiveSupport::Inflector.underscore(model_base)}_id" if model_base

  own_base = klass.name.to_s.split('::').last
  candidates << "#{ActiveSupport::Inflector.underscore(own_base)}_id"
  candidates.uniq
rescue StandardError
  # Fall back to the name derived from the search model alone.
  own_base = klass.name.to_s.split('::').last
  ["#{ActiveSupport::Inflector.underscore(own_base)}_id"]
end
|
|
95
|
+
|
|
96
|
+
# Stamps +document+ with the current epoch seconds under 'doc_updated_at'.
#
# Time.zone is used when it is defined and set; otherwise Time.now.
#
# @param document [Hash] document to mutate
# @return [Integer] the timestamp that was written
def update_doc_updated_at!(document)
  zone_available = defined?(Time) && defined?(Time.zone) && Time.zone
  clock = zone_available ? Time.zone.now : Time.now
  document['doc_updated_at'] = clock.to_i
end
|
|
104
|
+
|
|
105
|
+
# Builds a field-name => type lookup from a compiled schema.
#
# Field entries may use Symbol or String keys for name and type; both are
# stringified in the result.
#
# @param compiled_schema [Hash] schema with a :fields collection
# @return [Hash{String=>String}]
def build_types_by_field_from_schema(compiled_schema)
  Array(compiled_schema[:fields]).each_with_object({}) do |field, types|
    name = field[:name] || field['name']
    type = field[:type] || field['type']
    types[name.to_s] = type.to_s
  end
end
|
|
112
|
+
|
|
113
|
+
# Determines which document keys must be present.
#
# Every compiled field without a dot in its name is required, except
# attributes whose DSL options mark them as optional.
#
# @param klass [Object] model class (may respond to attribute_options)
# @param compiled_schema [Hash] schema with a :fields collection
# @return [Set<String>] required field names
def compute_required_keys_from_schema(klass, compiled_schema)
  required = Set.new
  Array(compiled_schema[:fields]).each do |field|
    name = (field[:name] || field['name']).to_s
    required << name unless name.include?('.')
  end

  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  options.each do |attr_name, conf|
    required.delete(attr_name.to_s) if conf.is_a?(Hash) && conf[:optional]
  end

  required
end
|
|
130
|
+
|
|
131
|
+
# Determines which document keys are accepted.
#
# The set is every compiled field name (dotted names included) plus
# attributes declared in the DSL with `index: false`.
#
# @param klass [Object] model class (may respond to attribute_options)
# @param compiled_schema [Hash] schema with a :fields collection
# @return [Set<String>] allowed field names
def compute_allowed_keys_from_schema_and_dsl(klass, compiled_schema)
  allowed = Set.new
  Array(compiled_schema[:fields]).each do |field|
    allowed << (field[:name] || field['name']).to_s
  end

  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  # Attributes explicitly kept out of the index are still valid input.
  options.each do |attr_name, conf|
    allowed << attr_name.to_s if conf.is_a?(Hash) && conf[:index] == false
  end

  allowed
end
|
|
150
|
+
|
|
151
|
+
# Writes the hidden companion flags for attributes that request them.
#
# - `<name>_empty` (attributes with :empty_filtering): true when the value
#   is nil or an empty Array.
# - `<name>_blank` (attributes with :optional): true when the value is nil.
#
# A flag is only written when its name is in +allowed_keys+. The two flags
# are evaluated independently, so a missing `_empty` field no longer
# prevents the `_blank` flag of the same attribute from being written.
#
# @param klass [Object] model class (may respond to attribute_options)
# @param document [Hash{String=>Object}] document to mutate
# @param allowed_keys [#include?] accepted field names
# @return [nil]
def append_hidden_flags!(klass, document, allowed_keys)
  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  options.each do |attr_name, conf|
    next unless conf.is_a?(Hash)

    base = attr_name.to_s
    value = document[base]

    if conf[:empty_filtering]
      empty_flag = "#{base}_empty"
      if allowed_keys.include?(empty_flag)
        document[empty_flag] = value.nil? || (value.is_a?(Array) && value.empty?)
      end
    end

    if conf[:optional]
      blank_flag = "#{base}_blank"
      document[blank_flag] = value.nil? if allowed_keys.include?(blank_flag)
    end
  end

  nil
end
|
|
181
|
+
|
|
182
|
+
# Removes optional attributes whose value is nil from +document+.
#
# Only attributes flagged :optional in the DSL options are considered.
#
# @param klass [Object] model class (may respond to attribute_options)
# @param document [Hash{String=>Object}] document to mutate
# @return [nil]
def prune_nil_optional_fields!(klass, document)
  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  options.each do |attr_name, conf|
    is_optional = conf.is_a?(Hash) && conf[:optional]
    next unless is_optional

    field = attr_name.to_s
    document.delete(field) if document[field].nil?
  end

  nil
end
|
|
198
|
+
|
|
199
|
+
# Reports whether SearchEngine.config.mapper.strict_unknown_keys is truthy.
#
# @return [Boolean] false when the setting is off, missing, or reading it
#   raises a StandardError
def strict_unknown_keys_enabled?
  !!SearchEngine.config&.mapper&.strict_unknown_keys
rescue StandardError
  false
end
|
|
204
|
+
|
|
205
|
+
# Reports whether SearchEngine.config.mapper.coercions[:enabled] is truthy.
#
# @return [Boolean] false when the setting is off, missing, or reading it
#   raises a StandardError
def coercions_enabled?
  settings = SearchEngine.config&.mapper&.coercions
  settings = {} unless settings
  settings[:enabled] ? true : false
rescue StandardError
  false
end
|
|
211
|
+
|
|
212
|
+
# Validates every typed value in +document+ and writes coerced values back.
#
# Keys without an entry in +types_by_field+ are ignored, and nil values of
# optional attributes are skipped. A coerced value is stored whenever the
# validator returns one, including a coerced boolean `false` (previously
# lost because the check relied on truthiness).
#
# @param klass [Object] model class (may respond to attribute_options)
# @param document [Hash] document to validate and mutate
# @param types_by_field [Hash{String=>String}] expected type per field
# @param coercions_enabled [Boolean] whether string coercions are allowed
# @return [Hash] the same +document+
# @raise [SearchEngine::Errors::InvalidParams] when a value has the wrong type
def validate_and_coerce_types!(klass, document, types_by_field, coercions_enabled)
  optional_fields =
    begin
      opts = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
      opts.each_with_object(Set.new) do |(fname, conf), acc|
        acc << fname.to_s if conf.is_a?(Hash) && conf[:optional]
      end
    rescue StandardError
      Set.new
    end

  # Iterate over a snapshot: writing back under the String key must never
  # add a key to the hash while it is being enumerated.
  document.to_a.each do |key, value|
    field = key.to_s
    expected = types_by_field[field]
    next unless expected

    # nil is acceptable for optional attributes.
    next if value.nil? && optional_fields.include?(field)

    valid, coerced, err = validate_value_for_type(expected, value, coercions_enabled: coercions_enabled)
    unless valid
      raise SearchEngine::Errors::InvalidParams.new(
        err,
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
        details: { field: field, expected: expected, got: value.class.name }
      )
    end

    # Validators return nil as the second element when nothing was coerced.
    document[field] = coerced unless coerced.nil?
  end

  document
end
|
|
243
|
+
|
|
244
|
+
# Checks +value+ against the schema type +expected+.
#
# Returns a triple: validity, the coerced replacement (nil when none), and
# a third slot that holds `true` when a coercion happened or an error
# message otherwise. Time values are always coerced: to epoch seconds for
# integer types and to ISO8601 for 'string' (Date/DateTime are converted to
# UTC first). Types not listed here are accepted as-is.
#
# @param expected [String] schema type name
# @param value [Object] value to check
# @param coercions_enabled [Boolean] forwarded to the scalar validators
# @return [Array(Boolean, Object, Object)]
def validate_value_for_type(expected, value, coercions_enabled: false)
  case expected
  when 'int64', 'int32'
    value.is_a?(Time) ? [true, value.to_i, true] : validate_integer(value, coercions_enabled)
  when 'float'
    validate_float(value, coercions_enabled)
  when 'bool'
    validate_bool(value, coercions_enabled)
  when 'string'
    return [true, value.iso8601, true] if value.is_a?(Time)

    date_like = (defined?(DateTime) && value.is_a?(DateTime)) ||
                (defined?(Date) && value.is_a?(Date))
    return [true, value.to_time.utc.iso8601, true] if date_like

    [value.is_a?(String), nil, invalid_type_message('String', value)]
  when 'string[]'
    all_strings = value.is_a?(Array) && value.all? { |item| item.is_a?(String) }
    all_strings ? [true, nil, nil] : [false, nil, invalid_type_message('Array<String>', value)]
  else
    [true, nil, nil]
  end
end
|
|
274
|
+
|
|
275
|
+
# Validates an integer field value.
#
# Integers pass unchanged. With coercions enabled, integer-looking strings
# are parsed as base 10, so a leading zero is no longer read as octal
# ("010" became 8) or rejected with an exception ("08" raised). A string
# that passes the format check but still cannot be parsed is reported as
# invalid instead of raising.
#
# @param value [Object] value to check
# @param coercions_enabled [Boolean] whether strings may be coerced
# @return [Array(Boolean, Integer, Object)] [valid, coerced-or-nil,
#   true-when-coerced / error message / nil]
def validate_integer(value, coercions_enabled)
  return [true, nil, nil] if value.is_a?(Integer)

  if coercions_enabled && string_integer?(value)
    parsed =
      begin
        Integer(value, 10)
      rescue ArgumentError, TypeError
        nil
      end
    return [true, parsed, true] if parsed
  end

  [false, nil, invalid_type_message('Integer', value)]
end
|
|
284
|
+
|
|
285
|
+
# Validates a float field value.
#
# Any finite Numeric passes unchanged. With coercions enabled, a
# float-looking string is parsed with Float(); the parse must succeed and
# yield a finite number.
#
# @param value [Object] value to check
# @param coercions_enabled [Boolean] whether strings may be coerced
# @return [Array(Boolean, Float, Object)] [valid, coerced-or-nil,
#   true-when-coerced / error message / nil]
def validate_float(value, coercions_enabled)
  return [true, nil, nil] if value.is_a?(Numeric) && finite_number?(value)

  if coercions_enabled && string_float?(value)
    parsed =
      begin
        Float(value)
      rescue StandardError
        nil
      end
    return [true, parsed, true] if parsed && finite_number?(parsed)
  end

  [false, nil, invalid_type_message('Float', value)]
end
|
|
304
|
+
|
|
305
|
+
# Validates a boolean field value.
#
# true and false pass unchanged. With coercions enabled, values whose
# lowercased string form is "true"/"1" or "false"/"0" are coerced.
#
# @param value [Object] value to check
# @param coercions_enabled [Boolean] whether coercion is allowed
# @return [Array(Boolean, Boolean, Object)] [valid, coerced-or-nil,
#   true-when-coerced / error message / nil]
def validate_bool(value, coercions_enabled)
  return [true, nil, nil] if value.equal?(true) || value.equal?(false)

  if coercions_enabled
    token = value.to_s.downcase
    return [true, %w[true 1].include?(token), true] if %w[true false 1 0].include?(token)
  end

  [false, nil, invalid_type_message('Boolean', value)]
end
|
|
314
|
+
|
|
315
|
+
# Tells whether +v+ is a String holding only an optionally signed run of
# decimal digits.
#
# The pattern is anchored to the whole string (\A..\z). The previous
# line anchors (^..$) accepted multi-line input such as "12\nabc" as long
# as one line matched; strings with a trailing newline are now rejected too.
#
# @param v [Object] value to check
# @return [Boolean]
def string_integer?(v)
  v.is_a?(String) && v.match?(/\A[-+]?\d+\z/)
end
|
|
318
|
+
|
|
319
|
+
# Tells whether +v+ is a String holding an optionally signed decimal number
# such as "3", "-3.25" or ".5".
#
# The pattern is anchored to the whole string (\A..\z) and requires at
# least one digit. The previous pattern used line anchors and optional
# parts only, so it also accepted "", "+" and multi-line input.
#
# @param v [Object] value to check
# @return [Boolean]
def string_float?(v)
  v.is_a?(String) && v.match?(/\A[-+]?(?:\d+(?:\.\d+)?|\.\d+)\z/)
end
|
|
322
|
+
|
|
323
|
+
# Tells whether +v+ is usable as a finite number.
#
# Only Floats are inspected (NaN and infinities are rejected); every other
# value is reported as finite.
#
# @param v [Object] value to check
# @return [Boolean]
def finite_number?(v)
  v.is_a?(Float) ? v.finite? : true
end
|
|
328
|
+
|
|
329
|
+
# Builds the error text for a type mismatch.
#
# The offending value is shown by class name plus a preview limited to its
# first 50 characters.
#
# @param expected [String] human-readable expected type
# @param got [Object] the offending value
# @return [String]
def invalid_type_message(expected, got)
  got_class = got.nil? ? 'NilClass' : got.class.name
  text = got.is_a?(String) ? got : got.to_s
  got_preview = text[0, 50]
  "Invalid type (expected #{expected}, got #{got_class}: \"#{got_preview}\")."
end
|
|
334
|
+
|
|
335
|
+
# Enforces required fields and, when strict mode is on, rejects unknown ones.
#
# @param klass [#name] model class (used in the error message)
# @param present_keys [Set<String>] keys found in the document
# @param allowed_keys [Set<String>] keys the schema/DSL accepts
# @param required_keys [Set<String>] keys that must be present
# @return [nil]
# @raise [SearchEngine::Errors::InvalidParams] when required keys are missing
# @raise [SearchEngine::Errors::InvalidField] when strict unknown-key
#   checking is enabled and extra keys are present
def validate_required_and_unknown!(klass, present_keys, allowed_keys, required_keys)
  absent = required_keys - present_keys
  unless absent.empty?
    absent_sorted = absent.to_a.sort
    raise SearchEngine::Errors::InvalidParams.new(
      "Missing required fields: #{absent_sorted.inspect} for #{klass.name}.",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
      details: { missing_required: absent.to_a.sort }
    )
  end

  unknown = present_keys - allowed_keys
  return unless strict_unknown_keys_enabled? && unknown.any?

  unknown_sorted = unknown.to_a.sort
  raise SearchEngine::Errors::InvalidField.new(
    "Unknown fields detected: #{unknown_sorted.inspect} (set mapper.strict_unknown_keys).",
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
    details: { extras: unknown.to_a.sort }
  )
end
|
|
359
|
+
|
|
360
|
+
# Picks the collection name a write should target.
#
# Resolution order:
# 1. a non-blank explicit +into+ (stringified);
# 2. a non-blank :into in SearchEngine::Instrumentation.context;
# 3. a non-blank result of the configured partitioning default_into_resolver;
# 4. klass.collection, or klass.name when no collection accessor exists.
#
# The resolver is called with (klass) when its arity is 1 or when it does
# not expose #arity, and with (klass, partition) for arity 2, -1, -2 or -3.
# Arities -2/-3 cover callables with optional or splat trailing parameters,
# e.g. `->(klass, partition = nil)`, which were previously ignored.
#
# @param klass [Object] model class
# @param into [String, nil] explicit collection override
# @param partition [Object, nil] partition token passed to the resolver
# @return [String, Object] collection name (context/resolver values are
#   returned as given)
def resolve_target_collection(klass, into:, partition: nil)
  return into.to_s if into && !into.to_s.strip.empty?

  begin
    ctx_into = SearchEngine::Instrumentation.context[:into]
    return ctx_into if ctx_into && !ctx_into.to_s.strip.empty?
  rescue StandardError
    # fall through to default resolution
  end

  resolver = begin
    SearchEngine.config.partitioning&.default_into_resolver
  rescue StandardError
    nil
  end

  if resolver
    resolved =
      if resolver.respond_to?(:arity)
        case resolver.arity
        when 1 then resolver.call(klass)
        when 2, -1, -2, -3 then resolver.call(klass, partition)
        end
      else
        resolver.call(klass)
      end
    return resolved if resolved && !resolved.to_s.strip.empty?
  end

  klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s
end
|
|
396
|
+
|
|
397
|
+
# Guarantees that +document+ carries an id.
#
# A non-blank id already present (under 'id' or :id) is left untouched;
# otherwise one is resolved via compute_id_for_create and stored under 'id'.
#
# @param klass [Object] model class
# @param document [Hash] document to inspect and possibly mutate
# @return [Object, nil] nil when an id was already present, otherwise the
#   resolved id
# @raise [SearchEngine::Errors::InvalidParams] when no id can be resolved
def ensure_document_id!(klass, document)
  existing = document['id'] || document[:id]
  already_set = existing && !existing.to_s.strip.empty?
  return if already_set

  resolved = compute_id_for_create(klass, document)
  if resolved.to_s.strip.empty?
    raise SearchEngine::Errors::InvalidParams,
          'Document id could not be resolved. Provide :id or define identify_by.'
  end

  document['id'] = resolved
end
|
|
408
|
+
|
|
409
|
+
# Runs the full normalization pipeline on a single document, in place.
#
# Steps, in order: ensure an id, stamp doc_updated_at, add hidden flags,
# drop nil optional fields, check required/unknown keys, then validate and
# coerce typed values.
#
# @param klass [Object] model class
# @param document [Hash{String=>Object}] document to mutate
# @param types_by_field [Hash{String=>String}] expected type per field
# @param allowed_keys [Set<String>] accepted keys
# @param required_keys [Set<String>] mandatory keys
# @return [Hash] the same +document+
def normalize_document!(klass, document, types_by_field, allowed_keys, required_keys)
  ensure_document_id!(klass, document)
  update_doc_updated_at!(document)
  append_hidden_flags!(klass, document, allowed_keys)
  prune_nil_optional_fields!(klass, document)

  # Key presence is evaluated only after flags were added and nils pruned.
  present_keys = Set.new(document.keys.map(&:to_s))
  validate_required_and_unknown!(klass, present_keys, allowed_keys, required_keys)
  validate_and_coerce_types!(klass, document, types_by_field, coercions_enabled?)

  document
end
|
|
420
|
+
|
|
421
|
+
# Converts one pre-mapped document into a new Hash with String keys.
#
# @param _klass [Object] unused
# @param hash [Hash] pre-mapped document
# @return [Hash{String=>Object}] a fresh hash; the input is not mutated
# @raise [SearchEngine::Errors::InvalidParams] when +hash+ is not a Hash
def normalize_mapped_data!(_klass, hash)
  unless hash.is_a?(Hash)
    raise SearchEngine::Errors::InvalidParams,
          'Mapped data must be a Hash with string/symbol keys.'
  end

  hash.each_with_object({}) do |(key, value), stringified|
    stringified[key.to_s] = value
  end
end
|
|
433
|
+
|
|
434
|
+
# Fetches the mapper registered for +klass+ via SearchEngine::Mapper.for.
#
# @param klass [#name] model class
# @return [Object] the mapper
# @raise [SearchEngine::Errors::InvalidParams] when no mapper is returned
def mapper_for!(klass)
  found = SearchEngine::Mapper.for(klass)
  unless found
    raise SearchEngine::Errors::InvalidParams,
          "mapper is not defined for #{klass.name}. Define it via `index do ... end`."
  end

  found
end
|
|
441
|
+
|
|
442
|
+
# Maps source records to documents with the model's mapper and stringifies
# every document's keys.
#
# All records go through a single mapper.map_batch! call with batch index
# 0; only the first element of its return value (the documents) is used.
#
# @param klass [Object] model class
# @param records [Object, Array<Object>] source record(s)
# @return [Array<Hash{String=>Object}>]
# @raise [SearchEngine::Errors::InvalidParams] when no mapper is defined
def map_records!(klass, records)
  mapped, = mapper_for!(klass).map_batch!(Array(records), batch_index: 0)
  mapped.map do |doc|
    doc.each_with_object({}) { |(key, value), stringified| stringified[key.to_s] = value }
  end
end
|
|
454
|
+
|
|
455
|
+
# Encodes +docs+ as JSONL into a fresh mutable String using
# SearchEngine::Indexer::BatchPlanner.encode_jsonl!.
#
# @param docs [Array<Hash>] documents to encode
# @return [Array(Object, Object, String)] the two values reported by the
#   planner (used by callers as document count and byte size) followed by
#   the filled buffer
def encode_jsonl!(docs)
  payload = ''.dup
  count, bytes = SearchEngine::Indexer::BatchPlanner.encode_jsonl!(docs, payload)
  [count, bytes, payload]
end
|
|
460
|
+
|
|
461
|
+
# Produces normalized, validated documents from either source records or
# pre-mapped data.
#
# Exactly one of +records+ / +data+ must be given. Records are mapped via
# the model's mapper; data entries are only key-stringified. An empty input
# returns [] before the schema is compiled. Every resulting document then
# goes through normalize_document!.
#
# @param klass [Object] model class
# @param records [Enumerable<Object>, nil] unmapped source records
# @param data [Hash, Enumerable<Hash>, nil] pre-mapped document(s)
# @return [Array<Hash{String=>Object}>]
# @raise [SearchEngine::Errors::InvalidParams] when both or neither input
#   is given, or when a document fails validation
def prepare_documents(klass, records:, data:)
  if records && data
    raise SearchEngine::Errors::InvalidParams,
          'Provide either :records or :data, not both.'
  end
  unless records || data
    raise SearchEngine::Errors::InvalidParams,
          'Provide :records or :data.'
  end

  pending =
    if records
      rows = normalize_records_input(records)
      return [] if rows.empty?

      map_records!(klass, rows)
    else
      payloads = normalize_data_input(data)
      return [] if payloads.empty?

      payloads.map { |payload| normalize_mapped_data!(klass, payload) }
    end

  schema = SearchEngine::Schema.compile(klass)
  types_by_field = build_types_by_field_from_schema(schema)
  allowed_keys = compute_allowed_keys_from_schema_and_dsl(klass, schema)
  required_keys = compute_required_keys_from_schema(klass, schema)

  pending.map do |document|
    normalize_document!(klass, document, types_by_field, allowed_keys, required_keys)
  end
end
|
|
492
|
+
|
|
493
|
+
# Sends prepared documents to the target collection as one JSONL upsert
# and summarizes the outcome.
#
# The result always has the same keys. The empty-input result now includes
# :errors_sample (an empty Array) like the non-empty result, so callers can
# read it without a nil check.
#
# @param klass [Object] model class
# @param docs [Array<Hash>] normalized documents
# @param into [String, nil] explicit collection override
# @param partition [Object, nil] partition token for the collection resolver
# @return [Hash] :collection, :docs_count, :success_count, :failure_count,
#   :bytes_sent, :response, :errors_sample
# @raise [SearchEngine::Errors::InvalidParams] when at least one document
#   is reported as failed; the result hash is attached as details
def import_documents!(klass, docs, into:, partition: nil)
  collection = resolve_target_collection(klass, into: into, partition: partition)
  if docs.empty?
    # Nothing to send: report an all-zero result without calling the client.
    return {
      collection: collection,
      docs_count: 0,
      success_count: 0,
      failure_count: 0,
      bytes_sent: 0,
      response: nil,
      errors_sample: []
    }
  end

  count, bytes, jsonl = encode_jsonl!(docs)
  raw = SearchEngine.client.import_documents(collection: collection, jsonl: jsonl, action: :upsert)
  success_count, failure_count, errors_sample = parse_import_response(raw)

  result = {
    collection: collection,
    docs_count: count,
    success_count: success_count,
    failure_count: failure_count,
    bytes_sent: bytes,
    response: raw,
    errors_sample: errors_sample
  }

  if failure_count.positive?
    sample = errors_sample&.first
    msg = "Typesense import failed for #{failure_count}/#{count} document(s)"
    msg = "#{msg} (e.g., #{sample})" if sample
    raise SearchEngine::Errors::InvalidParams.new(
      msg,
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
      details: result
    )
  end

  result
end
|
|
533
|
+
|
|
534
|
+
# Parses +str+ as JSON without raising.
#
# @param str [String] JSON text
# @return [Object, nil] the parsed value, or nil when parsing raises a
#   StandardError (a parsed JSON `null` is also nil)
def safe_parse_json(str)
  parsed = JSON.parse(str)
  parsed
rescue StandardError
  nil
end
|
|
539
|
+
|
|
540
|
+
# Summarizes an import response by dispatching on its type.
#
# Strings are treated as JSONL text and Arrays as already-parsed entries;
# any other value yields an all-zero summary.
#
# @param raw [String, Array, Object] client response
# @return [Array(Integer, Integer, Array<String>)] success count, failure
#   count, and a sample of error messages
def parse_import_response(raw)
  case raw
  when String then parse_import_response_from_string(raw)
  when Array then parse_import_response_from_array(raw)
  else [0, 0, []]
  end
end
|
|
546
|
+
|
|
547
|
+
# Summarizes a JSONL import response, one result object per line.
#
# Blank lines are skipped. A line that is not valid JSON, or is valid JSON
# but not an object (a number, string or array), counts as a failure with
# the sample 'invalid-json-line'. Previously such lines raised TypeError
# when indexed. Error samples are truncated to 200 characters and at most
# five are returned.
#
# @param str [String] raw JSONL response body
# @return [Array(Integer, Integer, Array<String>)] success count, failure
#   count, and up to five error samples
def parse_import_response_from_string(str)
  success = 0
  failure = 0
  samples = []

  str.each_line do |line|
    line = line.strip
    next if line.empty?

    entry = safe_parse_json(line)
    unless entry.is_a?(Hash)
      failure += 1
      samples << 'invalid-json-line'
      next
    end

    if truthy?(entry['success'] || entry[:success])
      success += 1
    else
      failure += 1
      msg = entry['error'] || entry[:error] || entry['message'] || entry[:message]
      samples << msg.to_s[0, 200] if msg
    end
  end

  [success, failure, samples[0, 5]]
end
|
|
574
|
+
|
|
575
|
+
# Summarizes an already-parsed import response.
#
# An entry counts as a success only when it is a Hash with a truthy
# 'success'/:success value; everything else is a failure. Error samples are
# taken from 'error' or 'message' (String or Symbol key), truncated to 200
# characters, and at most five are returned.
#
# @param arr [Array] per-document result entries
# @return [Array(Integer, Integer, Array<String>)] success count, failure
#   count, and up to five error samples
def parse_import_response_from_array(arr)
  succeeded = 0
  failed = 0
  samples = []

  arr.each do |entry|
    hash_entry = entry.is_a?(Hash)
    if hash_entry && truthy?(entry['success'] || entry[:success])
      succeeded += 1
      next
    end

    failed += 1
    detail = hash_entry ? (entry['error'] || entry[:error] || entry['message'] || entry[:message]) : nil
    samples << detail.to_s[0, 200] if detail
  end

  [succeeded, failed, samples[0, 5]]
end
|
|
592
|
+
|
|
593
|
+
# Tells whether +val+ is `true` or has the string form "true" in any
# letter case.
#
# @param val [Object] value to check
# @return [Boolean]
def truthy?(val)
  return true if val == true

  val.to_s.downcase == 'true'
end
|
|
596
|
+
|
|
597
|
+
# Turns the +records+ argument into an Array.
#
# Arrays are returned as-is; objects responding to #to_a are converted
# through it; anything else is wrapped with Kernel#Array.
#
# @param records [Object] record collection or single record
# @return [Array]
def normalize_records_input(records)
  return records if records.is_a?(Array)
  return Array(records.to_a) if records.respond_to?(:to_a)

  Array(records)
end
|
|
606
|
+
|
|
607
|
+
# Turns the +data+ argument into an Array of documents.
#
# Arrays are returned as-is, a single Hash is wrapped in an Array, objects
# responding to #to_a are converted through it, and anything else is
# wrapped with Kernel#Array.
#
# @param data [Object] pre-mapped document(s)
# @return [Array]
def normalize_data_input(data)
  case data
  when Array then data
  when Hash then [data]
  else
    data.respond_to?(:to_a) ? Array(data.to_a) : Array(data)
  end
end
|
|
618
|
+
|
|
619
|
+
# Builds a model instance from a stored document.
#
# klass.from_document is used when available. Otherwise a bare instance is
# created and every document key is assigned as an instance variable of
# the same name.
#
# @param klass [Class] model class
# @param doc [Hash, nil] document; nil is treated as an empty Hash
# @return [Object] hydrated instance
def hydrate_from_document(klass, doc)
  attributes = doc || {}
  return klass.from_document(attributes) if klass.respond_to?(:from_document)

  klass.new.tap do |instance|
    attributes.each do |name, value|
      instance.instance_variable_set("@#{name}", value)
    end
  end
end
|
|
629
|
+
end
|
|
630
|
+
|
|
631
|
+
class_methods do
|
|
632
|
+
# Create a single document in the backing Typesense collection and return
# a hydrated instance built from the client's response.
#
# - Attributes come from +attrs+ when given, otherwise from the extra
#   keyword arguments.
# - A non-blank `id` is used as-is; otherwise the id is resolved with
#   `Helpers.compute_id_for_create`.
# - `doc_updated_at` is set to the current timestamp (seconds).
# - Hidden flags `<name>_empty` / `<name>_blank` are added when the schema
#   allows them.
# - Required fields are enforced (respecting `optional` attributes) and
#   unknown fields are rejected when `mapper.strict_unknown_keys` is enabled.
# - Values are type-checked, with optional coercions per mapper settings.
# - The target collection is +into+ when non-blank, otherwise the alias
#   resolved by the client, falling back to the logical name.
#
# @param attrs [Hash, nil] document attributes when passed as a single Hash
# @param into [String, nil] explicit physical collection override
# @param _timeout_ms [Integer, nil] accepted but not used by this method
# @param kwargs [Hash] document attributes when +attrs+ is nil
# @return [Object] hydrated instance of this model
# @raise [SearchEngine::Errors::InvalidParams, SearchEngine::Errors::InvalidField]
def create(attrs = nil, into: nil, _timeout_ms: nil, **kwargs)
  raw_attrs = attrs.nil? ? kwargs : attrs
  unless raw_attrs.is_a?(Hash)
    raise SearchEngine::Errors::InvalidParams, 'attrs must be a Hash or keyword args'
  end

  schema = SearchEngine::Schema.compile(self)
  types_by_field = Helpers.build_types_by_field_from_schema(schema)
  allowed_keys = Helpers.compute_allowed_keys_from_schema_and_dsl(self, schema)
  required_keys = Helpers.compute_required_keys_from_schema(self, schema)

  # Work on a String-keyed copy of the incoming attributes.
  document = Helpers.normalize_attrs_to_document(raw_attrs)

  given_id = document['id']
  if given_id.nil? || given_id.to_s.strip.empty?
    resolved_id = Helpers.compute_id_for_create(self, raw_attrs)
    if resolved_id.nil? || resolved_id.to_s.strip.empty?
      raise SearchEngine::Errors::InvalidParams,
            'Document id could not be resolved. Provide :id or a *_id matching the source model.'
    end
    document['id'] = resolved_id
  end

  Helpers.update_doc_updated_at!(document)
  Helpers.append_hidden_flags!(self, document, allowed_keys)

  present_keys = document.keys.map(&:to_s).to_set
  Helpers.validate_required_and_unknown!(self, present_keys, allowed_keys, required_keys)
  Helpers.validate_and_coerce_types!(self, document, types_by_field, Helpers.coercions_enabled?)

  client = SearchEngine.client
  logical = respond_to?(:collection) ? collection.to_s : name.to_s
  explicit_target = into && !into.to_s.strip.empty?
  target = explicit_target ? into.to_s : (client.resolve_alias(logical) || logical)

  stored = client.create_document(collection: target, document: document)
  Helpers.hydrate_from_document(self, stored)
end
|
|
687
|
+
|
|
688
|
+
# Upsert a single document into the collection.
#
# Pass either an unmapped source +record+ (mapped via the configured DSL)
# or pre-mapped +data+. The document is normalized against the compiled
# schema and imported through the JSONL import path.
#
# @param record [Object, nil] source record to map
# @param data [Hash, nil] pre-mapped document
# @param into [String, nil] optional physical collection override
# @param partition [Object, nil] partition token for resolvers
# @return [Integer] number of successfully upserted documents (0 or 1)
# @raise [SearchEngine::Errors::InvalidParams]
def upsert(record: nil, data: nil, into: nil, partition: nil)
  records = record ? [record] : nil
  docs = Helpers.prepare_documents(self, records: records, data: data)
  return 0 if docs.empty?

  Helpers.import_documents!(self, docs, into: into, partition: partition)[:success_count]
end
|
|
707
|
+
|
|
708
|
+
# Upsert many documents into the collection in a single JSONL payload.
#
# Pass either an enumerable of unmapped source +records+ or an enumerable
# of pre-mapped +data+ documents. Each entry is normalized and validated
# against the compiled schema before the import.
#
# @param records [Enumerable<Object>, nil]
# @param data [Enumerable<Hash>, nil]
# @param into [String, nil] optional physical collection override
# @param partition [Object, nil] partition token for resolvers
# @return [Hash] stats payload as returned by the import helper
#   (:collection, :docs_count, :success_count, :failure_count, :bytes_sent, :response, ...)
# @raise [SearchEngine::Errors::InvalidParams]
def upsert_bulk(records: nil, data: nil, into: nil, partition: nil)
  prepared = Helpers.prepare_documents(self, records: records, data: data)
  Helpers.import_documents!(self, prepared, into: into, partition: partition)
end
|
|
724
|
+
end
|
|
725
|
+
end
|
|
726
|
+
end
|
|
727
|
+
end
|