search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,727 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/concern'
4
+ require 'set'
5
+ require 'json'
6
+ require 'search_engine/indexer/batch_planner'
7
+
8
+ module SearchEngine
9
+ class Base
10
+ # Creation helpers for inserting a single document into a collection.
11
+ #
12
+ # Provides ActiveRecord-like `.create(attrs)` that validates and normalizes
13
+ # attributes against the compiled schema, computes hidden flags and forces
14
+ # the `doc_updated_at` timestamp. Returns a hydrated model instance.
15
+ module Creation
16
+ extend ActiveSupport::Concern
17
+
18
+ # Internal helpers extracted to keep the public API lean and within style limits.
19
+ module Helpers
20
+ module_function
21
+
22
# Copy +attrs+ into a fresh Hash whose keys are stringified.
#
# @param attrs [#each] key/value pairs (typically a Hash)
# @return [Hash{String=>Object}] new hash; +attrs+ itself is not modified
def normalize_attrs_to_document(attrs)
  attrs.each_with_object({}) do |(name, value), document|
    document[name.to_s] = value
  end
end
27
+
28
# Resolve a document id from +attrs+ when the caller did not pass one.
#
# For ActiveRecord-backed models the `*_id` fallback names are probed first
# (string key, then symbol key). Whatever the source type, an explicitly
# defined `identify_by` is the last resort.
#
# @param klass [Class] model class
# @param attrs [Hash] incoming attributes (string or symbol keys)
# @return [String, nil] resolved id, or nil when nothing could be derived
def compute_id_for_create(klass, attrs)
  if source_type_for(klass) == :active_record
    fallback_id_field_names_for(klass).each do |name|
      sym = name.to_sym
      next unless attrs.key?(name) || attrs.key?(sym)

      candidate = attrs[name] || attrs[sym]
      next if candidate.nil? || candidate.to_s.strip.empty?

      return candidate.to_s
    end
  end

  # Both source kinds end the same way: identify_by when defined, else unresolved.
  identify_by_defined?(klass) ? compute_from_identify_by(klass, attrs) : nil
end
54
+
55
# Whether +klass+ carries an `@identify_by_proc` instance variable
# (set or not, any value including nil counts as defined).
#
# @param klass [Class]
# @return [Boolean]
def identify_by_defined?(klass)
  klass.instance_variables.include?(:@identify_by_proc)
end
58
+
59
# Compute an id by passing an OpenStruct built from +attrs+ to
# `klass.compute_document_id`.
#
# @param klass [#compute_document_id]
# @param attrs [Hash]
# @return [String, nil] the id as a String; nil when it is blank or when
#   anything raised a StandardError along the way
def compute_from_identify_by(klass, attrs)
  require 'ostruct' # loaded lazily; only needed on this path
  computed = klass.compute_document_id(OpenStruct.new(attrs))
  id = String === computed ? computed : computed.to_s
  return nil if id.to_s.strip.empty?

  id
rescue StandardError
  nil
end
68
+
69
# Read the source type recorded under `@__mapper_dsl__[:source][:type]`.
#
# @param klass [Class]
# @return [Symbol, nil] the type as a Symbol; nil when absent or on any error
def source_type_for(klass)
  dsl = klass.instance_variable_get(:@__mapper_dsl__)
  kind = dsl&.dig(:source, :type)
  kind.nil? ? nil : kind.to_sym
rescue StandardError
  nil
end
75
+
76
# Candidate `*_id` attribute names that may carry the document id.
#
# The name derived from the source model configured under
# `@__mapper_dsl__[:source][:options][:model]` comes first (when present),
# followed by the name derived from +klass+ itself. Only the last namespace
# segment of each name is used.
#
# @param klass [Class]
# @return [Array<String>] unique candidate names; just the klass-based name
#   when reading the DSL raised
def fallback_id_field_names_for(klass)
  own_candidate = lambda do
    "#{ActiveSupport::Inflector.underscore(klass.name.to_s.split('::').last)}_id"
  end

  begin
    model = klass.instance_variable_get(:@__mapper_dsl__)&.dig(:source, :options, :model)
    model_name =
      if model.respond_to?(:name)
        model.name
      elsif model.is_a?(String)
        model
      end
    short = model_name.to_s.split('::').last

    candidates = []
    candidates << "#{ActiveSupport::Inflector.underscore(short)}_id" if short
    candidates << own_candidate.call
    candidates.uniq
  rescue StandardError
    [own_candidate.call]
  end
end
95
+
96
# Stamp +document+ with the current epoch seconds under 'doc_updated_at'.
# Uses `Time.zone` when that method exists and returns a truthy value,
# otherwise plain `Time`.
#
# @param document [Hash] mutated in place
# @return [Integer] the timestamp that was written
def update_doc_updated_at!(document)
  zone = Time.zone if defined?(Time) && defined?(Time.zone)
  document['doc_updated_at'] = (zone || Time).now.to_i
end
104
+
105
# Index the compiled schema's fields by name.
#
# @param compiled_schema [Hash] read via `compiled_schema[:fields]`; each
#   field may use symbol or string keys for name/type
# @return [Hash{String=>String}] field name => type
def build_types_by_field_from_schema(compiled_schema)
  Array(compiled_schema[:fields]).each_with_object({}) do |field, types|
    name = field[:name] || field['name']
    type = field[:type] || field['type']
    types[name.to_s] = type.to_s
  end
end
112
+
113
# Names of fields a document must contain: every compiled field whose name
# has no dot, minus attributes declared `optional` in `attribute_options`.
#
# @param klass [Class] may respond to `attribute_options`
# @param compiled_schema [Hash] read via `compiled_schema[:fields]`
# @return [Set<String>]
def compute_required_keys_from_schema(klass, compiled_schema)
  required = Set.new
  Array(compiled_schema[:fields]).each do |field|
    name = (field[:name] || field['name']).to_s
    required << name unless name.include?('.')
  end

  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  options.each do |attr_name, conf|
    required.delete(attr_name.to_s) if conf.is_a?(Hash) && conf[:optional]
  end

  required
end
130
+
131
# Names a document is allowed to carry: every compiled field name (dotted
# ones included) plus attributes declared with `index: false`.
#
# @param klass [Class] may respond to `attribute_options`
# @param compiled_schema [Hash] read via `compiled_schema[:fields]`
# @return [Set<String>]
def compute_allowed_keys_from_schema_and_dsl(klass, compiled_schema)
  field_names = Array(compiled_schema[:fields]).map { |field| (field[:name] || field['name']).to_s }
  allowed = Set.new(field_names)

  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  # Unindexed attributes are absent from the compiled fields but still accepted.
  options.each do |attr_name, conf|
    allowed.add(attr_name.to_s) if conf.is_a?(Hash) && conf[:index] == false
  end

  allowed
end
150
+
151
# Write the hidden `<name>_empty` / `<name>_blank` flags into +document+.
#
# - `<name>_empty` is written for attributes declared with `empty_filtering`
#   and is true when the value is nil or an empty Array.
# - `<name>_blank` is written for attributes declared `optional` and is true
#   when the value is nil.
#
# A flag is only written when its key is in +allowed_keys+. The two flags are
# independent: a missing `<name>_empty` key no longer suppresses
# `<name>_blank` for the same attribute.
#
# @param klass [Class] may respond to `attribute_options`
# @param document [Hash{String=>Object}] mutated in place
# @param allowed_keys [#include?] permitted document keys
# @return [nil]
def append_hidden_flags!(klass, document, allowed_keys)
  begin
    opts = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
  rescue StandardError
    opts = {}
  end

  opts.each do |fname, conf|
    next unless conf.is_a?(Hash)

    base = fname.to_s
    value = document[base]

    empty_key = "#{base}_empty"
    if conf[:empty_filtering] && allowed_keys.include?(empty_key)
      document[empty_key] = value.nil? || (value.is_a?(Array) && value.empty?)
    end

    blank_key = "#{base}_blank"
    document[blank_key] = value.nil? if conf[:optional] && allowed_keys.include?(blank_key)
  end

  nil
end
181
+
182
# Drop keys of optional attributes from +document+ when their value is nil.
#
# @param klass [Class] may respond to `attribute_options`
# @param document [Hash{String=>Object}] mutated in place
# @return [nil]
def prune_nil_optional_fields!(klass, document)
  options =
    begin
      klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
    rescue StandardError
      {}
    end

  optional_keys = []
  options.each do |attr_name, conf|
    optional_keys << attr_name.to_s if conf.is_a?(Hash) && conf[:optional]
  end

  optional_keys.each { |key| document.delete(key) if document[key].nil? }
  nil
end
198
+
199
# Whether `SearchEngine.config.mapper.strict_unknown_keys` is truthy.
#
# @return [Boolean] false when any link of the chain is nil or raises
def strict_unknown_keys_enabled?
  !!SearchEngine.config&.mapper&.strict_unknown_keys
rescue StandardError
  false
end
204
+
205
# Whether `SearchEngine.config.mapper.coercions[:enabled]` is truthy.
#
# @return [Boolean] false when the settings are missing or reading them raises
def coercions_enabled?
  settings = SearchEngine.config&.mapper&.coercions
  settings = {} unless settings
  !!settings[:enabled]
rescue StandardError
  false
end
211
+
212
# Validate every document value against its schema type and write coerced
# values back into +document+.
#
# `validate_value_for_type` returns `[valid, coerced_value, third]`, where
# `third` is literally `true` when a coercion happened and otherwise an error
# message (or nil). The coercion decision is taken from that flag, not from
# the truthiness of the coerced value, so a value coerced to `false`
# (e.g. "false"/"0" for a bool field) is written back as well.
#
# Keys without a schema type are ignored. nil values of optional attributes
# skip validation.
#
# @param klass [Class] may respond to `attribute_options`
# @param document [Hash] mutated in place (coerced values stored under the
#   stringified key)
# @param types_by_field [Hash{String=>String}] field name => schema type
# @param coercions_enabled [Boolean]
# @return [Hash] +document+
# @raise [SearchEngine::Errors::InvalidParams] when a value has the wrong type
def validate_and_coerce_types!(klass, document, types_by_field, coercions_enabled)
  # Collect optional fields from the model DSL to allow nil values for them
  optional_fields =
    begin
      opts = klass.respond_to?(:attribute_options) ? (klass.attribute_options || {}) : {}
      opts.each_with_object(Set.new) do |(fname, conf), acc|
        acc << fname.to_s if conf.is_a?(Hash) && conf[:optional]
      end
    rescue StandardError
      Set.new
    end

  # Walk a snapshot: coerced values are assigned into +document+ inside the loop.
  document.to_a.each do |key, value|
    name = key.to_s
    expected = types_by_field[name]
    next unless expected

    # Skip type validation for nil values of optional fields
    next if value.nil? && optional_fields.include?(name)

    valid, coerced, detail = validate_value_for_type(expected, value, coercions_enabled: coercions_enabled)
    if detail == true
      document[name] = coerced
    elsif !valid
      raise SearchEngine::Errors::InvalidParams.new(
        detail,
        doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
        details: { field: name, expected: expected, got: value.class.name }
      )
    end
  end

  document
end
243
+
244
# Check +value+ against the schema type +expected+.
#
# Returns a three-element tuple `[valid, coerced_value, third]`. `third` is
# `true` when a coercion happened; otherwise it carries an error message or
# nil. Time values are always coerced (epoch seconds for integer types,
# ISO8601 for strings); Date/DateTime are coerced to a UTC ISO8601 string.
# Types not listed here are accepted as-is.
#
# @param expected [String] schema type
# @param value [Object]
# @param coercions_enabled [Boolean] forwarded to the numeric/bool validators
# @return [Array(Boolean, Object, Object)]
def validate_value_for_type(expected, value, coercions_enabled: false)
  case expected
  when 'int64', 'int32'
    value.is_a?(Time) ? [true, value.to_i, true] : validate_integer(value, coercions_enabled)
  when 'float'
    validate_float(value, coercions_enabled)
  when 'bool'
    validate_bool(value, coercions_enabled)
  when 'string'
    stamp =
      if value.is_a?(Time)
        value.iso8601
      elsif (defined?(DateTime) && value.is_a?(DateTime)) || (defined?(Date) && value.is_a?(Date))
        value.to_time.utc.iso8601
      end

    stamp ? [true, stamp, true] : [value.is_a?(String), nil, invalid_type_message('String', value)]
  when 'string[]'
    all_strings = value.is_a?(Array) && value.all? { |item| item.is_a?(String) }
    all_strings ? [true, nil, nil] : [false, nil, invalid_type_message('Array<String>', value)]
  else
    [true, nil, nil]
  end
end
274
+
275
# Validate an integer field value, optionally coercing integer-looking strings.
#
# Strings are parsed in base 10 explicitly: without the radix, `Integer("010")`
# is read as octal (8) and `Integer("09")` raises ArgumentError. A string that
# passes `string_integer?` but still cannot be parsed is reported as invalid
# instead of raising, matching `validate_float`.
#
# @param value [Object]
# @param coercions_enabled [Boolean]
# @return [Array(Boolean, Integer, Object)] `[true, nil, nil]` when already an
#   Integer, `[true, parsed, true]` when coerced, `[false, nil, message]` otherwise
def validate_integer(value, coercions_enabled)
  return [true, nil, nil] if value.is_a?(Integer)

  if coercions_enabled && string_integer?(value)
    begin
      return [true, Integer(value, 10), true]
    rescue ArgumentError
      # fall through to the invalid-type result below
    end
  end

  [false, nil, invalid_type_message('Integer', value)]
end
284
+
285
# Validate a float field value, optionally coercing float-looking strings.
#
# @param value [Object]
# @param coercions_enabled [Boolean]
# @return [Array(Boolean, Float, Object)] `[true, nil, nil]` for a finite
#   Numeric, `[true, parsed, true]` for a coerced string,
#   `[false, nil, message]` otherwise (including non-finite results)
def validate_float(value, coercions_enabled)
  return [true, nil, nil] if value.is_a?(Numeric) && finite_number?(value)

  rejection = -> { [false, nil, invalid_type_message('Float', value)] }
  return rejection.call unless coercions_enabled && string_float?(value)

  parsed =
    begin
      Float(value)
    rescue StandardError
      nil
    end

  parsed && finite_number?(parsed) ? [true, parsed, true] : rejection.call
end
304
+
305
# Validate a boolean field value, optionally coercing "true"/"false"/"1"/"0"
# (compared case-insensitively on the value's string form).
#
# @param value [Object]
# @param coercions_enabled [Boolean]
# @return [Array(Boolean, Boolean, Object)] `[true, nil, nil]` for a real
#   boolean, `[true, coerced, true]` when coerced, `[false, nil, message]` otherwise
def validate_bool(value, coercions_enabled)
  case value
  when true, false
    return [true, nil, nil]
  end

  if coercions_enabled
    token = value.to_s.downcase
    return [true, true, true] if %w[true 1].include?(token)
    return [true, false, true] if %w[false 0].include?(token)
  end

  [false, nil, invalid_type_message('Boolean', value)]
end
314
+
315
# Whether +v+ is a String consisting solely of an optionally signed run of
# decimal digits.
#
# Anchored with \A / \z: `^` and `$` match at every line boundary, so a
# multi-line string such as "12\nabc" used to pass this check.
#
# @param v [Object]
# @return [Boolean]
def string_integer?(v)
  v.is_a?(String) && v.match?(/\A[-+]?\d+\z/)
end
318
+
319
# Whether +v+ is a String holding a plain decimal number: optional sign,
# digits, optional fractional part (".5" is accepted, "1." is not).
#
# Anchored with \A / \z so multi-line input cannot pass on a single matching
# line, and at least one digit is required so "", "+" and "-" are rejected.
#
# @param v [Object]
# @return [Boolean]
def string_float?(v)
  v.is_a?(String) && v.match?(/\A[-+]?(?:\d+(?:\.\d+)?|\.\d+)\z/)
end
322
+
323
# Floats must be finite (no NaN / Infinity); every other value passes.
#
# @param v [Object]
# @return [Boolean]
def finite_number?(v)
  !v.is_a?(Float) || v.finite?
end
328
+
329
# Build the "Invalid type" error message, previewing at most the first
# 50 characters of the offending value's string form.
#
# @param expected [String] human-readable expected type
# @param got [Object] offending value
# @return [String]
def invalid_type_message(expected, got)
  preview = got.to_s[0, 50]
  "Invalid type (expected #{expected}, got #{got.class.name}: \"#{preview}\")."
end
334
+
335
# Enforce the required/unknown key rules for one document.
#
# @param klass [Class] used for the error message only
# @param present_keys [Set<String>] keys found in the document
# @param allowed_keys [Set<String>] keys the schema/DSL permits
# @param required_keys [Set<String>] keys that must be present
# @return [nil]
# @raise [SearchEngine::Errors::InvalidParams] when required keys are missing
# @raise [SearchEngine::Errors::InvalidField] when unknown keys are present and
#   strict unknown-key checking is enabled
def validate_required_and_unknown!(klass, present_keys, allowed_keys, required_keys)
  missing = required_keys - present_keys
  if !missing.empty?
    sorted_missing = missing.to_a.sort
    raise SearchEngine::Errors::InvalidParams.new(
      "Missing required fields: #{sorted_missing.inspect} for #{klass.name}.",
      doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
      details: { missing_required: sorted_missing }
    )
  end

  extras = present_keys - allowed_keys
  return unless strict_unknown_keys_enabled? && extras.any?

  sorted_extras = extras.to_a.sort
  raise SearchEngine::Errors::InvalidField.new(
    "Unknown fields detected: #{sorted_extras.inspect} (set mapper.strict_unknown_keys).",
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
    details: { extras: sorted_extras }
  )
end
359
+
360
# Decide which collection a write should target. First non-blank wins:
#
# 1. the explicit +into+ argument (stringified);
# 2. `SearchEngine::Instrumentation.context[:into]` (returned as-is);
# 3. the configured `partitioning.default_into_resolver`, called with
#    `(klass)` for arity 1, `(klass, partition)` for arity 2 or -1, and with
#    `(klass)` when it does not respond to `arity`; other arities are not called;
# 4. `klass.collection` when available, else `klass.name`.
#
# Errors while reading the context or the config are swallowed.
#
# @param klass [Class]
# @param into [Object, nil] explicit collection override
# @param partition [Object, nil] forwarded to two-argument resolvers
# @return [Object] collection name (a String for cases 1 and 4)
def resolve_target_collection(klass, into:, partition: nil)
  present = ->(candidate) { candidate && !candidate.to_s.strip.empty? }
  return into.to_s if present.call(into)

  begin
    from_context = SearchEngine::Instrumentation.context[:into]
    return from_context if present.call(from_context)
  rescue StandardError
    # fall through to default resolution
  end

  resolver =
    begin
      SearchEngine.config.partitioning&.default_into_resolver
    rescue StandardError
      nil
    end

  resolved =
    if resolver.respond_to?(:arity)
      case resolver.arity
      when 1 then resolver.call(klass)
      when 2, -1 then resolver.call(klass, partition)
      end
    elsif resolver
      resolver.call(klass)
    end
  return resolved if present.call(resolved)

  klass.respond_to?(:collection) ? klass.collection.to_s : klass.name.to_s
end
396
+
397
# Make sure +document+ has a non-blank id, deriving one when necessary.
#
# An existing non-blank 'id' / :id is left untouched; otherwise the id from
# `compute_id_for_create` is stored under the 'id' string key.
#
# @param klass [Class]
# @param document [Hash] mutated in place
# @return [Object, nil] nil when an id was already present, else the stored id
# @raise [SearchEngine::Errors::InvalidParams] when no id can be derived
def ensure_document_id!(klass, document)
  existing = document['id'] || document[:id]
  return unless !existing || existing.to_s.strip.empty?

  resolved = compute_id_for_create(klass, document)
  resolvable = !resolved.nil? && !resolved.to_s.strip.empty?
  unless resolvable
    raise SearchEngine::Errors::InvalidParams,
          'Document id could not be resolved. Provide :id or define identify_by.'
  end

  document['id'] = resolved
end
408
+
409
# Run the full normalization pipeline on one document, in this order:
# id resolution, `doc_updated_at` stamp, hidden flags, pruning of nil
# optional fields, required/unknown key validation, type validation/coercion.
#
# @param klass [Class]
# @param document [Hash{String=>Object}] mutated in place
# @param types_by_field [Hash{String=>String}]
# @param allowed_keys [Set<String>]
# @param required_keys [Set<String>]
# @return [Hash] the same +document+ object
# @raise [SearchEngine::Errors::InvalidParams, SearchEngine::Errors::InvalidField]
def normalize_document!(klass, document, types_by_field, allowed_keys, required_keys)
  # Enrichment steps: these add or remove keys.
  ensure_document_id!(klass, document)
  update_doc_updated_at!(document)
  append_hidden_flags!(klass, document, allowed_keys)
  prune_nil_optional_fields!(klass, document)

  # Validation steps: key presence is checked on the enriched key set.
  present_keys = document.each_key.map(&:to_s).to_set
  validate_required_and_unknown!(klass, present_keys, allowed_keys, required_keys)
  validate_and_coerce_types!(klass, document, types_by_field, coercions_enabled?)

  document
end
420
+
421
# Copy pre-mapped data into a new Hash with stringified keys.
#
# @param _klass [Class] unused
# @param hash [Hash] pre-mapped document
# @return [Hash{String=>Object}] fresh hash
# @raise [SearchEngine::Errors::InvalidParams] when +hash+ is not a Hash
def normalize_mapped_data!(_klass, hash)
  if hash.is_a?(Hash)
    return hash.each_with_object({}) { |(key, value), copy| copy[key.to_s] = value }
  end

  raise SearchEngine::Errors::InvalidParams,
        'Mapped data must be a Hash with string/symbol keys.'
end
433
+
434
# Fetch the mapper registered for +klass+ via `SearchEngine::Mapper.for`.
#
# @param klass [Class]
# @return [Object] the mapper
# @raise [SearchEngine::Errors::InvalidParams] when the lookup returns nothing
def mapper_for!(klass)
  SearchEngine::Mapper.for(klass) ||
    raise(SearchEngine::Errors::InvalidParams,
          "mapper is not defined for #{klass.name}. Define it via `index do ... end`.")
end
441
+
442
# Map source records through the model's mapper (as a single batch with
# `batch_index: 0`) and return the documents with stringified keys.
#
# @param klass [Class]
# @param records [Object] wrapped with `Array()`
# @return [Array<Hash{String=>Object}>]
# @raise [SearchEngine::Errors::InvalidParams] when no mapper is defined
def map_records!(klass, records)
  # Only the first element of the mapper's return value is used.
  mapped_docs, = mapper_for!(klass).map_batch!(Array(records), batch_index: 0)

  mapped_docs.map do |mapped|
    mapped.each_with_object({}) { |(key, value), copy| copy[key.to_s] = value }
  end
end
454
+
455
# Encode +docs+ via `SearchEngine::Indexer::BatchPlanner.encode_jsonl!`,
# which is handed a fresh mutable String buffer.
#
# @param docs [Array<Hash>]
# @return [Array] `[count, bytes, buffer]`: the first two values as returned
#   by the planner, followed by the buffer
def encode_jsonl!(docs)
  payload = ''.dup
  stats = SearchEngine::Indexer::BatchPlanner.encode_jsonl!(docs, payload)
  count, bytes = stats
  [count, bytes, payload]
end
460
+
461
# Turn either source records or pre-mapped data into normalized documents.
#
# Exactly one of +records+ / +data+ must be given. Empty input short-circuits
# to `[]` before the schema is compiled.
#
# @param klass [Class]
# @param records [Object, nil] source records, mapped through the mapper
# @param data [Object, nil] pre-mapped document(s)
# @return [Array<Hash{String=>Object}>] normalized documents
# @raise [SearchEngine::Errors::InvalidParams] on bad arguments or invalid documents
def prepare_documents(klass, records:, data:)
  raise SearchEngine::Errors::InvalidParams, 'Provide either :records or :data, not both.' if records && data
  raise SearchEngine::Errors::InvalidParams, 'Provide :records or :data.' unless records || data

  if records
    rows = normalize_records_input(records)
    return [] if rows.empty?

    source_docs = map_records!(klass, rows)
  else
    entries = normalize_data_input(data)
    return [] if entries.empty?

    source_docs = entries.map { |entry| normalize_mapped_data!(klass, entry) }
  end

  schema = SearchEngine::Schema.compile(klass)
  types = build_types_by_field_from_schema(schema)
  allowed = compute_allowed_keys_from_schema_and_dsl(klass, schema)
  required = compute_required_keys_from_schema(klass, schema)

  source_docs.map { |doc| normalize_document!(klass, doc, types, allowed, required) }
end
492
+
493
# Send +docs+ to the resolved collection as one JSONL upsert and report stats.
#
# @param klass [Class]
# @param docs [Array<Hash>] normalized documents
# @param into [Object, nil] explicit collection override
# @param partition [Object, nil] partition token for resolvers
# @return [Hash] :collection, :docs_count, :success_count, :failure_count,
#   :bytes_sent, :response; plus :errors_sample when an import was attempted
#   (the empty-input result has no :errors_sample key and a nil :response)
# @raise [SearchEngine::Errors::InvalidParams] when any document failed to
#   import; the stats hash is attached as `details`
def import_documents!(klass, docs, into:, partition: nil)
  collection = resolve_target_collection(klass, into: into, partition: partition)

  # Nothing to send: report zeroed stats without touching the client.
  if docs.empty?
    return { collection: collection, docs_count: 0, success_count: 0,
             failure_count: 0, bytes_sent: 0, response: nil }
  end

  count, bytes, jsonl = encode_jsonl!(docs)
  raw = SearchEngine.client.import_documents(collection: collection, jsonl: jsonl, action: :upsert)
  succeeded, failed, errors_sample = parse_import_response(raw)

  result = {
    collection: collection,
    docs_count: count,
    success_count: succeeded,
    failure_count: failed,
    bytes_sent: bytes,
    response: raw,
    errors_sample: errors_sample
  }
  return result unless failed.positive?

  first_error = errors_sample&.first
  message = "Typesense import failed for #{failed}/#{count} document(s)"
  message = "#{message} (e.g., #{first_error})" if first_error
  raise SearchEngine::Errors::InvalidParams.new(
    message,
    doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#troubleshooting',
    details: result
  )
end
533
+
534
# Parse +str+ as JSON without raising.
#
# @param str [Object] normally a String
# @return [Object, nil] the parsed value; nil when parsing raised a StandardError
def safe_parse_json(str)
  begin
    JSON.parse(str)
  rescue StandardError
    nil
  end
end
539
+
540
# Summarize an import response given as a String or as an Array.
#
# @param raw [String, Array, Object]
# @return [Array(Integer, Integer, Array<String>)] `[success, failure, samples]`;
#   `[0, 0, []]` for any other response type
def parse_import_response(raw)
  case raw
  when String then parse_import_response_from_string(raw)
  when Array then parse_import_response_from_array(raw)
  else [0, 0, []]
  end
end
546
+
547
# Summarize a JSONL import response, one result object per line.
#
# Blank lines are skipped. A line that is not valid JSON, or that parses to
# anything other than a Hash (e.g. `[1]`, `true`, `1`), is counted as a failure
# with the sample 'invalid-json-line'. Such values used to reach
# `h['success']` and raise; the Array variant of this parser already guards
# with `is_a?(Hash)`.
#
# @param str [String]
# @return [Array(Integer, Integer, Array<String>)] `[success, failure, samples]`
#   with at most 5 samples, each truncated to 200 characters
def parse_import_response_from_string(str)
  success = 0
  failure = 0
  samples = []

  str.each_line do |line|
    line = line.strip
    next if line.empty?

    parsed = safe_parse_json(line)
    unless parsed.is_a?(Hash)
      failure += 1
      samples << 'invalid-json-line'
      next
    end

    if truthy?(parsed['success'] || parsed[:success])
      success += 1
    else
      failure += 1
      msg = parsed['error'] || parsed[:error] || parsed['message'] || parsed[:message]
      samples << msg.to_s[0, 200] if msg
    end
  end

  [success, failure, samples[0, 5]]
end
574
+
575
# Summarize an import response given as an Array of result entries.
#
# Entries that are not Hashes count as failures without a sample.
#
# @param arr [Array<Object>]
# @return [Array(Integer, Integer, Array<String>)] `[success, failure, samples]`
#   with at most 5 samples, each truncated to 200 characters
def parse_import_response_from_array(arr)
  success = 0
  failure = 0
  samples = []

  arr.each do |entry|
    fields = entry.is_a?(Hash) ? entry : nil
    if fields && truthy?(fields['success'] || fields[:success])
      success += 1
      next
    end

    failure += 1
    message = fields ? (fields['error'] || fields[:error] || fields['message'] || fields[:message]) : nil
    samples << message.to_s[0, 200] if message
  end

  [success, failure, samples.first(5)]
end
592
+
593
# Whether +val+ is `true` or its string form equals "true" ignoring case.
#
# @param val [Object]
# @return [Boolean]
def truthy?(val)
  return true if val == true

  val.to_s.downcase == 'true'
end
596
+
597
# Coerce the `records` argument into an Array.
#
# @param records [Object] an Array (returned as-is), something responding to
#   `to_a`, or any other object (wrapped via `Array()`)
# @return [Array]
def normalize_records_input(records)
  return records if records.is_a?(Array)

  Array(records.respond_to?(:to_a) ? records.to_a : records)
end
606
+
607
# Coerce the `data` argument into an Array of documents.
#
# @param data [Object] an Array (returned as-is), a single Hash (wrapped in
#   a one-element Array), something responding to `to_a`, or any other
#   object (wrapped via `Array()`)
# @return [Array]
def normalize_data_input(data)
  case data
  when Array then data
  when Hash then [data]
  else Array(data.respond_to?(:to_a) ? data.to_a : data)
  end
end
618
+
619
# Build a model instance from a returned document.
#
# Delegates to `klass.from_document` when the class provides it; otherwise
# instantiates +klass+ and copies each document key into an instance
# variable of the same name.
#
# @param klass [Class]
# @param doc [Hash, nil] nil is treated as an empty document
# @return [Object] hydrated instance
def hydrate_from_document(klass, doc)
  attributes = doc || {}
  return klass.from_document(attributes) if klass.respond_to?(:from_document)

  klass.new.tap do |instance|
    attributes.each { |name, value| instance.instance_variable_set("@#{name}", value) }
  end
end
629
+ end
630
+
631
+ class_methods do
632
# Create a single document in the backing Typesense collection and return a
# hydrated instance.
#
# Steps, in order:
# - attributes come from +attrs+ when given, otherwise from the keyword args;
# - the id is taken from 'id' or derived via `Helpers.compute_id_for_create`;
# - `doc_updated_at` is set to the current timestamp (seconds);
# - hidden `<name>_empty` / `<name>_blank` flags are added when allowed;
# - required fields are enforced (respecting `optional` attributes) and unknown
#   fields are rejected when `mapper.strict_unknown_keys` is enabled;
# - values are type-checked and, per mapper coercion settings, coerced.
#
# @param attrs [Hash, nil] document attributes when passed as a single Hash
# @param into [String, nil] explicit physical collection override; otherwise
#   the resolved alias of the logical collection, falling back to the logical name
# @param _timeout_ms [Integer, nil] currently unused
# @param kwargs [Hash] document attributes when +attrs+ is nil
# @return [Object] hydrated instance of this model
# @raise [SearchEngine::Errors::InvalidParams, SearchEngine::Errors::InvalidField]
def create(attrs = nil, into: nil, _timeout_ms: nil, **kwargs)
  raw_attrs = attrs.nil? ? kwargs : attrs
  unless raw_attrs.is_a?(Hash)
    raise SearchEngine::Errors::InvalidParams, 'attrs must be a Hash or keyword args'
  end

  schema = SearchEngine::Schema.compile(self)
  types_by_field = Helpers.build_types_by_field_from_schema(schema)
  allowed_keys = Helpers.compute_allowed_keys_from_schema_and_dsl(self, schema)
  required_keys = Helpers.compute_required_keys_from_schema(self, schema)

  # Unified, string-keyed document regardless of how attributes were passed.
  document = Helpers.normalize_attrs_to_document(raw_attrs)

  current_id = document['id']
  if current_id.nil? || current_id.to_s.strip.empty?
    resolved_id = Helpers.compute_id_for_create(self, raw_attrs)
    if resolved_id.nil? || resolved_id.to_s.strip.empty?
      raise SearchEngine::Errors::InvalidParams,
            'Document id could not be resolved. Provide :id or a *_id matching the source model.'
    end
    document['id'] = resolved_id
  end

  Helpers.update_doc_updated_at!(document)
  Helpers.append_hidden_flags!(self, document, allowed_keys)

  present_keys = document.keys.map(&:to_s).to_set
  Helpers.validate_required_and_unknown!(self, present_keys, allowed_keys, required_keys)
  Helpers.validate_and_coerce_types!(self, document, types_by_field, Helpers.coercions_enabled?)

  client = SearchEngine.client
  logical = respond_to?(:collection) ? collection.to_s : name.to_s
  target = into.to_s if into && !into.to_s.strip.empty?
  target ||= client.resolve_alias(logical) || logical

  created = client.create_document(collection: target, document: document)
  Helpers.hydrate_from_document(self, created)
end
687
+
688
# Upsert one document into the collection.
#
# Pass either an unmapped source +record+ (mapped through the configured
# DSL) or pre-mapped +data+. The document is prepared via
# `Helpers.prepare_documents` and sent via `Helpers.import_documents!`.
#
# @param record [Object, nil] source record to map
# @param data [Hash, nil] pre-mapped document
# @param into [String, nil] optional physical collection override
# @param partition [Object, nil] partition token for resolvers
# @return [Integer] the import's :success_count; 0 when nothing was prepared
# @raise [SearchEngine::Errors::InvalidParams]
def upsert(record: nil, data: nil, into: nil, partition: nil)
  records = record ? [record] : nil
  docs = Helpers.prepare_documents(self, records: records, data: data)
  return 0 if docs.empty?

  stats = Helpers.import_documents!(self, docs, into: into, partition: partition)
  stats[:success_count]
end
707
+
708
# Upsert many documents into the collection in a single JSONL payload.
#
# Pass either an enumerable of unmapped source records or an enumerable of
# pre-mapped documents. Every entry is prepared via
# `Helpers.prepare_documents` before the import.
#
# @param records [Enumerable<Object>, nil]
# @param data [Enumerable<Hash>, nil]
# @param into [String, nil]
# @param partition [Object, nil]
# @return [Hash] the stats payload returned by `Helpers.import_documents!`
# @raise [SearchEngine::Errors::InvalidParams]
def upsert_bulk(records: nil, data: nil, into: nil, partition: nil)
  prepared = Helpers.prepare_documents(self, records: records, data: data)

  Helpers.import_documents!(self, prepared, into: into, partition: partition)
end
724
+ end
725
+ end
726
+ end
727
+ end