search-engine-for-typesense 30.1.5 → 30.1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae511da617970ab104c22bde9c151ed099d9eb9c55909d9bc34f328ea56ac5f4
4
- data.tar.gz: 49d4de291b6239a7f70e6bd93de83ef175916d7c403029d7a7382189fe90b9d7
3
+ metadata.gz: 52454481281edd904b9d0a3b68e191b6c8499426194b37f81e47f9fdc1a62435
4
+ data.tar.gz: 6adbd869d14f77faabf6ea10c42bc9f141d247a713ea016e97ab4188ce31564c
5
5
  SHA512:
6
- metadata.gz: ba3c5e26e351975c5c967954b3f20e0fe14f438aab7126672c1b9ad42ec1bbc15c224f5412a5d0cd778e237451af23a8a4e237731ab2c9bc2e2dabb3650256e9
7
- data.tar.gz: 82766d2a92e2f2a3a81f430d066254ff83b14966a3cce4b7171102979c07e0fe53925e2aa118075b6c679f8dd0881364d5c5d681c4c56e9c819b6ecb5aceaea1
6
+ metadata.gz: '08c22e27bf1e8c039d453584c34af900c4e50b7b55efbad70a29854e8154dfa5353ccb24095f991aaa9ae38bb55cf9a799a5a183e07344a63f4b050c0cf5a23f'
7
+ data.tar.gz: b54156dde160147eb1463d1c108f38c15436a52fcf3958f7462f7acaa49b1b844525aa971f4c1bdd4de482962f21935463ff26c4cb332a6ecd1f90e2da43b105
@@ -8,6 +8,8 @@ module SearchEngine
8
8
  module ModelDsl
9
9
  extend ActiveSupport::Concern
10
10
 
11
+ EMBEDDING_SUFFIX = '_embedding'
12
+
11
13
  class_methods do
12
14
  # Get or set the Typesense collection name for this model.
13
15
  #
@@ -282,6 +284,183 @@ module SearchEngine
282
284
  end
283
285
  end
284
286
 
287
+ class_methods do
288
# Declare a vector embedding field on this model.
#
# Two modes are supported:
# - auto-embedding (default): source attributes are resolved (explicit
#   `from:` or inferred from the field name), validated, and compiled into
#   an `embed` option together with the resolved model.
# - external: when `from:` is omitted and `num_dim:` is given, only the
#   vector dimension is recorded; no `embed` option is built.
#
# The field is registered through `attribute(name, :vector)`, its options
# are merged into `@attribute_options`, and a metadata entry is stored for
# later lookup.
#
# @param name [Symbol, String, nil] field name (auto-derived when omitted)
# @param from [Array<Symbol>, nil] source attribute names to embed from
# @param suffix [Boolean] append `_embedding` to the field name (default: true)
# @param model [String, nil] embedding model override (per-field)
# @param api_key [String, nil] API key for remote embedding providers
# @param num_dim [Integer, nil] vector dimensions for external embeddings
# @param hnsw [Hash, nil] HNSW index tuning parameters
# @param model_config [Hash, nil] extra model_config overrides
# @return [void]
# @raise [ArgumentError] on duplicate names, unresolvable names/sources,
#   or a `num_dim:` that `Integer()` rejects
def embedding(name = nil, from: nil, suffix: true, model: nil,
              api_key: nil, num_dim: nil, hnsw: nil, model_config: nil)
  resolved_name = __se_resolve_embedding_name(name, from: from, suffix: suffix, num_dim: num_dim)
  resolved_sym = resolved_name.to_sym

  __se_check_embedding_duplicate!(resolved_sym)

  external = (from.nil? && num_dim) ? true : false
  from_fields = external ? nil : __se_infer_embedding_from(resolved_name, from)

  __se_validate_embedding_sources!(from_fields) if from_fields

  vector_opts = {}
  resolved_model = nil
  declared_dim = num_dim

  if external
    # Coerce once so the schema option and the stored metadata always agree
    # (e.g. num_dim: "384" is recorded as 384 in both places).
    declared_dim = Integer(num_dim)
    vector_opts[:num_dim] = declared_dim
  else
    resolved_model = __se_resolve_embedding_model(model)
    vector_opts[:embed] = __se_build_embed_hash(
      from_fields, resolved_model,
      api_key: api_key, model_config: model_config
    )
  end

  vector_opts[:hnsw_params] = hnsw if hnsw

  # Register the attribute only after every validation above has passed.
  attribute(resolved_name, :vector)
  @attribute_options ||= {}
  @attribute_options[resolved_sym] = (@attribute_options[resolved_sym] || {}).merge(vector_opts)

  __se_store_embedding_metadata!(
    resolved_sym, from: from_fields,
    model: resolved_model,
    external: external,
    num_dim: declared_dim
  )
end
337
+
338
+ private
339
+
340
# Work out the final field name for an embedding declaration.
#
# An explicit name wins and gets the `_embedding` suffix appended unless
# `suffix` is falsy or the suffix is already present. Without a name, the
# field is derived from the class name when `from:` is given.
#
# @return [String]
# @raise [ArgumentError] when no name is given and none can be derived
def __se_resolve_embedding_name(name, from:, suffix:, num_dim:)
  unless name.nil?
    explicit = name.to_s
    return explicit if !suffix || explicit.end_with?(EMBEDDING_SUFFIX)

    return "#{explicit}#{EMBEDDING_SUFFIX}"
  end

  return "#{self.name.demodulize.underscore}#{EMBEDDING_SUFFIX}" if from

  if num_dim
    raise ArgumentError,
          'External embedding (num_dim: without from:) requires an explicit field name'
  end

  raise ArgumentError,
        'embedding requires at least one of: a field name, from: sources, or num_dim: for external vectors'
end
362
+
363
# Resolve the source attributes an auto-embedding field is built from.
#
# An explicit `from:` is normalized to an array of symbols. Otherwise the
# single source is inferred by stripping the `_embedding` suffix from the
# resolved field name.
#
# @param resolved_name [String, Symbol] final embedding field name
# @param from [Array<Symbol, String>, Symbol, String, nil] explicit sources
# @return [Array<Symbol>] never empty
# @raise [ArgumentError] when `from:` is an empty list, or when nothing can
#   be inferred (the name is only the suffix, or does not carry the suffix
#   at all and would therefore name itself as its own source)
def __se_infer_embedding_from(resolved_name, from)
  if from
    sources = Array(from).map(&:to_sym)
    if sources.empty?
      raise ArgumentError,
            "embedding '#{resolved_name}' was given an empty from:; list at least one source attribute"
    end

    return sources
  end

  field = resolved_name.to_s
  bare = field.delete_suffix(EMBEDDING_SUFFIX)
  # bare == field means the suffix was absent, so "inference" would make the
  # embedding field its own source.
  if bare.empty? || bare == field
    raise ArgumentError,
          "Cannot infer from: for embedding '#{resolved_name}'; provide from: explicitly"
  end

  [bare.to_sym]
end
378
+
379
# Ensure every embedding source is a declared attribute whose resolved
# Typesense type is "string" or "string[]".
#
# @param from_fields [Array<Symbol>] source attribute names
# @raise [ArgumentError] for an undeclared or non-string source
def __se_validate_embedding_sources!(from_fields)
  declared = @attributes || {}

  from_fields.each do |source|
    if declared.key?(source)
      resolved_type = __se_typesense_type_for(declared[source])
      unless %w[string string[]].include?(resolved_type)
        raise ArgumentError,
              "embedding from: field :#{source} must be string-typed " \
              "(got :#{declared[source]} -> \"#{resolved_type}\"). " \
              'Typesense only auto-embeds text fields.'
      end
    else
      raise ArgumentError,
            "embedding from: references undeclared attribute :#{source}. " \
            'Declare it with `attribute` before the `embedding` call.'
    end
  end
end
399
+
400
# Pick the embedding model: the per-field value when it is non-blank,
# otherwise the global `SearchEngine.config.embedding.model`.
#
# @param per_field_model [String, nil]
# @return [String]
# @raise [SearchEngine::Errors::ConfigurationError] when neither is set
def __se_resolve_embedding_model(per_field_model)
  usable = ->(candidate) { candidate && !candidate.to_s.strip.empty? }

  return per_field_model if usable.call(per_field_model)

  fallback = SearchEngine.config.embedding.model
  return fallback if usable.call(fallback)

  raise SearchEngine::Errors::ConfigurationError.new(
    'No embedding model configured. Set `model:` on the embedding declaration ' \
    'or set `SearchEngine.config.embedding.model` globally.',
    hint: "Add `config.embedding.model = 'ts/all-MiniLM-L12-v2'` to your SearchEngine initializer.",
    doc: 'https://typesense.org/docs/30.1/api/vector-search.html#option-b-auto-embedding-generation-within-typesense'
  )
end
416
+
417
# Assemble the `embed` option for an auto-embedding field.
#
# model_config is layered: global defaults first, then per-field overrides,
# then `:model_name`. `:api_key` is added only when a non-blank key is
# available (per-field value, else the global one).
#
# @param from_fields [Array<Symbol>] source attribute names
# @param model_name [String] resolved embedding model
# @return [Hash] `{ from: [String, ...], model_config: Hash }`
def __se_build_embed_hash(from_fields, model_name, api_key: nil, model_config: nil)
  embedding_defaults = SearchEngine.config.embedding

  layers = [embedding_defaults.model_config, model_config].select { |layer| layer.is_a?(Hash) }
  settings = layers.reduce({}) { |merged, layer| merged.merge(layer) }
  settings[:model_name] = model_name

  key = api_key || embedding_defaults.api_key
  settings[:api_key] = key if key && !key.to_s.strip.empty?

  { from: from_fields.map(&:to_s), model_config: settings }
end
432
+
433
# Guard against declaring the same embedding field twice.
#
# @param resolved_sym [Symbol] final embedding field name
# @raise [ArgumentError] when the name is already in `@embeddings_config`
def __se_check_embedding_duplicate!(resolved_sym)
  already_declared = @embeddings_config || {}

  raise ArgumentError, "Duplicate embedding field :#{resolved_sym} already declared" if already_declared.key?(resolved_sym)
end
439
+
440
# Record the metadata entry for one embedding field in `@embeddings_config`,
# keyed by the field symbol. Keys whose value is nil are omitted.
#
# @param resolved_sym [Symbol] final embedding field name
# @return [Hash] the stored entry
def __se_store_embedding_metadata!(resolved_sym, from:, model:, external:, num_dim:)
  entry = {
    field_name: resolved_sym.to_s,
    from: from,
    model: model,
    external: external,
    num_dim: num_dim
  }

  (@embeddings_config ||= {})[resolved_sym] = entry.compact
end
451
+
452
# Translate an attribute type descriptor into a Typesense type string via
# `SearchEngine::Schema::TYPE_MAPPING`, falling back to the descriptor's own
# string form when it is not mapped. A one-element Array descriptor
# resolves to the element's type followed by "[]".
#
# @param type_descriptor [Symbol, String, Array]
# @return [String]
def __se_typesense_type_for(type_descriptor)
  resolve = lambda do |descriptor|
    SearchEngine::Schema::TYPE_MAPPING[descriptor.to_s.downcase.to_sym] || descriptor.to_s
  end

  single_element_array = type_descriptor.is_a?(Array) && type_descriptor.size == 1
  return "#{resolve.call(type_descriptor.first)}[]" if single_element_array

  resolve.call(type_descriptor)
end
462
+ end
463
+
285
464
  class_methods do
286
465
  # Validate whether an attribute name is a valid Ruby reader method name
287
466
  # (skip dotted names and other invalid identifiers).
@@ -415,6 +594,14 @@ module SearchEngine
415
594
  end
416
595
  end
417
596
 
597
+ class_methods do
598
# Read-only view of declared embedding metadata for this class.
#
# The result is a frozen copy: the outer hash, each per-field metadata hash
# and their values are duplicated and frozen, so callers cannot alter the
# class's internal `@embeddings_config` through the returned object.
#
# @return [Hash{Symbol=>Hash}] frozen hash keyed by embedding field name
def embeddings_config
  snapshot = (@embeddings_config || {}).each_with_object({}) do |(field, meta), copy|
    copy[field] =
      if meta.is_a?(Hash)
        meta.transform_values { |value| value.frozen? ? value : value.dup.freeze }.freeze
      else
        meta
      end
  end

  snapshot.freeze
end
603
+ end
604
+
418
605
  class_methods do
419
606
  # Configure schema retention policy for this collection.
420
607
  # @param keep_last [Integer] how many previous physicals to keep after swap
@@ -444,6 +631,9 @@ module SearchEngine
444
631
  parent_retention = @schema_retention || {}
445
632
  subclass.instance_variable_set(:@schema_retention, parent_retention.dup)
446
633
 
634
+ parent_embeddings = @embeddings_config || {}
635
+ subclass.instance_variable_set(:@embeddings_config, parent_embeddings.dup)
636
+
447
637
  parent_joins = @joins_config || {}
448
638
  subclass.instance_variable_set(:@joins_config, parent_joins.dup.freeze)
449
639
 
@@ -251,6 +251,10 @@ module SearchEngine
251
251
  attr_accessor :include_error_messages
252
252
  # @return [Boolean] also emit legacy event aliases where applicable
253
253
  attr_accessor :emit_legacy_event_aliases
254
+ # @return [Boolean] when true (default), float arrays inside +vector_query+
255
+ # strings are replaced with +[<N dims>]+ in logs and telemetry payloads.
256
+ # Set to +false+ to see raw vectors for debugging.
257
+ attr_accessor :redact_vectors
254
258
 
255
259
  def initialize
256
260
  super()
@@ -260,6 +264,7 @@ module SearchEngine
260
264
  @max_message_length = 200
261
265
  @include_error_messages = false
262
266
  @emit_legacy_event_aliases = true
267
+ @redact_vectors = true
263
268
  end
264
269
  end
265
270
 
@@ -321,6 +326,30 @@ module SearchEngine
321
326
  end
322
327
  end
323
328
 
329
# Nested configuration holding global embedding defaults: a default model,
# an optional API key, extra model_config, and the tolerance used when
# validating vector search weights.
class EmbeddingConfig
  # Default embedding model name (e.g. "ts/all-MiniLM-L12-v2").
  # @return [String, nil]
  attr_accessor :model

  # API key for remote embedding providers (e.g. OpenAI).
  # @return [String, nil]
  attr_accessor :api_key

  # Extra model_config passed to the Typesense embed block.
  # @return [Hash, nil]
  attr_accessor :model_config

  # Tolerance for the vector search weights sum validation: weights must sum
  # to ~1.0 within this tolerance. With many small weights floating-point
  # drift can exceed the default; adjust via
  # +config.embedding.weights_sum_tolerance = 0.05+.
  # @return [Float]
  attr_accessor :weights_sum_tolerance

  # Start with no model, key or model_config and a 0.01 weights tolerance.
  def initialize
    @model = @api_key = @model_config = nil
    @weights_sum_tolerance = 0.01
  end
end
352
+
324
353
  # Create a new configuration with defaults, optionally hydrated from ENV.
325
354
  #
326
355
  # @param env [#[]] environment-like object (defaults to ::ENV)
@@ -360,6 +389,7 @@ module SearchEngine
360
389
  @selection = SelectionConfig.new
361
390
  @presets = PresetsConfig.new
362
391
  @curation = CurationConfig.new
392
+ @embedding = EmbeddingConfig.new
363
393
  @default_console_model = nil
364
394
  # Path may be relative to Rails.root or absolute. Set nil/false to disable.
365
395
  @search_engine_models = 'app/search_engine'
@@ -438,6 +468,12 @@ module SearchEngine
438
468
  @curation ||= CurationConfig.new
439
469
  end
440
470
 
471
# Global embedding configuration, created on first access.
# @return [SearchEngine::Config::EmbeddingConfig]
def embedding
  @embedding = EmbeddingConfig.new unless @embedding
  @embedding
end
476
+
441
477
  # Expose observability/logging configuration.
442
478
  # @return [SearchEngine::Config::ObservabilityConfig]
443
479
  def observability
@@ -657,6 +693,7 @@ module SearchEngine
657
693
  selection: selection_hash_for_to_h,
658
694
  presets: presets_hash_for_to_h,
659
695
  curation: curation_hash_for_to_h,
696
+ embedding: embedding_hash_for_to_h,
660
697
  relation_print_materializes: relation_print_materializes ? true : false
661
698
  }
662
699
  end
@@ -666,6 +703,7 @@ module SearchEngine
666
703
  def to_h_redacted
667
704
  redacted = to_h.dup
668
705
  redacted[:api_key] = '[REDACTED]' unless string_blank?(api_key)
706
+ redacted[:embedding] = redacted[:embedding].merge(api_key: '[REDACTED]') unless string_blank?(embedding.api_key)
669
707
  redacted
670
708
  end
671
709
 
@@ -726,7 +764,8 @@ module SearchEngine
726
764
  log_format: observability.log_format,
727
765
  max_message_length: observability.max_message_length,
728
766
  include_error_messages: observability.include_error_messages ? true : false,
729
- emit_legacy_event_aliases: observability.emit_legacy_event_aliases ? true : false
767
+ emit_legacy_event_aliases: observability.emit_legacy_event_aliases ? true : false,
768
+ redact_vectors: observability.redact_vectors ? true : false
730
769
  }
731
770
  end
732
771
 
@@ -752,6 +791,15 @@ module SearchEngine
752
791
  }
753
792
  end
754
793
 
794
# Snapshot of the embedding configuration used by the config hash export.
# @return [Hash] model, api_key, model_config and weights_sum_tolerance
def embedding_hash_for_to_h
  settings = embedding

  %i[model api_key model_config weights_sum_tolerance].each_with_object({}) do |attr, out|
    out[attr] = settings.public_send(attr)
  end
end
802
+
755
803
  def default_strict_fields
756
804
  if defined?(::Rails)
757
805
  !::Rails.env.production?
@@ -259,6 +259,12 @@ module SearchEngine
259
259
  # raise SearchEngine::Errors::InvalidOverrideTag, 'InvalidOverrideTag: "" is invalid. Use non-blank strings that match the allowed pattern.'
260
260
  class InvalidOverrideTag < Error; end
261
261
 
262
# Signals that a required configuration value is missing or invalid.
#
# Typical trigger: no embedding model is available, neither through the
# per-field `model:` kwarg nor through the global
# `SearchEngine.config.embedding.model`.
class ConfigurationError < Error
end
267
+
262
268
  # Raised when an option value is invalid or unsupported for a public API.
263
269
  #
264
270
  # Used by DSL methods to fail fast with actionable hints.
@@ -286,5 +292,20 @@ module SearchEngine
286
292
  # details: { total_hits: 12_000, max: 10_000, collection: 'products' }
287
293
  # )
288
294
  class HitLimitExceeded < Error; end
295
+
296
# Signals invalid input to the vector search DSL.
#
# Typical triggers: an unknown embedding field, mutually exclusive query
# modes, an alpha value out of range, or a malformed vector array.
#
# @example
#   raise SearchEngine::Errors::InvalidVectorQuery.new(
#     'InvalidVectorQuery: query: and id: are mutually exclusive',
#     hint: 'Provide only one of query:, id:, or queries:',
#     doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/vector-search'
#   )
class InvalidVectorQuery < Error
  # Documentation page for the vector search DSL.
  DOC_URL = 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/v30.1/vector-search'
end
289
310
  end
290
311
  end
@@ -38,8 +38,8 @@ module SearchEngine
38
38
  optional: %i[duration_ms]
39
39
  },
40
40
  'search_engine.grouping.compile' => {
41
- required: %i[field],
42
- optional: %i[collection limit missing_values duration_ms]
41
+ required: %i[group_by],
42
+ optional: %i[collection group_limit missing_values duration_ms]
43
43
  },
44
44
  'search_engine.joins.compile' => {
45
45
  required: %i[collection],
@@ -116,7 +116,7 @@ module SearchEngine
116
116
  },
117
117
  'search_engine.vector.compile' => {
118
118
  required: %i[],
119
- optional: %i[collection query_vector_present dims hybrid_weight ann_params_present duration_ms]
119
+ optional: %i[collection field mode k query_vector_present dims hybrid_weight ann_params_present duration_ms]
120
120
  },
121
121
  'search_engine.hits.limit' => {
122
122
  required: %i[],
@@ -294,9 +294,12 @@ module SearchEngine
294
294
  parts << "[#{short}]"
295
295
  parts << "id=#{cid}"
296
296
  parts << "coll=#{collection}" if collection
297
+ parts << "mode=#{p[:mode] || SearchEngine::Logging::FormatHelpers::DASH}"
298
+ parts << "field=#{p[:field] || SearchEngine::Logging::FormatHelpers::DASH}"
299
+ parts << "k=#{p[:k] || SearchEngine::Logging::FormatHelpers::DASH}"
297
300
  parts << "qvec=#{SearchEngine::Logging::FormatHelpers.display_or_dash(p, :query_vector_present)}"
298
301
  parts << "dims=#{p[:dims] || SearchEngine::Logging::FormatHelpers::DASH}"
299
- parts << "hybrid=#{p[:hybrid_weight] || SearchEngine::Logging::FormatHelpers::DASH}"
302
+ parts << "alpha=#{p[:hybrid_weight] || SearchEngine::Logging::FormatHelpers::DASH}"
300
303
  parts << "ann=#{SearchEngine::Logging::FormatHelpers.display_or_dash(p, :ann_params_present)}"
301
304
  parts << "dur=#{duration}ms"
302
305
  parts.join(' ')
@@ -336,7 +339,7 @@ module SearchEngine
336
339
  h['radius_bucket'] = p[:radius_bucket] if p.key?(:radius_bucket)
337
340
  h
338
341
  when 'search_engine.vector.compile'
339
- keys = %i[query_vector_present dims hybrid_weight ann_params_present]
342
+ keys = %i[field mode k query_vector_present dims hybrid_weight ann_params_present]
340
343
  keys.each_with_object({}) { |k, h| h[k.to_s] = p[k] if p.key?(k) }
341
344
  when 'search_engine.hits.limit'
342
345
  keys = %i[early_limit validate_max applied_strategy triggered total_hits]
@@ -320,11 +320,15 @@ module SearchEngine
320
320
  class Compiled
321
321
  attr_reader :klass
322
322
 
323
- def initialize(klass:, map_proc:, schema_fields:, types_by_field:, options: {})
323
+ def initialize(klass:, map_proc:, schema_fields:, types_by_field:,
324
+ auto_embedding_fields: Set.new, external_embedding_dims: {},
325
+ options: {})
324
326
  @klass = klass
325
327
  @map_proc = map_proc
326
328
  @schema_fields = schema_fields.freeze # Array of field names (String)
327
329
  @types_by_field = types_by_field.freeze # { "field" => "int64" }
330
+ @auto_embedding_fields = auto_embedding_fields.freeze # Set<String> — server-generated fields
331
+ @external_embedding_dims = external_embedding_dims.freeze # { "field" => num_dim }
328
332
  # Allow all schema fields; treat required as schema fields minus optional attributes
329
333
  @allowed_keys = @schema_fields.map(&:to_sym).to_set.freeze
330
334
  @required_keys = compute_required_keys
@@ -360,6 +364,7 @@ module SearchEngine
360
364
  hash[:id] = computed_id
361
365
  hash[:doc_updated_at] = now_i
362
366
 
367
+ strip_auto_embedding_fields!(hash)
363
368
  normalize_optional_blank_strings!(hash)
364
369
 
365
370
  # Populate hidden flags
@@ -458,6 +463,17 @@ module SearchEngine
458
463
  end
459
464
  end
460
465
 
466
# Drop auto-embedding fields from a document payload, under both their
# Symbol and String key forms. Typesense generates these server-side;
# including them would cause errors.
#
# @param doc [Hash] document payload, mutated in place
def strip_auto_embedding_fields!(doc)
  return if @auto_embedding_fields.empty?

  @auto_embedding_fields.each do |field|
    [field.to_sym, field.to_s].each { |key| doc.delete(key) }
  end
end
476
+
461
477
  # Normalize empty-string values for optional fields to nil.
462
478
  def normalize_optional_blank_strings!(doc)
463
479
  return if @__optional_blank_targets__.empty?
@@ -554,6 +570,10 @@ module SearchEngine
554
570
 
555
571
  required.delete(fname.to_sym)
556
572
  end
573
+
574
+ # Auto-embedding fields are generated server-side; never require them in documents
575
+ @auto_embedding_fields.each { |name| required.delete(name.to_sym) }
576
+
557
577
  required.freeze
558
578
  end
559
579
 
@@ -593,6 +613,8 @@ module SearchEngine
593
613
  return [true, nil, nil] if value.is_a?(Array) && value.all? { |v| v.is_a?(String) }
594
614
 
595
615
  [false, nil, invalid_type_message(field, 'Array<String>', value)]
616
+ when 'float[]'
617
+ validate_float_array(value, field)
596
618
  else
597
619
  # Unknown/opaque type: accept
598
620
  [true, nil, nil]
@@ -634,6 +656,20 @@ module SearchEngine
634
656
  end
635
657
  end
636
658
 
659
# Validate a value destined for a `float[]` field.
#
# The value must be an Array of Numeric elements. When an expected
# dimension is registered for the field in `@external_embedding_dims`, the
# array length must match it.
#
# @param value [Object] candidate value
# @param field [String, Symbol] field name
# @return [Array(Boolean, nil, String|nil)] `[valid, nil, error_message]`
def validate_float_array(value, field)
  unless value.is_a?(Array) && value.all? { |v| v.is_a?(Numeric) }
    return [false, nil, invalid_type_message(field, 'Array<Float>', value)]
  end

  # The dimension map is keyed by String field names; normalize so a Symbol
  # field name cannot silently bypass the dimension check.
  expected_dim = @external_embedding_dims[field.to_s]
  if expected_dim && value.size != expected_dim
    msg = "Dimension mismatch for field :#{field} (expected #{expected_dim}, got #{value.size})."
    return [false, nil, msg]
  end

  [true, nil, nil]
end
672
+
637
673
  def string_integer?(v)
638
674
  v.is_a?(String) && v.match?(/^[-+]?\d+$/)
639
675
  end
@@ -749,6 +785,8 @@ module SearchEngine
749
785
  types_by_field[f[:name].to_s] = f[:type].to_s
750
786
  end
751
787
 
788
+ auto_embedding_fields, external_embedding_dims = partition_embedding_fields(klass)
789
+
752
790
  mapper_cfg = SearchEngine.config&.mapper
753
791
  coercions_cfg = mapper_cfg&.coercions || {}
754
792
  options = {
@@ -763,10 +801,32 @@ module SearchEngine
763
801
  map_proc: dsl[:map],
764
802
  schema_fields: fields,
765
803
  types_by_field: types_by_field,
804
+ auto_embedding_fields: auto_embedding_fields,
805
+ external_embedding_dims: external_embedding_dims,
766
806
  options: options
767
807
  )
768
808
  end
769
809
 
810
# Split a class's embeddings_config into the names of auto-embedding fields
# and a name-to-dimension map for external embedding fields. External
# entries without `:num_dim` appear in neither result.
#
# @param klass [Class] model class; used only if it responds to `embeddings_config`
# @return [Array(Set<String>, Hash{String=>Integer})]
def partition_embedding_fields(klass)
  declared = klass.respond_to?(:embeddings_config) ? klass.embeddings_config : {}

  named = declared.map { |key, meta| [(meta[:field_name] || key).to_s, meta] }
  external, auto = named.partition { |_name, meta| meta[:external] }

  dims = external.each_with_object({}) do |(name, meta), acc|
    acc[name] = meta[:num_dim].to_i if meta[:num_dim]
  end

  [Set.new(auto.map(&:first)), dims]
end
829
+
770
830
  def mapper_dsl_for(klass)
771
831
  return unless klass.instance_variable_defined?(:@__mapper_dsl__)
772
832
 
@@ -12,9 +12,11 @@ module SearchEngine
12
12
 
13
13
  # Whitelisted search parameter keys to include in payload excerpts.
14
14
  PARAM_WHITELIST = %i[
15
- q query_by include_fields exclude_fields per_page page infix filter_by group_by group_limit group_missing_values
15
+ q query_by include_fields exclude_fields per_page page infix filter_by sort_by
16
+ group_by group_limit group_missing_values
16
17
  facet_by max_facet_values facet_query
17
18
  num_typos drop_tokens_threshold prioritize_exact_match query_by_weights
19
+ vector_query
18
20
  ].freeze
19
21
 
20
22
  # Maximum length for `q` values before truncation.
@@ -58,6 +60,8 @@ module SearchEngine
58
60
  result[:q] = truncate_q(val)
59
61
  when :filter_by
60
62
  result[:filter_by] = redact_filter_by(val)
63
+ when :vector_query
64
+ result[:vector_query] = redact_vector_query(val)
61
65
  else
62
66
  result[key] = redact_simple_value(val)
63
67
  end
@@ -106,6 +110,25 @@ module SearchEngine
106
110
  masked.gsub(/\b\d+(?:\.\d+)?\b/, '***')
107
111
  end
108
112
 
113
# Internal: Redact raw numeric arrays in a vector_query string while
# preserving the structural tokens (field name, k, alpha, etc.).
# Replaces `[0.1,0.2,...]` with `[<N dims>]`.
#
# Every element may be an integer, a decimal, or exponent notation
# (e.g. `1.0e-05`), so mixed arrays such as `[0.1, 2, -3e-4]` are redacted
# as a whole. Empty brackets and non-numeric lists are left untouched.
#
# Disable via +config.observability.redact_vectors = false+ to see
# raw vectors (useful for debugging).
#
# @param vq [String]
# @return [String] the redacted string; non-String input is returned as-is
def self.redact_vector_query(vq)
  return vq unless vq.is_a?(String)
  return vq if SearchEngine.config.observability.redact_vectors == false

  # The fraction and exponent are optional for EVERY element, not just the first.
  number = /-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?/
  numeric_list = /\[\s*(#{number}(?:\s*,\s*#{number})*)\s*\]/

  vq.gsub(numeric_list) do
    dims = Regexp.last_match(1).split(',').size
    "[<#{dims} dims>]"
  end
end
131
+
109
132
  # Build a filtered URL/common options hash for payloads.
110
133
  # @param url_opts [Hash]
111
134
  # @return [Hash]
@@ -157,6 +180,6 @@ module SearchEngine
157
180
  end
158
181
 
159
182
  private_class_method :redact_params_hash, :redact_simple_value, :truncate_q,
160
- :redact_string, :redact_filter_by
183
+ :redact_string, :redact_filter_by, :redact_vector_query
161
184
  end
162
185
  end
@@ -130,8 +130,8 @@ module SearchEngine
130
130
  assign_attr(span, 'se.node_count', p[:node_count]) if p.key?(:node_count)
131
131
  assign_attr(span, 'se.join_count', p[:join_count]) if p.key?(:join_count)
132
132
  assign_attr(span, 'se.groups_count', p[:groups_count]) if p.key?(:groups_count)
133
- assign_attr(span, 'se.group_by', p[:field] || p[:group_by]) if p.key?(:field) || p.key?(:group_by)
134
- assign_attr(span, 'se.group_limit', p[:limit] || p[:group_limit]) if p.key?(:limit) || p.key?(:group_limit)
133
+ assign_attr(span, 'se.group_by', p[:group_by]) if p.key?(:group_by)
134
+ assign_attr(span, 'se.group_limit', p[:group_limit]) if p.key?(:group_limit)
135
135
  return unless p.key?(:missing_values) || p.key?(:group_missing_values)
136
136
 
137
137
  assign_attr(span, 'se.group_missing_values', p[:missing_values] || p[:group_missing_values])
@@ -142,7 +142,14 @@ module SearchEngine
142
142
  deleted_count searches_count fields_changed_count added_count removed_count in_sync].each do |k|
143
143
  assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
144
144
  end
145
- # New event attributes (redacted/summarized)
145
+ apply_feature_detail_attributes(span, p)
146
+ apply_vector_attributes(span, p)
147
+ %i[early_limit validate_max applied_strategy triggered total_hits].each do |k|
148
+ assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
149
+ end
150
+ end
151
+
152
+ def apply_feature_detail_attributes(span, p)
146
153
  %i[fields_count queries_count max_facet_values sort_flags conflicts].each do |k|
147
154
  assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
148
155
  end
@@ -161,12 +168,17 @@ module SearchEngine
161
168
  %i[sort_mode radius_bucket].each do |k|
162
169
  assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
163
170
  end
171
+ end
172
+
173
+ def apply_vector_attributes(span, p)
174
+ return unless p.key?(:query_vector_present)
175
+
176
+ assign_attr(span, 'se.vector.field', p[:field]) if p.key?(:field)
177
+ assign_attr(span, 'se.vector.mode', p[:mode]&.to_s) if p.key?(:mode)
178
+ assign_attr(span, 'se.vector.k', p[:k]) if p.key?(:k)
164
179
  %i[query_vector_present dims hybrid_weight ann_params_present].each do |k|
165
180
  assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
166
181
  end
167
- %i[early_limit validate_max applied_strategy triggered total_hits].each do |k|
168
- assign_attr(span, "se.#{k}", p[k]) if p.key?(k)
169
- end
170
182
  end
171
183
 
172
184
  def apply_params_preview(span, payload)