search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Lightweight utilities for observability concerns (redaction, excerpts).
  #
  # Provides a single public entry point {.redact} used by the client and
  # optional subscribers to produce compact, redacted payloads that avoid
  # leaking secrets while keeping useful context.
  module Observability
    # Keys that are considered sensitive and must be redacted whenever present.
    # Not consulted inside this module: {.redact} keeps only PARAM_WHITELIST
    # keys, so every other key is dropped regardless of this pattern.
    SENSITIVE_KEY_PATTERN = /key|token|secret|password/i

    # Whitelisted search parameter keys to include in payload excerpts.
    PARAM_WHITELIST = %i[
      q query_by include_fields exclude_fields per_page page infix filter_by group_by group_limit group_missing_values
      facet_by max_facet_values facet_query
      num_typos drop_tokens_threshold prioritize_exact_match query_by_weights
    ].freeze

    # Maximum length for `q` values before truncation.
    MAX_Q_LENGTH = 128

    # Replacement text for every masked literal.
    MASK = '***'
    # Double- or single-quoted literal (no escape handling; best-effort).
    QUOTED_LITERAL = /"[^"]*"|'[^']*'/
    # Integer or decimal literal delimited by word boundaries.
    NUMERIC_LITERAL = /\b\d+(?:\.\d+)?\b/
    # Quoted or backtick-wrapped literal inside a filter expression.
    FILTER_QUOTED_LITERAL = /"[^"]*"|'[^']*'|`[^`]*`/
    # Bracketed value list or range inside a filter expression, e.g. `[a, b]`.
    FILTER_VALUE_LIST = /\[[^\]]*\]/
    private_constant :MASK, :QUOTED_LITERAL, :NUMERIC_LITERAL, :FILTER_QUOTED_LITERAL, :FILTER_VALUE_LIST

    # Redact a value producing a new structure without mutating the input.
    #
    # - When given a Hash of search params, returns a compact excerpt that only
    #   includes whitelisted keys with secrets redacted and `filter_by` masked.
    # - When given an Array, returns a redacted array by applying the same logic
    #   to each element.
    # - When given a String, masks quoted and numeric literals.
    # - Any other value is returned unchanged.
    #
    # @param value [Object]
    # @return [Object]
    def self.redact(value)
      case value
      when Hash
        redact_params_hash(value)
      when Array
        value.map { |v| redact(v) }
      when String
        redact_string(value)
      else
        value
      end
    end

    # Internal: Redact a Hash presumed to be Typesense search params.
    # Returns a new Hash with only whitelisted keys preserved (looked up by
    # Symbol). Non-whitelisted keys are not included; `q` is truncated and
    # `filter_by` literals are masked.
    #
    # @param params [Hash]
    # @return [Hash]
    def self.redact_params_hash(params)
      PARAM_WHITELIST.each_with_object({}) do |key, result|
        next unless params.key?(key)

        val = params[key]
        result[key] =
          case key
          when :q then truncate_q(val)
          when :filter_by then redact_filter_by(val)
          else redact_simple_value(val)
          end
      end
    end

    # Internal: Best-effort redaction for simple scalar values.
    # Non-String values are returned unchanged.
    def self.redact_simple_value(value)
      return value unless value.is_a?(String)

      redact_string(value)
    end

    # Internal: Truncate overly long query strings to MAX_Q_LENGTH characters
    # followed by "...". Non-String values are returned unchanged.
    def self.truncate_q(query)
      return query unless query.is_a?(String)

      query.length > MAX_Q_LENGTH ? "#{query[0, MAX_Q_LENGTH]}..." : query
    end

    # Internal: Redact secrets in a string and mask obvious literal fragments.
    def self.redact_string(str)
      return str unless str.is_a?(String)

      # Mask obvious quoted literals first, then numeric literals (best-effort).
      str.gsub(QUOTED_LITERAL, MASK).gsub(NUMERIC_LITERAL, MASK)
    end

    # Internal: Mask literal values in Typesense filter expressions while
    # preserving attribute/operator structure. Best-effort and lightweight.
    # Examples:
    #   "category_id:=123" => "category_id:=***"
    #   "price:>10 && brand:='Acme'" => "price:>*** && brand:=***"
    #   "brand:='Acme Corp'" => "brand:=***"
    #   "tags:[red, blue]" => "tags:***"
    #
    # @param filter [Object] non-String values are returned unchanged
    # @return [Object]
    def self.redact_filter_by(filter)
      return filter unless filter.is_a?(String)

      # Quoted literals and bracketed lists may contain whitespace or
      # delimiters, so they must be collapsed BEFORE the comparator pass;
      # otherwise that pass stops at the first space and the remainder of the
      # value stays visible.
      masked = filter.gsub(FILTER_QUOTED_LITERAL, MASK)
      masked = masked.gsub(FILTER_VALUE_LIST, MASK)
      # Replace whatever follows a comparator or colon, up to a delimiter.
      masked = masked.gsub(/([!:><=]{1,2})\s*([^\s)&|]+)/, '\1***')
      # Finally mask any numeric literal that is still present.
      masked.gsub(NUMERIC_LITERAL, MASK)
    end

    # Build a filtered URL/common options hash for payloads.
    # Only `:use_cache` and `:cache_ttl` are copied; both keys are always
    # present in the result (nil when absent from the input).
    # @param url_opts [Hash]
    # @return [Hash] empty when +url_opts+ is not a Hash
    def self.filtered_url_opts(url_opts)
      return {} unless url_opts.is_a?(Hash)

      {
        use_cache: url_opts[:use_cache],
        cache_ttl: url_opts[:cache_ttl]
      }
    end

    # Compute a SHA1 hex digest for a value.
    # @param value [#to_s]
    # @return [String]
    def self.sha_1(value)
      # Loaded on first use rather than at file load time.
      require 'digest'
      Digest::SHA1.hexdigest(value.to_s)
    end

    # Return a shortened hash prefix for display/logging.
    # @param hexdigest [String]
    # @param length [Integer]
    # @return [String]
    def self.short_hash(hexdigest, length = 8)
      hexdigest.to_s[0, length]
    end

    # Truncate and normalize a free-text message to a single line.
    # Whitespace runs are collapsed to one space before truncation.
    # @param message [String]
    # @param max [Integer]
    # @return [String]
    def self.truncate_message(message, max = 200)
      message.to_s.gsub(/\s+/, ' ').strip[0, max]
    end

    # Compute partition helpers used in logs: prefer raw numeric; hash strings.
    # Any non-Numeric partition is hashed via its `to_s` representation.
    # @param partition [Object]
    # @return [Hash] { partition: <raw>, partition_hash: <String,nil> }
    def self.partition_fields(partition)
      return { partition: partition, partition_hash: nil } if partition.is_a?(Numeric)

      { partition: partition, partition_hash: short_hash(sha_1(partition)) }
    end

    private_class_method :redact_params_hash, :redact_simple_value, :truncate_q,
                         :redact_string, :redact_filter_by
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Operations namespace for convenience access to operational endpoints.
  #
  # Provides small wrappers around {SearchEngine::Client} for metrics, stats,
  # and health, emitting instrumentation and supporting dependency injection
  # via an optional client.
  module Operations
    class << self
      # Retrieve raw server metrics.
      #
      # @param client [SearchEngine::Client, nil] optional injected client
      # @return [Hash] raw payload from Typesense `/metrics.json`
      # @see SearchEngine::Client#metrics
      def metrics(client: nil)
        instrumented_call(:metrics, client)
      end

      # Retrieve raw server stats.
      #
      # @param client [SearchEngine::Client, nil] optional injected client
      # @return [Hash] raw payload from Typesense `/stats.json`
      # @see SearchEngine::Client#stats
      def stats(client: nil)
        instrumented_call(:stats, client)
      end

      # Retrieve server health.
      #
      # @param client [SearchEngine::Client, nil] optional injected client
      # @return [Hash] Typesense health response
      # @see SearchEngine::Client#health
      def health(client: nil)
        instrumented_call(:health, client)
      end

      private

      # Invoke +endpoint+ on the injected client (or `SearchEngine.client`
      # when none is given) inside a `search_engine.operations.<endpoint>`
      # instrumentation block. The client is resolved inside the block, so a
      # failure to obtain it is raised from within the instrumented section.
      #
      # @param endpoint [Symbol] one of :metrics, :stats, :health
      # @param client [Object, nil]
      # @return [Object] whatever the client method returns
      def instrumented_call(endpoint, client)
        SearchEngine::Instrumentation.instrument("search_engine.operations.#{endpoint}", {}) do
          (client || SearchEngine.client).public_send(endpoint)
        end
      end

      # Return `SearchEngine.config.client` when the config object exposes a
      # `client` reader; nil otherwise or when reading it raises.
      # Not called by the public wrappers in this module.
      def configured_client
        return unless SearchEngine.config.respond_to?(:client)

        SearchEngine.config.client
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Optional OpenTelemetry adapter that translates unified instrumentation
  # events into OpenTelemetry spans. Activation is gated by the presence of the
  # OpenTelemetry SDK and by `SearchEngine.config.opentelemetry.enabled`.
  #
  # @since M8
  # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability#opentelemetry
  #
  # Public API:
  # - .installed? => Boolean
  # - .enabled?   => Boolean (config + SDK present)
  # - .start!     => idempotently subscribes to events
  # - .stop!      => unsubscribes
  module Otel
    # Payload keys copied verbatim onto the span as `se.<key>` when present.
    INDEXER_SCHEMA_KEYS = %i[
      into partition partition_hash batch_index docs_count success_count failure_count attempts bytes_sent
      deleted_count searches_count fields_changed_count added_count removed_count in_sync
      fields_count queries_count max_facet_values sort_flags conflicts
      full_fields_count affix_tokens snippet_threshold tag_kind
      use_synonyms use_stopwords source
      sort_mode radius_bucket
      query_vector_present dims hybrid_weight ann_params_present
      early_limit validate_max applied_strategy triggered total_hits
    ].freeze
    # Keys of the `:shapes` sub-hash copied as `se.shapes.<key>`.
    SHAPE_KEYS = %i[point rect circle].freeze
    private_constant :INDEXER_SCHEMA_KEYS, :SHAPE_KEYS

    class << self
      # @return [Boolean] whether the OpenTelemetry SDK is available
      # @since M8
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability#opentelemetry
      def installed?
        # `defined?` yields a String or nil; normalize to a real Boolean.
        return false unless defined?(::OpenTelemetry::SDK)

        true
      end

      # @return [Boolean] whether the adapter should be active
      # @since M8
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability#opentelemetry
      def enabled?
        return false unless installed? && SearchEngine.respond_to?(:config)

        SearchEngine.config&.opentelemetry&.enabled ? true : false
      end

      # Start the adapter (idempotent). No-ops when disabled or SDK unavailable.
      # Any previous subscription is removed first; errors raised during
      # startup are swallowed and nil is returned.
      # @return [Object, nil] subscription handle or nil when not installed/enabled
      # @since M8
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability#opentelemetry
      def start!
        stop!
        return nil unless enabled?
        return nil unless defined?(ActiveSupport::Notifications)

        @service_name = begin
          SearchEngine.config.opentelemetry.service_name
        rescue StandardError
          'search_engine'
        end
        @tracer = tracer_provider&.tracer('search_engine', SearchEngine::VERSION)

        @handle = ActiveSupport::Notifications.subscribe(/^search_engine\./) do |*args|
          # An Event wrapper is built for every matching notification;
          # handle_event returns early when no tracer is available.
          event = ActiveSupport::Notifications::Event.new(*args)
          handle_event(event)
        end
      rescue StandardError
        # Never raise from adapter startup
        nil
      end

      # Stop the adapter if previously started.
      # @return [Boolean] true when a subscription was removed, false otherwise
      # @since M8
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/observability#opentelemetry
      def stop!
        return false unless defined?(ActiveSupport::Notifications)
        return false unless @handle

        ActiveSupport::Notifications.unsubscribe(@handle)
        @handle = nil
        true
      end

      private

      # Global OpenTelemetry tracer provider, or nil when the SDK is missing
      # or reading the provider raises.
      def tracer_provider
        return nil unless installed?

        ::OpenTelemetry.tracer_provider
      rescue StandardError
        nil
      end

      attr_reader :tracer

      # Translate one notification event into a span named after the event.
      # Errors are swallowed so a subscriber failure never reaches the caller.
      def handle_event(event)
        return unless tracer

        payload = event.payload || {}
        duration = compute_duration(event, payload)

        tracer.in_span(event.name) do |span|
          apply_common_attributes(span, event, payload, duration)
          apply_url_attributes(span, payload)
          apply_feature_attributes(span, payload)
          apply_indexer_schema_attributes(span, payload)
          apply_params_preview(span, payload)
          apply_status(span, payload)
        end
      rescue StandardError
        # Never raise from subscriber
        nil
      end

      # Duration rounded to one decimal: the event's own duration when
      # available and non-zero, otherwise `payload[:duration_ms]`.
      def compute_duration(event, payload)
        d = (event.respond_to?(:duration) ? event.duration : payload[:duration_ms]).to_f
        d = payload[:duration_ms].to_f if d.zero? && payload[:duration_ms]
        d.round(1)
      end

      # Attributes shared by every event: service name, event name,
      # correlation id, HTTP status, duration and collection.
      def apply_common_attributes(span, event, payload, duration)
        assign_attr(span, 'service.name', @service_name)
        assign_attr(span, 'se.event', event.name)
        assign_attr(span, 'se.cid', payload[:correlation_id]) if payload.key?(:correlation_id)
        assign_attr(span, 'http.status_code', payload[:http_status]) if payload.key?(:http_status)
        assign_attr(span, 'se.duration_ms', duration) if duration.positive?
        return unless payload[:collection] || payload[:logical]

        assign_attr(span, 'se.collection', payload[:collection] || payload[:logical])
      end

      # Cache-related URL options, when `payload[:url_opts]` is a Hash.
      def apply_url_attributes(span, payload)
        url_opts = payload[:url_opts]
        return unless url_opts.is_a?(Hash)

        assign_attr(span, 'se.url_use_cache', url_opts[:use_cache]) if url_opts.key?(:use_cache)
        assign_attr(span, 'se.url_cache_ttl', url_opts[:cache_ttl]) if url_opts.key?(:cache_ttl)
      end

      # Counters and grouping attributes. Grouping values are read from the
      # short key (:field, :limit, :missing_values) first and from the
      # `group_`-prefixed key as a fallback.
      def apply_feature_attributes(span, payload)
        assign_attr(span, 'se.labels_count', Array(payload[:labels]).size) if payload.key?(:labels)
        %i[searches_count node_count join_count groups_count].each do |k|
          assign_attr(span, "se.#{k}", payload[k]) if payload.key?(k)
        end
        if payload.key?(:field) || payload.key?(:group_by)
          assign_attr(span, 'se.group_by', payload[:field] || payload[:group_by])
        end
        if payload.key?(:limit) || payload.key?(:group_limit)
          assign_attr(span, 'se.group_limit', payload[:limit] || payload[:group_limit])
        end
        return unless payload.key?(:missing_values) || payload.key?(:group_missing_values)

        # This value is a boolean: `||` would discard an explicit `false`, so
        # fall back to the prefixed key only when the short key is nil.
        missing = payload[:missing_values]
        missing = payload[:group_missing_values] if missing.nil?
        assign_attr(span, 'se.group_missing_values', missing)
      end

      # Copy every present key listed in INDEXER_SCHEMA_KEYS as `se.<key>`,
      # plus the per-shape values of `payload[:shapes]` as `se.shapes.<shape>`.
      def apply_indexer_schema_attributes(span, payload)
        INDEXER_SCHEMA_KEYS.each do |k|
          assign_attr(span, "se.#{k}", payload[k]) if payload.key?(k)
        end
        return unless payload.key?(:shapes)

        shapes = payload[:shapes] || {}
        SHAPE_KEYS.each do |k|
          assign_attr(span, "se.shapes.#{k}", shapes[k]) if shapes.key?(k)
        end
      end

      # Record only the number of keys in the redacted params preview; the
      # preview content itself is never attached to the span.
      def apply_params_preview(span, payload)
        return unless payload.key?(:params_preview)

        red = SearchEngine::Instrumentation.redact(payload[:params_preview])
        keys_count = (red.is_a?(Hash) ? red.keys.size : nil)
        assign_attr(span, 'se.params_preview_keys', keys_count) if keys_count
      rescue StandardError
        nil
      end

      # Mark the span as error when the payload status is "error", the HTTP
      # status is >= 400, or an error class is present; otherwise mark it ok.
      def apply_status(span, payload)
        http = payload[:http_status]
        status = payload[:status]
        err_class = payload[:error_class]
        err_msg = payload[:error_message]

        # Compare via to_s so a status that is neither String nor Symbol
        # (e.g. an Integer) cannot raise NoMethodError here.
        if status.to_s == 'error' || (http && http.to_i >= 400) || err_class
          # Record a lightweight exception event with sanitized message
          if err_class || err_msg
            msg = SearchEngine::Observability.truncate_message(err_msg || err_class.to_s, 200)
            span.add_event(
              'exception',
              attributes: {
                'exception.type' => (err_class || 'Error').to_s,
                'exception.message' => msg
              }
            )
          end
          span_status_error(span, err_msg || err_class)
        else
          span_status_ok(span)
        end
      end

      # Set an error status on the span; no-op when the Status class is absent.
      def span_status_error(span, description = nil)
        span.status = ::OpenTelemetry::Trace::Status.error(description.to_s) if defined?(::OpenTelemetry::Trace::Status)
      rescue StandardError
        nil
      end

      # Set an ok status on the span; no-op when the Status class is absent.
      def span_status_ok(span)
        span.status = ::OpenTelemetry::Trace::Status.ok if defined?(::OpenTelemetry::Trace::Status)
      rescue StandardError
        nil
      end

      # Set a span attribute, skipping nil values and swallowing any error
      # raised by the span.
      def assign_attr(span, key, value)
        return if value.nil?

        span.set_attribute(key, value)
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Compiles and validates partitioning directives captured by the index DSL.
  #
  # Provides an immutable object with callables for:
  # - partitions -> Enumerable of partition keys
  # - partition_fetch(partition) -> Enumerable of batches (Arrays of records)
  # - before_hook(partition)
  # - after_hook(partition)
  class Partitioner
    # Immutable compiled holder
    class Compiled
      attr_reader :klass, :partitions_proc, :partition_fetch_proc, :before_hook_proc, :after_hook_proc, :max_parallel

      # @param klass [Class] model class the directives belong to
      # @param partitions_proc [#call, nil] returns the partition keys
      # @param partition_fetch_proc [#call, nil] returns batches for one partition
      # @param before_hook_proc [#arity, nil] hook taking the partition
      # @param after_hook_proc [#arity, nil] hook taking the partition
      # @param max_parallel [Object] coerced with Integer(); falls back to 1
      #   when it cannot be coerced or is not positive
      # @raise [SearchEngine::Errors::InvalidParams] when a hook cannot be called with one argument
      def initialize(klass:, partitions_proc:, partition_fetch_proc:, before_hook_proc:, after_hook_proc:,
                     max_parallel: 1)
        @klass = klass
        @partitions_proc = partitions_proc
        @partition_fetch_proc = partition_fetch_proc
        validate_hook_arity!(before_hook_proc, name: 'before_partition') if before_hook_proc
        validate_hook_arity!(after_hook_proc, name: 'after_partition') if after_hook_proc
        @before_hook_proc = before_hook_proc
        @after_hook_proc = after_hook_proc
        mp = begin
          Integer(max_parallel)
        rescue StandardError
          1
        end
        @max_parallel = mp.positive? ? mp : 1
        freeze
      end

      # Enumerate partition keys. Validates the return value shape.
      # @return [Enumerable] list/Enumerable of opaque partition tokens; empty
      #   Array when no partitions block was given
      # @raise [SearchEngine::Errors::InvalidParams] when the block does not return an Enumerable
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#partitioning
      def partitions
        return [] unless @partitions_proc

        res = @partitions_proc.call
        unless res.respond_to?(:each)
          raise SearchEngine::Errors::InvalidParams,
                'partitions block must return an Enumerable of partition keys (Array acceptable). ' \
                'See https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#partitioning.'
        end
        res
      end

      # Return an Enumerator for batches for the given partition, validating element shape.
      # Batches that are not Arrays but respond to `to_a` are converted; the
      # per-batch check runs lazily while the Enumerator is consumed.
      # @param partition [Object]
      # @return [Enumerable<Array>] enumerator yielding Arrays of records
      # @raise [ArgumentError] when partition_fetch is not defined
      # @raise [SearchEngine::Errors::InvalidParams] when the block returns a non-enumerable or yields non-arrays
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#partitioning
      def partition_fetch_enum(partition)
        raise ArgumentError, 'partition_fetch not defined' unless @partition_fetch_proc

        enum = @partition_fetch_proc.call(partition)
        unless enum.respond_to?(:each)
          raise SearchEngine::Errors::InvalidParams,
                'partition_fetch must return an Enumerable yielding Arrays of records. ' \
                'See https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#partitioning.'
        end

        Enumerator.new do |y|
          idx = 0
          enum.each do |batch|
            unless batch.is_a?(Array) || batch.respond_to?(:to_a)
              raise SearchEngine::Errors::InvalidParams,
                    "partition_fetch must yield Arrays of records; got #{batch.class} at index #{idx}."
            end
            y << (batch.is_a?(Array) ? batch : batch.to_a)
            idx += 1
          end
        end
      end

      private

      # Ensure a hook can be invoked with exactly one positional argument.
      #
      # Arity 1 always passes. A negative arity -(n + 1) means n required
      # positional arguments plus optional/splat ones: it passes when n <= 1.
      # Non-lambda procs ignore surplus required parameters when called, so
      # they keep passing for any negative arity; lambdas and other strict
      # callables with n > 1 are rejected here instead of failing later with
      # ArgumentError when the hook is invoked.
      #
      # @param proc_obj [#arity]
      # @param name [String] DSL name used in the error message
      # @raise [SearchEngine::Errors::InvalidParams]
      def validate_hook_arity!(proc_obj, name:)
        ar = proc_obj.arity
        return if ar == 1

        if ar.negative?
          required = -ar - 1
          lenient = proc_obj.respond_to?(:lambda?) && !proc_obj.lambda?
          return if required <= 1 || lenient
        end

        raise SearchEngine::Errors::InvalidParams, "#{name} block must accept exactly 1 parameter (partition)."
      end
    end

    class << self
      # Resolve a compiled partitioner for a model class, or nil if directives are absent.
      # The compiled object is memoized per class on first use.
      # @param klass [Class]
      # @return [SearchEngine::Partitioner::Compiled, nil]
      # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer#partitioning
      def for(klass)
        dsl = mapper_dsl_for(klass)
        return nil unless dsl

        any = dsl[:partitions] || dsl[:partition_fetch] || dsl[:before_partition] || dsl[:after_partition]
        return nil unless any

        cache[klass] ||= compile(klass, dsl)
      end

      private

      # Per-class memo of compiled partitioners.
      def cache
        @cache ||= {}
      end

      # Build a Compiled holder from the DSL entries of +klass+.
      def compile(klass, dsl)
        Compiled.new(
          klass: klass,
          partitions_proc: dsl[:partitions],
          partition_fetch_proc: dsl[:partition_fetch],
          before_hook_proc: dsl[:before_partition],
          after_hook_proc: dsl[:after_partition],
          max_parallel: dsl[:partition_max_parallel]
        )
      end

      # Read the `@__mapper_dsl__` instance variable of +klass+; nil when unset.
      def mapper_dsl_for(klass)
        return unless klass.instance_variable_defined?(:@__mapper_dsl__)

        klass.instance_variable_get(:@__mapper_dsl__)
      end
    end
  end
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
  # Pure, deterministic normalizer for ranking/typo/prefix tuning.
  # Accepts relation context, effective query_by, and raw ranking state,
  # validates and emits authoritative Typesense params.
  #
  # Instances freeze themselves at the end of construction; {#params} is computed
  # once, inside the constructor.
  #
  # Usage: RankingPlan.new(relation: rel, query_by: "name,description", ranking: {...}).params
  class RankingPlan
    # Ranking keys copied into the output unchanged whenever present and non-nil.
    PASSTHROUGH_KEYS = %i[num_typos drop_tokens_threshold prioritize_exact_match].freeze
    # Documentation anchor attached to weight-related errors.
    WEIGHTS_DOC_URL = 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/ranking#weights'
    private_constant :PASSTHROUGH_KEYS, :WEIGHTS_DOC_URL

    # @return [Hash] compiled params (+:num_typos+, +:drop_tokens_threshold+,
    #   +:prioritize_exact_match+, +:query_by_weights+; each only when supplied)
    attr_reader :params

    # @param relation [SearchEngine::Relation] stored as context; not read by the compilation below
    # @param query_by [String, nil] comma-separated field list
    # @param ranking [Hash] raw ranking state; nil is treated as an empty Hash
    # @raise [SearchEngine::Errors::InvalidOption] when query_by_weights cannot be compiled
    def initialize(relation:, query_by:, ranking: {})
      @relation = relation
      @raw_query_by = query_by
      @raw = ranking || {}
      @params = compile!
      freeze
    end

    # Return effective query_by fields as an Array<String> (trimmed, non-blank)
    #
    # @return [Array<String>]
    def effective_query_by_fields
      resolve_query_by(@raw_query_by)
    end

    private

    # Assemble the output Hash: passthrough keys first, then the weight vector.
    #
    # @return [Hash]
    def compile!
      out = {}

      PASSTHROUGH_KEYS.each do |key|
        next unless @raw.key?(key)

        value = @raw[key]
        out[key] = value unless value.nil?
      end

      weights = @raw[:query_by_weights]
      out[:query_by_weights] = compile_weights!(weights) if weights

      out
    end

    # Validate the weight map against the effective query_by fields and serialize it.
    #
    # @param weights [Hash] field name => weight
    # @return [String] comma-joined integer weights, one per query_by field, in field order
    # @raise [SearchEngine::Errors::InvalidOption] when query_by is empty or every weight is zero
    def compile_weights!(weights)
      fields = effective_query_by_fields
      if fields.empty?
        raise SearchEngine::Errors::InvalidOption.new(
          'InvalidOption: query_by is empty; cannot apply query_by_weights',
          hint: 'Set SearchEngine.config.default_query_by or pass options(query_by: ...)',
          doc: WEIGHTS_DOC_URL
        )
      end

      vector = build_weight_vector!(fields, weights)
      if vector.all?(&:zero?)
        raise SearchEngine::Errors::InvalidOption.new(
          'InvalidOption: at least one weighted field must have weight > 0',
          doc: WEIGHTS_DOC_URL
        )
      end

      vector.join(',')
    end

    # Split a comma-separated field list into trimmed, non-blank names.
    #
    # @param query_by [String, nil]
    # @return [Array<String>]
    def resolve_query_by(query_by)
      query_by.to_s.split(',').map(&:strip).reject(&:empty?)
    end

    # Produce one Integer weight per field, defaulting unlisted fields to 1.
    #
    # Keys are normalized to Strings before lookup so that Symbol-keyed maps
    # (e.g. +{ name: 3 }+) are honored; field names derived from query_by are Strings.
    #
    # @param fields [Array<String>] effective query_by fields
    # @param weight_map [Hash] field name (String or Symbol) => weight
    # @return [Array<Integer>]
    # @raise [SearchEngine::Errors::InvalidOption] on an unknown field or a non-integer weight
    def build_weight_vector!(fields, weight_map)
      weights_by_name = weight_map.keys.each_with_object({}) do |key, acc|
        acc[key.to_s] = weight_map.fetch(key)
      end

      # Validate that provided keys are subset of effective query_by
      unknown = weights_by_name.keys - fields
      unless unknown.empty?
        offender = unknown.first
        suffix = suggestion_suffix(suggest_for(offender, fields))
        raise SearchEngine::Errors::InvalidOption.new(
          "InvalidOption: weight specified for unknown field #{offender.inspect}#{suffix}",
          doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/relation-reference#selection',
          details: { unknown: offender, allowed: fields }
        )
      end

      fields.map { |field| coerce_weight!(weights_by_name.fetch(field, 1)) }
    end

    # Convert a single weight to Integer, translating conversion failures.
    #
    # @param value [Object]
    # @return [Integer]
    # @raise [SearchEngine::Errors::InvalidOption] when the value has no integer form
    def coerce_weight!(value)
      Integer(value)
    rescue ArgumentError, TypeError, FloatDomainError
      # FloatDomainError covers NaN/Infinity, which Integer() rejects with a RangeError subclass.
      raise SearchEngine::Errors::InvalidOption.new(
        'InvalidOption: query_by_weights must compile to integers',
        doc: WEIGHTS_DOC_URL
      )
    end

    # Render the "did you mean" tail of the unknown-field message.
    #
    # @param suggestions [Array<String>]
    # @return [String] empty when there is nothing to suggest
    def suggestion_suffix(suggestions)
      return '' if suggestions.empty?
      return " (did you mean #{suggestions.first.inspect}?)" if suggestions.length == 1

      others = suggestions[0..-2].map(&:inspect).join(', ')
      " (did you mean #{others}, or #{suggestions.last.inspect}?)"
    end

    # Up to three candidates within Levenshtein distance 2 of the input, closest first.
    # Ties keep candidate order so the result is deterministic.
    #
    # @param input [#to_s]
    # @param candidates [Array<#to_s>]
    # @return [Array] matching candidates; empty when did_you_mean cannot be loaded
    def suggest_for(input, candidates)
      return [] if candidates.empty?

      begin
        require 'did_you_mean'
        require 'did_you_mean/levenshtein'
      rescue LoadError, StandardError
        # require signals a missing library with LoadError, which is not a StandardError.
        return []
      end

      needle = input.to_s
      scored = candidates.uniq.each_with_index.map do |cand, position|
        [cand, DidYouMean::Levenshtein.distance(needle, cand.to_s), position]
      end

      scored
        .select { |(_cand, distance, _position)| distance <= 2 }
        .sort_by { |(_cand, distance, position)| [distance, position] }
        .first(3)
        .map(&:first)
    end
  end
end
|