search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
# Rails engine for the SearchEngine gem.
#
# Declares the autoload/eager-load paths and the boot-time initializers that
# hydrate configuration from ENV, register Zeitwerk ignores, attach logging
# and OpenTelemetry adapters, install console helpers, and manage a dedicated
# Zeitwerk loader for host-app SearchEngine models.
class Engine < ::Rails::Engine
  engine_name 'search_engine'
  # isolate_namespace SearchEngine # enable later if controllers/routes appear

  # Add the gem's lib/ directory to the autoload paths.
  config.autoload_paths << root.join('lib').to_s

  # Register app/search_engine as an eager-loaded path.
  config.paths.add 'app/search_engine', eager_load: true

  initializer 'search_engine.configuration' do
    # Only blank/unset fields are hydrated from ENV (override_existing: false)
    # so host app overrides are not clobbered. ENV resolution lives in Config.
    SearchEngine.config.tap do |settings|
      settings.hydrate_from_env!(ENV, override_existing: false)
      settings.warn_if_incomplete!
    end
  end

  # The hyphenated compatibility shim and the CLI entrypoint are required
  # manually (the CLI from Rake tasks), so Zeitwerk must not try to
  # constantize them.
  initializer 'search_engine.zeitwerk_ignores', before: :set_autoload_paths do
    # A per-engine `loader` may or may not be exposed; guard its presence.
    engine_loader = respond_to?(:loader) ? loader : nil
    ignored_files = [
      root.join('lib', 'search-engine-for-typesense.rb').to_s,
      root.join('lib', 'search_engine', 'cli.rb').to_s
    ]
    ignored_files.each { |file| engine_loader&.ignore(file) }

    # lib/ is on the autoload paths, so the global main/once autoloaders may
    # scan it as well; make them ignore the same files.
    if defined?(Rails) && Rails.respond_to?(:autoloaders)
      autoloaders = Rails.autoloaders
      ignored_files.each do |file|
        %i[main once].each do |name|
          autoloaders.public_send(name).ignore(file) if autoloaders.respond_to?(name)
        end
      end
    end
  end

  initializer 'search_engine.observability' do
    settings = SearchEngine.config

    # The structured LoggingSubscriber is preferred when configured; the
    # legacy Notifications::CompactLogger (gated by settings.observability)
    # is the fallback.
    begin
      require 'search_engine/logging_subscriber'
    rescue LoadError
      # no-op; allow running without ActiveSupport
    end

    structured_installed = false
    if defined?(SearchEngine::LoggingSubscriber)
      logging = settings.respond_to?(:logging) ? settings.logging : nil
      # Opt-out when mode is nil or sample is explicitly 0.0
      if logging.respond_to?(:mode) && !logging.mode.nil?
        sample = logging.respond_to?(:sample) ? logging.sample : nil
        if sample.nil? || sample.to_f > 0.0
          SearchEngine::LoggingSubscriber.install!(logging)
          structured_installed = true
        end
      end
    end
    next if structured_installed

    next unless settings.observability&.enabled

    # The subscriber is required here, at runtime, to avoid eager load issues.
    begin
      require 'search_engine/notifications/compact_logger'
    rescue LoadError
      # no-op; allow running without ActiveSupport
    end

    if defined?(SearchEngine::Notifications::CompactLogger)
      # Subscribe once per boot; the subscriber keeps the handle itself.
      SearchEngine::Notifications::CompactLogger.subscribe(
        logger: settings.logger,
        level: :info,
        include_params: false
      )
    end
  end

  initializer 'search_engine.opentelemetry' do
    SearchEngine.config
    begin
      require 'search_engine/otel'
    rescue LoadError
      # no-op; adapter is fully optional
    end

    # Start adapter only when SDK is present and config enables it
    SearchEngine::Otel.start! if defined?(SearchEngine::Otel)
  end

  initializer 'search_engine.console_helpers' do
    next unless defined?(Rails::Console) || $PROGRAM_NAME&.end_with?('console')

    begin
      require 'search_engine/console_helpers'
      SearchEngine::ConsoleHelpers.install!
    rescue LoadError
      # no-op; helpers are optional
    end
  end

  # Manages a dedicated Zeitwerk loader for host app SearchEngine models.
  initializer 'search_engine.models_loader' do
    # A nil/false/blank setting disables the dedicated loader entirely.
    settings = SearchEngine.config
    configured = settings.respond_to?(:search_engine_models) ? settings.search_engine_models : nil
    next if configured.nil? || configured == false || configured.to_s.strip.empty?

    require 'pathname'
    models_dir = Pathname.new(configured.to_s)
    # Relative paths are resolved against the host application's root.
    models_dir = Rails.root.join(models_dir) unless models_dir.absolute?
    models_dir = models_dir.to_s
    next unless File.directory?(models_dir)

    # Rails' own autoloaders must not manage this directory too.
    if defined?(Rails) && Rails.respond_to?(:autoloaders)
      autoloaders = Rails.autoloaders
      %i[main once].each do |name|
        autoloaders.public_send(name).ignore(models_dir) if autoloaders.respond_to?(name)
      end
    end

    # Reuse the loader stored on SearchEngine, or build and store a new one.
    models_loader = SearchEngine.instance_variable_get(:@_models_loader) || begin
      fresh = Zeitwerk::Loader.new
      fresh.tag = 'search_engine.models'
      # Share Rails' inflector so constantization rules stay consistent.
      if defined?(Rails) && Rails.respond_to?(:autoloaders) && Rails.autoloaders.respond_to?(:main)
        fresh.inflector = Rails.autoloaders.main.inflector
      end
      fresh.push_dir(models_dir, namespace: SearchEngine)
      fresh.enable_reloading if defined?(Rails) && Rails.env.development?
      SearchEngine.instance_variable_set(:@_models_loader, fresh)
    end

    # Set up right away (guarded by a flag so it happens once) so host
    # to_prepare callbacks that run early can rely on SearchEngine models.
    unless SearchEngine.instance_variable_defined?(:@_models_loader_setup)
      models_loader.setup
      SearchEngine.instance_variable_set(:@_models_loader_setup, true)
    end

    # Coordinate with the Rails reloader lifecycle.
    config.to_prepare do
      active_loader = SearchEngine.instance_variable_get(:@_models_loader)
      next unless active_loader

      active_loader.reload if defined?(Rails) && Rails.env.development?
      # Models are always eager-loaded so their `collection` calls register
      # mappings at boot, regardless of the Rails.eager_load setting.
      active_loader.eager_load
    end
  end
end
167
+ end
@@ -0,0 +1,290 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ # Public error hierarchy for the SearchEngine client wrapper.
5
+ #
6
+ # These exceptions provide a stable contract to callers regardless of the
7
+ # underlying HTTP client or the Typesense gem's internal error types.
8
+ module Errors
9
# Base error for all SearchEngine failures.
# Carries optional structured metadata for enhanced DX.
#
# Keyword options are optional and backwards-compatible. Existing call sites
# that pass only a message remain valid.
#
# @!attribute [r] hint
#   @return [String, nil] short actionable suggestion (no secrets)
# @!attribute [r] doc
#   @return [String, nil] docs URL with optional anchor (e.g., "https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/query-dsl#operators")
# @!attribute [r] details
#   @return [Object, nil] machine-readable context (JSON-serializable)
# @!attribute [r] code
#   @return [Symbol, nil] stable symbolic code when defined by subclasses
# @abstract
class Error < StandardError
  attr_reader :hint, :doc, :details, :code

  # Unknown keyword options are accepted and discarded.
  #
  # @param message [String]
  # @param hint [String, nil] blank values are normalized to nil
  # @param doc [String, nil] blank values are normalized to nil
  # @param details [Object, nil] redacted (when Observability is loaded) and
  #   converted to plain hashes/arrays/scalars
  # @param code [Symbol, nil]
  def initialize(message = nil, hint: nil, doc: nil, details: nil, code: nil, **_ignore)
    super(message)
    @hint = presence_or_nil(hint)
    @doc = presence_or_nil(doc)
    @details = sanitize_details(details)
    @code = code
  end

  # Return a stable, redaction-aware hash for logging/telemetry.
  # Keys are predictable for downstream processing; nil entries are omitted
  # and :code is present only when set.
  # @return [Hash]
  def to_h
    base = {
      type: self.class.name,
      message: to_base_message,
      hint: @hint,
      doc: @doc,
      details: @details
    }
    base[:code] = @code if @code
    prune_nils(base)
  end

  # Preserve historic message but append a concise suffix when hint/doc present.
  # Single-line for log friendliness.
  # @return [String]
  def to_s
    base = to_base_message
    suffix = []
    suffix << "Hint: #{@hint}" if @hint
    suffix << "see #{@doc}" if @doc
    return base if suffix.empty?

    "#{base} — #{suffix.join(' ')}"
  end

  private

  # The message without the hint/doc suffix.
  def to_base_message
    # Call Exception#to_s directly to avoid our overridden to_s suffix
    Exception.instance_method(:to_s).bind_call(self).to_s
  end

  # Redact (when SearchEngine::Observability is loaded) and normalize details.
  # If redaction raises, the unredacted object is normalized instead.
  def sanitize_details(obj)
    return nil if obj.nil?
    return jsonable(obj) unless defined?(SearchEngine::Observability)

    begin
      jsonable(SearchEngine::Observability.redact(obj))
    rescue StandardError
      jsonable(obj)
    end
  end

  # Recursively convert +obj+ into hashes with symbol keys, arrays and
  # scalars; any other object is stringified.
  def jsonable(obj)
    case obj
    when Hash
      obj.each_with_object({}) do |(k, v), h|
        # Integer/nil/other keys have no #to_sym; stringify them first so a
        # details hash can never raise while an error is being constructed.
        key = k.respond_to?(:to_sym) ? k.to_sym : k.to_s.to_sym
        h[key] = jsonable(v)
      end
    when Array
      obj.map { |v| jsonable(v) }
    when Numeric, TrueClass, FalseClass, NilClass, String
      obj
    else
      obj.to_s
    end
  end

  # Copy of +h+ without nil-valued entries.
  def prune_nils(h)
    h.compact
  end

  # Stringify +v+; nil and whitespace-only values become nil.
  def presence_or_nil(v)
    return nil if v.nil?

    s = v.to_s
    s.strip.empty? ? nil : s
  end
end
116
+
117
+ # Raised when a request exceeds the configured timeout budget.
118
+ #
119
+ # Typical causes include connect/open timeouts or read timeouts surfaced by
120
+ # the underlying HTTP client used by the official Typesense gem.
121
+ class Timeout < Error; end
122
+
123
+ # Raised for network-level connectivity issues prior to receiving a response.
124
+ #
125
+ # Examples: DNS resolution failures, refused TCP connections, TLS handshake
126
+ # errors, or other socket-level errors.
127
+ class Connection < Error; end
128
+
129
# Raised when Typesense responds with a non-2xx HTTP status code.
#
# Exposes the HTTP status and the parsed error body (when available) so
# callers can debug the failure or branch on it programmatically.
#
# @!attribute [r] status
#   @return [Integer] HTTP status code
# @!attribute [r] body
#   @return [Object, nil] Parsed error body (Hash/String), when available
class Api < Error
  attr_reader :status, :body

  # @param msg [String]
  # @param status [Integer]
  # @param body [Object, nil]
  # @param metadata [Hash] remaining keyword options, forwarded unchanged to
  #   the parent initializer
  def initialize(msg, status:, body: nil, **metadata)
    super(msg, **metadata)
    @status = status
    @body = body
  end
end
149
+
150
+ # Raised when wrapper-level validation fails before making a request.
151
+ #
152
+ # Use this for actionable, developer-facing messages that indicate a caller
153
+ # constructed an invalid request (e.g., blank collection name).
154
+ class InvalidParams < Error; end
155
+
156
+ # Raised when a provided field name is unknown or disallowed for a model.
157
+ #
158
+ # Typical cause: a typo or using a field that is not declared via
159
+ # {SearchEngine::Base.attribute}.
160
+ class InvalidField < Error; end
161
+
162
+ # Raised when a base attribute referenced by the Field Selection DSL is not
163
+ # declared on the model.
164
+ #
165
+ # Prefer this over {InvalidField} for selection-time validation to provide
166
+ # developer-friendly guidance and suggestions.
167
+ class UnknownField < Error; end
168
+
169
+ # Raised when an operator or fragment token is not recognized by the SQL-ish
170
+ # grammar accepted by the Parser.
171
+ class InvalidOperator < Error; end
172
+
173
+ # Raised when a value cannot be coerced to the declared attribute type, or
174
+ # when its shape is incompatible (e.g., empty array for IN/NOT IN).
175
+ class InvalidType < Error; end
176
+
177
+ # Raised when a requested join association is not declared for a model.
178
+ #
179
+ # Typical cause: a typo or referencing an association that has not been
180
+ # registered via {SearchEngine::Base.join}.
181
+ class UnknownJoin < Error; end
182
+
183
+ # Raised when an association reference is invalid for the model and should
184
+ # be declared via {SearchEngine::Base.join}.
185
+ #
186
+ # Prefer this for high-level validation messaging with guidance and
187
+ # suggestions ("did you mean ..."), while keeping {UnknownJoin} for
188
+ # lower-level registry lookups.
189
+ class InvalidJoin < Error; end
190
+
191
+ # Raised when a query references a joined association field without applying
192
+ # the association on the relation via {SearchEngine::Relation#joins}.
193
+ #
194
+ # Example: calling `where(authors: { last_name: "Rowling" })` without
195
+ # `.joins(:authors)` on the relation first.
196
+ class JoinNotApplied < Error; end
197
+
198
+ # Raised when a nested attribute referenced by the Field Selection DSL is
199
+ # not declared on the joined association's target model.
200
+ #
201
+ # Typical cause: a typo in a nested field name or a stale attribute map.
202
+ class UnknownJoinField < Error; end
203
+
204
+ # Raised when selection inputs are malformed or ambiguous and cannot be
205
+ # deterministically normalized (e.g., invalid nested shapes or incompatible
206
+ # payload types).
207
+ class ConflictingSelection < Error; end
208
+
209
+ # Raised when grouping DSL is used with invalid inputs.
210
+ #
211
+ # Use for actionable messages like unknown field names, invalid limit values,
212
+ # or non-boolean missing_values.
213
+ #
214
+ # @example Unknown field with suggestion
215
+ # raise SearchEngine::Errors::InvalidGroup, "InvalidGroup: unknown field :brand for grouping on SearchEngine::Product (did you mean :brand_id?)"
216
+ class InvalidGroup < Error; end
217
+
218
+ # Raised when grouping references unsupported constructs such as joined/path fields
219
+ # (e.g., "$assoc.field"). Only base fields are supported for grouping.
220
+ #
221
+ # @example
222
+ # raise SearchEngine::Errors::UnsupportedGroupField, 'UnsupportedGroupField: grouping supports base fields only (got "$authors.last_name")'
223
+ class UnsupportedGroupField < Error; end
224
+
225
+ # Raised when strict selection is enabled and a requested field is absent
226
+ # in the hydrated document (e.g., excluded by API mapping).
227
+ #
228
+ # This error is actionable and guides remediation: adjust the relation's
229
+ # selection (select/exclude/reselect), relax strictness, or ensure the
230
+ # upstream Typesense include/exclude mapping includes the fields.
231
+ class MissingField < Error; end
232
+
233
+ # Raised when a materializer requests fields that are not permitted by the
234
+ # relation's effective selection (include − exclude, with exclude taking precedence).
235
+ #
236
+ # Used by selection-aware materializers like {SearchEngine::Relation#pluck},
237
+ # {SearchEngine::Relation#pick}, and {SearchEngine::Relation#ids} to fail fast
238
+ # before any network call.
239
+ class InvalidSelection < Error; end
240
+
241
+ # Raised when a curated ID does not match the configured pattern.
242
+ #
243
+ # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
244
+ # @example
245
+ # raise SearchEngine::Errors::InvalidCuratedId, 'InvalidCuratedId: "foo bar" is not a valid curated ID. Expected pattern: /\A[\w\-:\.]+\z/. Try removing illegal characters.'
246
+ class InvalidCuratedId < Error; end
247
+
248
+ # Raised when pinned/hidden lists exceed configured limits after normalization.
249
+ #
250
+ # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
251
+ # @example
252
+ # raise SearchEngine::Errors::CurationLimitExceeded, 'CurationLimitExceeded: pinned list exceeds max_pins=50 (attempted 51). Reduce inputs or raise the limit in SearchEngine.config.curation.'
253
+ class CurationLimitExceeded < Error; end
254
+
255
+ # Raised when an override tag is blank or invalid per allowed pattern.
256
+ #
257
+ # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/curation
258
+ # @example
259
+ # raise SearchEngine::Errors::InvalidOverrideTag, 'InvalidOverrideTag: "" is invalid. Use non-blank strings that match the allowed pattern.'
260
+ class InvalidOverrideTag < Error; end
261
+
262
+ # Raised when an option value is invalid or unsupported for a public API.
263
+ #
264
+ # Used by DSL methods to fail fast with actionable hints.
265
+ # Typical causes: invalid HTML tag tokens, negative integers for thresholds,
266
+ # or empty field lists where at least one field is required.
267
+ #
268
+ # @example
269
+ # raise SearchEngine::Errors::InvalidOption.new(
270
+ # 'InvalidOption: tag must be a simple HTML-like token',
271
+ # hint: 'Use a simple tag like <em> or <mark>',
272
+ # doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/highlighting#options'
273
+ # )
274
+ class InvalidOption < Error; end
275
+
276
+ # Raised when a query result exceeds the configured post-fetch hits ceiling.
277
+ #
278
+ # Use this for actionable messages when {Relation#validate_hits!} is set and
279
+ # the backend reports a total hits count above the allowed maximum.
280
+ #
281
+ # @example
282
+ # raise SearchEngine::Errors::HitLimitExceeded.new(
283
+ # 'HitLimitExceeded: 12000 results exceed max=10000',
284
+ # hint: 'Increase `validate_hits!(max:)` or narrow your filters.',
285
+ # doc: 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/hit-limits#validation',
286
+ # details: { total_hits: 12_000, max: 10_000, collection: 'products' }
287
+ # )
288
+ class HitLimitExceeded < Error; end
289
+ end
290
+ end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ module Filters
5
# Sanitizer utilities for Typesense-compatible filters.
#
# Provides quoting/escaping and helpers to build normalized filter strings
# from hashes and templates with placeholders.
module Sanitizer
  # String forms of the special literals; always quoted when given as strings.
  RESERVED_LITERALS = %w[true false null].freeze

  # Whole-string pattern for values that may be emitted without quotes.
  # Anchored with \A/\z (not ^/$) so multi-line input can never match.
  BARE_TOKEN = /\A[A-Za-z_][A-Za-z0-9_-]*\z/

  # Template tokenizer: a backslash-escaped pair, or a bare '?' placeholder.
  PLACEHOLDER_TOKEN = /\\.|\?/m

  module_function

  # Quote a Ruby value into a Typesense filter literal.
  #
  # - NilClass => "null"
  # - TrueClass/FalseClass => "true"/"false"
  # - Numeric => as-is (stringified)
  # - String => double-quoted with minimal escaping for \ and "
  # - Time/DateTime/Date => ISO8601 string, then quoted as a string
  # - Array => one-level flatten, each element quoted, wrapped with [ ... ]
  # - anything else => quoted ISO8601 of #to_time when available, otherwise
  #   quoted #to_s
  #
  # @param value [Object]
  # @return [String]
  def quote(value)
    case value
    when NilClass
      'null'
    when TrueClass
      'true'
    when FalseClass
      'false'
    when Numeric
      value.to_s
    when String
      %("#{escape_string(value)}")
    when Time, DateTime, Date
      %("#{value.iso8601}")
    when Array
      "[#{value.flatten(1).map { |el| quote(el) }.join(', ')}]"
    else
      if value.respond_to?(:to_time)
        %("#{value.to_time.iso8601}")
      else
        %("#{escape_string(value.to_s)}")
      end
    end
  end

  # Quote a scalar Ruby value for Typesense filters with conditional quoting for strings.
  #
  # Rules (based on Typesense filter_by syntax):
  # - Strings/Symbols that match a safe token pattern (e.g., Active, ACTIVE_1, foo-bar) are emitted bare
  # - Reserved words true/false/null remain bare only when actually booleans/nil; string forms are quoted
  # - Strings with any other characters are double-quoted with escaping
  # - Every other value (including Arrays) is delegated to +quote+
  #
  # @param value [Object]
  # @return [String]
  def quote_scalar_for_filter(value)
    return quote(value) unless value.is_a?(String) || value.is_a?(Symbol)

    str = value.to_s
    bare = safe_bare_string?(str) && !RESERVED_LITERALS.include?(str.downcase)
    bare ? str : %("#{escape_string(str)}")
  end

  # Build normalized filter fragments from a Hash.
  # Scalars become "field:=<quoted>", arrays become "field:=<quoted_list>".
  #
  # @param hash [Hash{#to_sym=>Object}]
  # @param _attributes_map [Hash] (ignored here; validation should be done by caller)
  # @return [Array<String>]
  # @raise [ArgumentError] when +hash+ is not a Hash
  def build_from_hash(hash, _attributes_map = nil)
    raise ArgumentError, 'filters hash must be a Hash' unless hash.is_a?(Hash)

    hash.map do |key, raw|
      literal = array_like?(raw) ? quote(raw) : quote_scalar_for_filter(raw)
      "#{key.to_sym}:=#{literal}"
    end
  end

  # Apply placeholder substitution for templates with '?' markers.
  #
  # Each unescaped '?' is replaced with a quoted argument from +args+ in order.
  # A backslash escapes the character that follows it; escaped pairs are
  # copied to the output verbatim (the backslash is kept). Substitution uses
  # the same tokenizer as +count_placeholders+, so every counted placeholder
  # is filled.
  #
  # @param template [String]
  # @param args [Array<Object>]
  # @return [String]
  # @raise [ArgumentError] on wrong argument types or a placeholder/argument
  #   count mismatch
  def apply_placeholders(template, args)
    raise ArgumentError, 'template must be a String' unless template.is_a?(String)
    raise ArgumentError, 'args must be an Array' unless args.is_a?(Array)

    needed = count_placeholders(template)
    provided = args.length
    raise ArgumentError, "expected #{needed} args for #{needed} placeholders, got #{provided}" if needed != provided

    cursor = -1
    # Block form of gsub: the returned text is inserted literally, without
    # backslash interpretation.
    template.gsub(PLACEHOLDER_TOKEN) do |token|
      next token unless token == '?'

      cursor += 1
      quote_scalar_for_filter(args[cursor])
    end
  end

  # Count unescaped '?' placeholders.
  # @param template [String]
  # @return [Integer]
  def count_placeholders(template)
    template.scan(PLACEHOLDER_TOKEN).count('?')
  end

  # Escape a raw string for inclusion inside double quotes.
  #
  # Prefixes every backslash and double quote with a backslash. The block
  # form is required: in a gsub replacement *string*, '\\\\' is itself read as
  # an escaped single backslash, which would leave backslashes undoubled.
  #
  # @param str [String]
  # @return [String]
  def escape_string(str)
    str.gsub(/[\\"]/) { |ch| "\\#{ch}" }
  end

  # Determine whether a string can be emitted bare without quotes in filter_by.
  # Safe if the whole string starts with a letter or underscore followed only
  # by letters/digits/underscores/hyphens.
  # This avoids ambiguity with numbers and special characters.
  #
  # @param str [String, nil]
  # @return [Boolean]
  def safe_bare_string?(str)
    return false if str.nil?

    BARE_TOKEN.match?(str)
  end

  # @api private
  def array_like?(value)
    value.is_a?(Array)
  end
end
188
+ end
189
+ end