search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,255 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/concern'
4
+ require 'active_support/core_ext/hash/indifferent_access'
5
+
6
+ module SearchEngine
7
+ class Base
8
+ # Indexing DSL: define index mapping, identity computation and stale filter.
9
+ module IndexingDsl
10
+ extend ActiveSupport::Concern
11
+
12
+ class_methods do
13
# Declare the collection-level indexing configuration and mapping.
#
# A fresh SearchEngine::Mapper::Dsl is built for this class and populated by the
# given block. Both block styles are supported:
#   - index { source :active_record, ...; map { ... } }
#   - index { |dsl| dsl.source :active_record, ...; dsl.map { ... } }
#
# The resulting definition is stored in @__mapper_dsl__ and its :stale value,
# wrapped with Array(), in @__stale_entries__.
#
# @yieldparam dsl [SearchEngine::Mapper::Dsl]
# @return [void]
# @raise [ArgumentError] when no block is given or the definition has no callable :map
def index(&block)
  raise ArgumentError, 'index requires a block' if block.nil?

  builder = SearchEngine::Mapper::Dsl.new(self)
  # A one-argument block receives the DSL explicitly; any other arity is
  # evaluated with the DSL as +self+.
  block.arity == 1 ? block.call(builder) : builder.instance_eval(&block)

  compiled = builder.to_definition
  if !compiled[:map].respond_to?(:call)
    raise ArgumentError, 'index requires a map { |record| ... } block returning a document'
  end

  # Store definition on the class; Mapper.for will compile and cache
  @__mapper_dsl__ = compiled
  @__stale_entries__ = Array(compiled[:stale])
  nil
end
39
+
40
# Configure how to compute the Typesense document id for this collection.
#
# Resolution order:
#   1. a block, when given (any +strategy+ argument is then ignored);
#   2. a Proc/Lambda +strategy+, whose arity must be 1 or -1;
#   3. a Symbol/String +strategy+, naming a public method sent to the record.
#
# The stored callable (@identify_by_proc) always returns a String: non-String
# results are converted with #to_s.
#
# Metadata about the effective strategy is kept for type hints:
# @__identify_by_kind__ is :symbol only when the Symbol/String strategy is the
# one actually used, and @__identify_by_symbol__ then holds the method name.
# When a block or Proc is used, the kind is :proc and the symbol is nil.
#
# @param strategy [Symbol, String, Proc]
# @yield [record]
# @return [Class] self
# @raise [SearchEngine::Errors::InvalidOption] when the Proc arity is not 1/-1, or
#   when neither a block nor a supported strategy is given
def identify_by(strategy = nil, &block)
  # Set only by the branch that really resolves through a method name, so the
  # recorded metadata can never disagree with the callable in use.
  method_symbol = nil

  callable =
    if block
      block
    elsif strategy.is_a?(Proc)
      unless [1, -1].include?(strategy.arity)
        raise SearchEngine::Errors::InvalidOption,
              'identify_by Proc/Lambda must accept exactly 1 argument (record)'
      end

      strategy
    elsif strategy.is_a?(Symbol) || strategy.is_a?(String)
      method_symbol = strategy.to_sym
      meth = strategy.to_s
      ->(record) { record.public_send(meth) }
    else
      raise SearchEngine::Errors::InvalidOption,
            'identify_by expects a Symbol/String method name or a Proc/Lambda (or block)'
    end

  # Normalize to a proc that always returns String
  @identify_by_proc = lambda do |record|
    val = callable.call(record)
    val.is_a?(String) ? val : val.to_s
  end

  # Persist minimal metadata about the effective strategy to inform type hints
  @__identify_by_kind__ = method_symbol ? :symbol : :proc
  @__identify_by_symbol__ = method_symbol
  self
end
78
+ end
79
+
80
+ class_methods do
81
# Map source input through this model's declared source and mapping.
#
# The accepted input depends on the source type captured by the `index` DSL:
# - :active_record — an instance of the configured AR model, or an Array of such
#   instances. The output keeps the input shape (single in, single out).
# - :sql — a SQL String; it is executed to fetch rows and the result is always
#   an Array, even for a single row.
#
# Documents are produced by the mapper resolved for this class via
# +map_batch!+ (with +batch_index: 0+).
#
# @param data [Object] source input (AR instance/Array for :active_record; SQL String for :sql)
# @param mode [Symbol] :instance builds model instances through +from_document+;
#   :hash calls +with_indifferent_access+ on each document
# @return [Object] a single instance/hash or an Array, per the shape policy above
# @raise [SearchEngine::Errors::InvalidOption] when mode is unknown or source unsupported
# @raise [SearchEngine::Errors::InvalidParams] when inputs are invalid or mapper/source missing
#
# @example ActiveRecord source -> instance
#   SearchEngine::Product.from(Product.first)
#   # => #<SearchEngine::Product ...>
#
# @example ActiveRecord source -> array of instances
#   SearchEngine::Product.from([Product.first, Product.second])
#   # => [#<SearchEngine::Product ...>, #<SearchEngine::Product ...>]
#
# @example SQL source -> array of instances
#   SearchEngine::Product.from("SELECT id, name FROM products", mode: :instance)
#   # => [#<SearchEngine::Product ...>, ...]
#
# @example Hash mode with indifferent access
#   SearchEngine::Product.from(Product.first, mode: :hash) # => {"id"=>..., :id=>..., ...}
def from(data, mode: :instance)
  unless %i[instance hash].include?(mode)
    raise SearchEngine::Errors::InvalidOption,
          "Invalid mode: #{mode.inspect}. Allowed: :instance or :hash"
  end

  mapper = __se_resolve_mapper_for_from!
  # Source definition as captured by the DSL
  source_def = __se_resolve_source_for_from!
  source_type = source_def[:type].to_sym

  rows, shape =
    case source_type
    when :active_record
      options = source_def[:options]
      __se_normalize_rows_for_active_record!(options && options[:model], data)
    when :sql
      # SQL input always yields an Array, regardless of row count.
      [__se_fetch_rows_for_sql!(data), :array]
    else
      raise SearchEngine::Errors::InvalidOption,
            "Unsupported source type: #{source_type.inspect}. Supported: :active_record, :sql"
    end

  docs, = mapper.map_batch!(rows, batch_index: 0)

  results =
    if mode == :instance
      docs.map { |doc| from_document(doc) }
    else
      docs.map(&:with_indifferent_access)
    end

  shape == :single ? results.first : results
end
155
+
156
+ # -- helpers (class methods) -------------------------------------------------
157
+
158
# Look up the mapper for this class via SearchEngine::Mapper.for, failing loudly
# when the lookup returns a falsy value.
#
# @return [SearchEngine::Mapper::Compiled]
# @raise [SearchEngine::Errors::InvalidParams] when no mapper is available
def __se_resolve_mapper_for_from!
  SearchEngine::Mapper.for(self) || raise(
    SearchEngine::Errors::InvalidParams,
    "mapper is not defined for #{name}. Define it via `index do ... map { ... } end`."
  )
end
166
+
167
# Fetch the :source entry of the definition stored in @__mapper_dsl__.
# The entry is only accepted when it carries a truthy :type.
#
# @return [Hash] the source definition
# @raise [SearchEngine::Errors::InvalidParams] when no definition, no :source, or no :type is present
def __se_resolve_source_for_from!
  definition = instance_variable_defined?(:@__mapper_dsl__) ? @__mapper_dsl__ : nil
  source = definition ? definition[:source] : nil

  unless source && source[:type]
    raise SearchEngine::Errors::InvalidParams,
          "source is not defined for #{name}. Define it via `index { source :active_record, ... }` or `:sql`."
  end

  source
end
176
+
177
# Validate :active_record input and normalize it to a list of rows plus a shape tag.
#
# An Array is passed through unchanged and tagged :array; any other value is
# wrapped in a one-element Array and tagged :single. Every row must be an
# instance of +model+.
#
# @param model [Class]
# @param data [Object]
# @return [Array<Array, Symbol>] [rows, shape]
# @raise [SearchEngine::Errors::InvalidParams] when +model+ is not a Class or a row has the wrong type
def __se_normalize_rows_for_active_record!(model, data)
  unless model.is_a?(Class)
    raise SearchEngine::Errors::InvalidParams,
          'ActiveRecord source requires a :model class in index DSL'
  end

  many = data.is_a?(Array)
  rows = many ? data : [data]
  return [rows, many ? :array : :single] if rows.all? { |row| row.is_a?(model) }

  message =
    if many
      "All elements must be instances of #{model.name} for :active_record source"
    else
      "Expected instance of #{model.name} for :active_record source"
    end
  raise SearchEngine::Errors::InvalidParams, message
end
200
+
201
# Run +sql+ through SearchEngine::Sources::SqlSource and collect every batch it
# yields into one flat Array.
#
# @param sql [String] must be a String with non-whitespace content
# @return [Array<Hash>]
# @raise [SearchEngine::Errors::InvalidParams] when +sql+ is not a String or is blank
def __se_fetch_rows_for_sql!(sql)
  if !sql.is_a?(String) || sql.strip.empty?
    raise SearchEngine::Errors::InvalidParams,
          'SQL input must be a non-empty String for :sql source'
  end

  source = SearchEngine::Sources::SqlSource.new(sql: sql)
  [].tap do |collected|
    source.each_batch(partition: nil, cursor: nil) { |batch| collected.concat(batch) }
  end
end
213
+
214
# Compute the Typesense document id for a source record.
#
# Uses the identity callable stored in @identify_by_proc when one is set.
# Otherwise falls back to +record.id+, or to nil when the record does not
# respond to :id. The result is always a String: non-String values are
# converted with #to_s, so a missing id becomes "".
#
# @param record [Object]
# @return [String]
def compute_document_id(record)
  strategy = instance_variable_defined?(:@identify_by_proc) ? @identify_by_proc : nil

  raw =
    if strategy
      strategy.call(record)
    elsif record.respond_to?(:id)
      record.id
    end

  return raw if raw.is_a?(String)

  raw.to_s
end
227
+
228
# Map one record with the mapper resolved for this collection.
#
# The record is passed to the mapper's +map_batch!+ as a one-element batch
# (with +batch_index: 0+) and the first resulting document is returned.
#
# @param record [Object]
# @return [Hash] the mapped document
# @raise [SearchEngine::Errors::InvalidParams] when mapper is missing or record is nil
def mapped_data_for(record)
  if record.nil?
    raise SearchEngine::Errors::InvalidParams, 'record must be provided'
  end

  compiled = SearchEngine::Mapper.for(self) || raise(
    SearchEngine::Errors::InvalidParams,
    "mapper is not defined for #{name}. Define it via `index do ... map { ... } end`."
  )

  documents, _report = compiled.map_batch!([record], batch_index: 0)
  documents.first
end
245
+
246
# Stale cleanup entries recorded for this model.
#
# Returns a frozen shallow copy of @__stale_entries__, so the stored list itself
# is left unfrozen. When the variable was never set, the result is a frozen
# empty Array.
#
# @return [Array<Hash>]
def stale_entries
  return [].freeze unless instance_variable_defined?(:@__stale_entries__)

  @__stale_entries__.dup.freeze
end
252
+ end
253
+ end
254
+ end
255
+ end