search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139):
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,284 @@
1
+ # frozen_string_literal: true
2
+
3
require 'set'

module SearchEngine
  # Bulk index/reindex of several collections in a reference-aware order.
  #
  # Usage:
  #   SearchEngine::Bulk.index_collections(:shops, :promotions, :product_groups)
  #   SearchEngine::Bulk.reindex_collections!(SearchEngine::Promotion, SearchEngine::ProductGroup, SearchEngine::Shop)
  #
  # Behavior:
  # - Stage 1: inputs that do not reference another input are processed, in the order given.
  # - Stage 2: every collection that references any input (including inputs skipped in
  #   stage 1) is processed exactly once, referenced collections first.
  # - The whole run executes inside an instrumentation context carrying
  #   `bulk: true, bulk_suppress_cascade: true`; how the indexing code reacts to that flag
  #   is decided elsewhere, not in this module.
  module Bulk
    class << self
      # Index the given collections by calling `index_collection` on each resolved model.
      # When no targets are given, every key of `CollectionResolver.models_map` is used.
      #
      # @param targets [Array<Symbol, String, Class>] collection names or model classes
      # @param client [SearchEngine::Client, nil] optional client; defaults to `SearchEngine.client`
      # @return [Hash] summary with :mode, :inputs, :stage_1, :cascade, their *_count
      #   counterparts and :failed_collections_total (names that could not be resolved)
      def index_collections(*targets, client: nil)
        run!(mode: :index, targets: targets, client: client)
      end

      # Index every collection known to `CollectionResolver.models_map`.
      #
      # Models are loaded first (best effort) so the map is complete before its keys are read.
      #
      # @param client [SearchEngine::Client, nil]
      # @return [Hash] same summary shape as {.index_collections}
      def index_all(client: nil)
        ensure_models_loaded_from_configured_path!
        run!(mode: :index, targets: SearchEngine::CollectionResolver.models_map.keys, client: client)
      end

      # Reindex the given collections by calling `reindex_collection!` on each resolved model.
      # When no targets are given, every key of `CollectionResolver.models_map` is used.
      #
      # @param targets [Array<Symbol, String, Class>] collection names or model classes
      # @param client [SearchEngine::Client, nil] optional client; defaults to `SearchEngine.client`
      # @return [Hash] same summary shape as {.index_collections}
      def reindex_collections!(*targets, client: nil)
        run!(mode: :reindex, targets: targets, client: client)
      end

      # Reindex every collection known to `CollectionResolver.models_map`.
      #
      # Models are loaded first (best effort) so the map is complete before its keys are read.
      #
      # @param client [SearchEngine::Client, nil]
      # @return [Hash] same summary shape as {.index_collections}
      def reindex_all!(client: nil)
        ensure_models_loaded_from_configured_path!
        run!(mode: :reindex, targets: SearchEngine::CollectionResolver.models_map.keys, client: client)
      end

      private

      # Plan and execute both stages for the given targets.
      #
      # @param mode [Symbol, String] :index | :reindex
      # @param targets [Array<Symbol, String, Class>]
      # @param client [SearchEngine::Client, nil]
      # @return [Hash] run summary
      # @raise [ArgumentError] when mode is neither :index nor :reindex
      def run!(mode:, targets:, client: nil)
        # Normalize once so nil/garbage raises ArgumentError rather than NoMethodError.
        mode = mode.to_sym if mode.respond_to?(:to_sym)
        raise ArgumentError, 'mode must be :index or :reindex' unless %i[index reindex].include?(mode)

        ts_client = client || SearchEngine.client
        input_names = normalize_targets(targets)

        # Models must be loaded before names are resolved to classes, with or without targets.
        ensure_models_loaded_from_configured_path!

        # No explicit targets: fall back to every known collection, normalized the same way
        # as explicit targets so later set lookups compare String with String.
        input_names = normalize_targets(SearchEngine::CollectionResolver.models_map.keys) if input_names.empty?

        reverse_graph = SearchEngine::Cascade.build_reverse_graph(client: ts_client)
        input_set = input_names.to_h { |name| [name, true] }

        # Inputs that reference another input are deferred to stage 2.
        deferred = internal_referrers_within_inputs(reverse_graph, input_set)
        stage1_list = input_names.reject { |name| deferred.include?(name) }

        # Everything referencing any input, ordered so referenced collections come first.
        cascade_order = topo_sort_subset(reverse_graph, unique_referencers_of_inputs(reverse_graph, input_names))

        stats = { inputs: input_names, stage_1: stage1_list, cascade: cascade_order }
        payload = {
          mode: mode,
          inputs_count: input_names.size,
          stage_1_count: stage1_list.size,
          cascade_count: cascade_order.size
        }

        failed_collections_total = 0

        SearchEngine::Instrumentation.with_context(bulk: true, bulk_suppress_cascade: true, bulk_mode: mode) do
          SearchEngine::Instrumentation.instrument('search_engine.bulk.run', payload.merge(stats)) do |ctx|
            # Stage 1 - inputs that are not referrers of other inputs.
            stage1_failures = process_stage(stage1_list) do |klass|
              mode == :index ? klass.index_collection : klass.reindex_collection!
            end

            # Stage 2 - collected referencers, each processed once.
            cascade_failures = process_stage(cascade_order) do |klass|
              mode == :index ? klass.index_collection(pre: :ensure, force_rebuild: true) : klass.reindex_collection!
            end

            failed_collections_total = stage1_failures + cascade_failures
            ctx[:failed_collections_total] = failed_collections_total
          end
        end

        payload[:failed_collections_total] = failed_collections_total
        payload.merge(stats)
      end

      # Resolve each name to its model and yield it; unresolved names are skipped.
      #
      # @param names [Array<String>]
      # @yieldparam klass [Class] resolved collection model
      # @return [Integer] number of names that could not be resolved
      def process_stage(names)
        names.count do |name|
          klass = safe_collection_class(name)
          yield klass if klass
          !klass
        end
      end

      # Normalize inputs to unique, non-blank logical collection names.
      #
      # Classes contribute `collection.to_s` when they respond to `collection`, otherwise
      # their class name; everything else contributes `to_s`.
      #
      # @param list [Array<Symbol, String, Class>] may be nested and may contain nils
      # @return [Array<String>]
      def normalize_targets(list)
        names = Array(list).flatten.compact.map do |item|
          next item.to_s unless item.is_a?(Class)

          item.respond_to?(:collection) ? item.collection.to_s : item.name.to_s
        end
        names.reject { |name| name.strip.empty? }.uniq
      end

      # Compute the subset of inputs that are referrers of other inputs.
      #
      # @param reverse_graph [Hash] target => [{ referrer:, ... }, ...]; keys may be String or Symbol
      # @param input_set [Hash{String=>true}]
      # @return [Set<String>]
      def internal_referrers_within_inputs(reverse_graph, input_set)
        string_keyed_graph(reverse_graph).each_with_object(Set.new) do |(target, edges), refs|
          next unless input_set[target]

          edges.each do |edge|
            name = referrer_name(edge)
            refs << name if input_set[name]
          end
        end
      end

      # Unique referencers of any input, in first-seen order (input order, then edge order).
      #
      # @param reverse_graph [Hash] target => [{ referrer:, ... }, ...]; keys may be String or Symbol
      # @param inputs [Array<String>]
      # @return [Array<String>]
      def unique_referencers_of_inputs(reverse_graph, inputs)
        graph = string_keyed_graph(reverse_graph)
        seen = Set.new
        Array(inputs).each do |input|
          graph.fetch(input.to_s, []).each do |edge|
            name = referrer_name(edge)
            seen << name unless name.strip.empty?
          end
        end
        seen.to_a
      end

      # Topologically sort a subset of nodes using reverse_graph edges.
      #
      # For every edge referrer -> target with both ends in the subset, the target is placed
      # before the referrer. Ties are broken by name; nodes caught in a cycle are appended
      # at the end in name order.
      #
      # @param reverse_graph [Hash] target => [{ referrer:, ... }, ...]; keys may be String or Symbol
      # @param subset [Array<String>]
      # @return [Array<String>]
      def topo_sort_subset(reverse_graph, subset)
        nodes = Array(subset).uniq
        member = nodes.to_h { |node| [node, true] }
        successors = Hash.new { |hash, key| hash[key] = Set.new }
        indegree = nodes.to_h { |node| [node, 0] }

        string_keyed_graph(reverse_graph).each do |target, edges|
          next unless member[target]

          edges.each do |edge|
            referrer = referrer_name(edge)
            next unless member[referrer]

            # Set#add? is nil for a duplicate edge, so each edge is counted once.
            indegree[referrer] += 1 if successors[target].add?(referrer)
          end
        end

        # Kahn's algorithm; the ready list is re-sorted each round to keep name order stable.
        ready = nodes.select { |node| indegree[node] <= 0 }.sort
        order = []
        until ready.empty?
          current = ready.shift
          order << current
          successors[current].each do |dependent|
            indegree[dependent] -= 1
            ready << dependent if indegree[dependent] <= 0
          end
          ready.sort!
        end

        order + (nodes - order).sort
      end

      # Build a String-keyed copy of the reverse graph so every helper agrees on key type.
      # Edge lists of keys that collapse to the same string are concatenated.
      #
      # @param reverse_graph [Hash, nil]
      # @return [Hash{String=>Array}]
      def string_keyed_graph(reverse_graph)
        (reverse_graph || {}).each_with_object({}) do |(target, edges), graph|
          (graph[target.to_s] ||= []).concat(Array(edges))
        end
      end

      # Read the referrer name from an edge that may use Symbol or String keys.
      #
      # @param edge [Hash]
      # @return [String] empty when the edge carries no referrer
      def referrer_name(edge)
        (edge[:referrer] || edge['referrer']).to_s
      end

      # Resolve a collection model class from a logical collection name.
      #
      # Any StandardError raised by the resolver is treated as "not found".
      #
      # @param name [String]
      # @return [Class, nil]
      def safe_collection_class(name)
        SearchEngine::CollectionResolver.model_for_logical(name)
      rescue StandardError
        nil
      end

      # Eager-load the models managed by the loader stored in
      # `SearchEngine`'s `@_models_loader`, if one is set.
      #
      # `setup` is called once, tracked through `@_models_loader_setup`. Best effort:
      # any StandardError is swallowed so a missing loader does not abort the run.
      #
      # @return [nil]
      def ensure_models_loaded_from_configured_path!
        loader = SearchEngine.instance_variable_get(:@_models_loader)
        return unless loader

        unless SearchEngine.instance_variable_defined?(:@_models_loader_setup)
          loader.setup
          SearchEngine.instance_variable_set(:@_models_loader_setup, true)
        end

        loader.eager_load
        nil
      rescue StandardError
        # Best-effort: proceed even if the dedicated loader is unavailable
        nil
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Thin facade over the client's `clear_cache` call.
  #
  # The call is wrapped in a `search_engine.cache.clear` instrumentation event, and a
  # caller-supplied client can be injected instead of the default one.
  module Cache
    class << self
      # Clear the server-side search cache through the given (or default) client.
      #
      # The client is picked inside the instrumented block, so resolving
      # `SearchEngine.client` is part of the instrumented work.
      #
      # @param client [SearchEngine::Client, nil] optional injected client; falls back to
      #   `SearchEngine.client`
      # @return [Object] whatever the instrumented block yields back, i.e. the result of
      #   `clear_cache` (presumably a Hash with symbolized keys - confirm against
      #   SearchEngine::Client#clear_cache)
      # @see SearchEngine::Client#clear_cache
      def clear(client: nil)
        SearchEngine::Instrumentation.instrument('search_engine.cache.clear', {}) do
          (client || SearchEngine.client).clear_cache
        end
      end

      private

      # Client exposed by the configuration object, when it offers one.
      #
      # NOTE(review): not referenced by any method visible in this module.
      #
      # @return [Object, nil] `SearchEngine.config.client`, or nil when the config has no
      #   `client` reader or any StandardError is raised while reading it
      def configured_client
        SearchEngine.config.respond_to?(:client) ? SearchEngine.config.client : nil
      rescue StandardError
        nil
      end
    end
  end
end