search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ class Base
5
+ module IndexMaintenance
6
+ # Lifecycle orchestration for full/partial indexing flows.
7
+ module Lifecycle
8
+ extend ActiveSupport::Concern
9
+
10
+ class_methods do
11
# Entry point for indexing this collection.
#
# Prints a banner, then dispatches to the full flow when +partition+ is nil
# and to the partial flow otherwise. +force_rebuild+ is only forwarded to
# the full flow; the partial flow does not receive it.
#
# @param partition [Object, Array<Object>, nil] nil selects the full flow
# @param client [SearchEngine::Client, nil] falls back to SearchEngine.client
# @param pre [Symbol, nil] :ensure (ensure presence) or :index (ensure + fix drift)
# @param force_rebuild [Boolean] when true, force schema rebuild (blue/green)
# @return [void]
def index_collection(partition: nil, client: nil, pre: nil, force_rebuild: false)
  label = (respond_to?(:collection) ? collection : name).to_s
  puts
  puts(%(>>>>>> Indexing Collection "#{label}"))
  active_client = client || SearchEngine.client

  unless partition.nil?
    __se_index_partial(partition: partition, client: active_client, pre: pre)
    return
  end

  __se_index_full(client: active_client, pre: pre, force_rebuild: force_rebuild)
  nil
end
30
+
31
# Drop the collection and then index it again from scratch.
#
# @param pre [Symbol, nil] forwarded unchanged to +index_collection+
# @return [Object] whatever +index_collection+ returns
def reindex_collection!(pre: nil)
  drop_collection!

  # Runs only if the drop above did not raise.
  index_collection(pre: pre)
end
35
+
36
# Rebuild one or more partitions through SearchEngine::Indexer.
#
# A nil or empty +partition+ is treated as a single nil target. When exactly
# one target results, the indexer's return value is returned directly;
# otherwise an array with one result per target is returned.
#
# @param partition [Object, Array<Object>, nil]
# @param into [Object, nil] forwarded to the indexer as +into:+
# @param pre [Symbol, nil] when truthy, dependencies are preflighted first
# @return [Object, Array<Object>]
def rebuild_partition!(partition:, into: nil, pre: nil)
  __se_preflight_dependencies!(mode: pre, client: SearchEngine.client) if pre

  blank = partition.nil? || (partition.respond_to?(:empty?) && partition.empty?)
  targets = blank ? [nil] : Array(partition)

  summaries = targets.map do |target|
    SearchEngine::Indexer.rebuild_partition!(self, partition: target, into: into)
  end

  targets.size == 1 ? summaries.first : summaries
end
51
+
52
# Full indexing flow: presence check, schema create/apply, drift check,
# indexing and retention, in that order.
#
# @param client [Object] client passed to the schema diff and to each step
# @param pre [Symbol, nil] when truthy, dependencies are preflighted first
# @param force_rebuild [Boolean] forwarded to the drift check and schema apply
# @return [Object] result of the retention step
def __se_index_full(client:, pre: nil, force_rebuild: false)
  collection_label = (respond_to?(:collection) ? collection : name).to_s
  __se_preflight_dependencies!(mode: pre, client: client) if pre

  diff_hash = SearchEngine::Schema.diff(self, client: client)[:diff] || {}
  absent = __se_schema_missing?(diff_hash)
  presence = absent ? 'missing' : 'present'
  puts("Step 1: Presence — processing → #{presence}")

  # Each step hands its (applied, indexed) state to the next one.
  applied, indexed = __se_full_apply_if_missing(client, absent)
  drifted = __se_full_check_drift(diff_hash, absent, force_rebuild)
  applied, indexed = __se_full_apply_if_drift(client, drifted, applied, indexed, force_rebuild)

  __se_full_indexation(applied, indexed)
  __se_full_retention(applied, collection_label, client)
end
72
+
73
# Step 2 of the full flow: create and apply the schema when the collection
# is missing, indexing partitions inside the apply block.
#
# @param client [Object] forwarded to SearchEngine::Schema.apply!
# @param missing [Boolean] whether the collection is absent
# @return [Array(Boolean, Object)] [applied, index status from inside apply];
#   [false, false] when nothing was applied
def __se_full_apply_if_missing(client, missing)
  unless missing
    puts('Step 2: Create+Apply Schema — skip (collection present)')
    return [false, false]
  end

  index_status = false
  puts('Step 2: Create+Apply Schema — processing')
  SearchEngine::Schema.apply!(self, client: client) do |new_physical|
    index_status = __se_index_partitions!(into: new_physical)
  end
  puts('Step 2: Create+Apply Schema — done')
  [true, index_status]
end
88
+
89
# Step 3 of the full flow: decide whether a new schema must be applied.
#
# Skipped (returns false) when the collection was just created. Otherwise
# returns the drift result, or true when there is no drift but
# +force_rebuild+ is set.
#
# @param diff [Hash] schema diff passed to +__se_schema_drift?+
# @param missing [Boolean] whether the collection was absent at step 1
# @param force_rebuild [Boolean]
# @return [Boolean, Object] truthy when step 4 should apply a schema
def __se_full_check_drift(diff, missing, force_rebuild)
  if missing
    puts('Step 3: Check Schema Status — skip (just created)')
    return false
  end

  puts('Step 3: Check Schema Status — processing')
  drifted = __se_schema_drift?(diff)

  if drifted
    puts('Step 3: Check Schema Status — drift')
  elsif force_rebuild
    puts('Step 3: Check Schema Status — force_rebuild')
    return true
  else
    puts('Step 3: Check Schema Status — in_sync')
  end
  drifted
end
103
+
104
# Step 4 of the full flow: apply a new schema when drift was detected,
# indexing partitions inside the apply block.
#
# @param client [Object] forwarded to SearchEngine::Schema.apply!
# @param drift [Boolean, Object] truthy when a new schema must be applied
# @param applied [Boolean] state carried over from step 2
# @param indexed_inside_apply [Object] state carried over from step 2
# @param force_rebuild [Boolean] forwarded to SearchEngine::Schema.apply!
# @return [Array(Boolean, Object)] updated [applied, indexed_inside_apply];
#   the incoming pair is returned untouched when there is no drift
def __se_full_apply_if_drift(client, drift, applied, indexed_inside_apply, force_rebuild)
  unless drift
    puts('Step 4: Apply New Schema — skip')
    return [applied, indexed_inside_apply]
  end

  puts('Step 4: Apply New Schema — processing')
  SearchEngine::Schema.apply!(self, client: client, force_rebuild: force_rebuild) do |new_physical|
    indexed_inside_apply = __se_index_partitions!(into: new_physical)
  end
  puts('Step 4: Apply New Schema — done')
  [true, indexed_inside_apply]
end
117
+
118
# Step 5 of the full flow: index partitions unless that already happened
# during schema apply, then cascade to referencers when the status is :ok.
#
# @param applied [Boolean] whether a schema apply ran in this flow
# @param indexed_inside_apply [Object] index status captured inside apply
# @return [Object, nil] cascade result, or nil when the cascade is skipped
def __se_full_indexation(applied, indexed_inside_apply)
  cascade =
    if applied && indexed_inside_apply
      puts('Step 5: Indexing — skip (performed during schema apply)')
      begin
        # A status that cannot be turned into a symbol counts as not ok.
        indexed_inside_apply.to_sym == :ok
      rescue StandardError
        false
      end
    else
      puts('Step 5: Indexing — processing')
      outcome = __se_index_partitions!(into: nil)
      puts('Step 5: Indexing — done')
      outcome == :ok
    end

  __se_cascade_after_indexation!(context: :full) if cascade
end
135
+
136
# Step 6 of the full flow: run retention cleanup unless a schema apply
# already took care of it.
#
# @param applied [Boolean] whether a schema apply ran in this flow
# @param logical [String] logical collection name
# @param client [Object] forwarded to the cleanup helper
# @return [nil]
def __se_full_retention(applied, logical, client)
  return puts('Step 6: Retention Cleanup — skip (handled by schema apply)') if applied

  puts('Step 6: Retention Cleanup — processing')
  removed = __se_retention_cleanup!(logical: logical, client: client)
  puts("Step 6: Retention Cleanup — dropped=#{removed.inspect}")
end
145
+
146
# Partial indexing flow: rebuild only the given partitions of an existing,
# in-sync collection.
#
# Quits early (returning nil) when the collection is missing or its schema
# has drifted. Referencers are cascaded only when every rebuilt partition
# reports a status of :ok.
#
# @param partition [Object, Array<Object>] partition key(s) to rebuild
# @param client [Object] client passed to the schema diff and preflight
# @param pre [Symbol, nil] when truthy, dependencies are preflighted
# @return [Object, nil] cascade result, or nil when no cascade ran
def __se_index_partial(partition:, client:, pre: nil)
  targets = Array(partition)
  diff = SearchEngine::Schema.diff(self, client: client)[:diff] || {}

  absent = __se_schema_missing?(diff)
  puts("Step 1: Presence — processing → #{absent ? 'missing' : 'present'}")
  return puts('Partial: collection is not present. Quit early.') if absent

  puts('Step 2: Check Schema Status — processing')
  if __se_schema_drift?(diff)
    return puts('Partial: schema is not up-to-date. Exit early (run full indexing).')
  end

  puts('Step 2: Check Schema Status — in_sync')

  __se_preflight_dependencies!(mode: pre, client: client) if pre

  puts('Step 3: Partial Indexing — processing')
  healthy = true
  targets.each do |target|
    summary = SearchEngine::Indexer.rebuild_partition!(self, partition: target, into: nil)
    puts(SearchEngine::Logging::PartitionProgress.line(target, summary))
    # After the first failure the remaining statuses are no longer inspected.
    next unless healthy

    healthy = begin
      summary.status == :ok
    rescue StandardError
      false
    end
  end
  puts('Step 3: Partial Indexing — done')

  __se_cascade_after_indexation!(context: :full) if healthy
end
182
+
183
+ # rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
184
# Trigger a cascade reindex of collections that reference this one and
# print one line per outcome.
#
# Does nothing beyond printing a notice when the instrumentation context
# carries a truthy :bulk_suppress_cascade flag. Any StandardError raised
# along the way is reported through +warn+ instead of propagating; when the
# error exposes +status+ and/or +body+, truncated previews are appended.
#
# @param context [Symbol] forwarded to SearchEngine::Cascade.cascade_reindex!
# @return [nil]
def __se_cascade_after_indexation!(context: :full)
  if SearchEngine::Instrumentation.context&.[](:bulk_suppress_cascade)
    puts
    puts('>>>>>> Cascade Referencers — suppressed (bulk)')
    return
  end

  puts
  puts(%(>>>>>> Cascade Referencers))
  report = SearchEngine::Cascade.cascade_reindex!(source: self, ids: nil, context: context)
  entries = Array(report[:outcomes])
  puts(' none') if entries.empty?

  # Known modes get a friendlier label; anything else is printed verbatim.
  labels = {
    'partial' => 'partial reindex',
    'full' => 'full reindex',
    'skipped_unregistered' => 'skipped (unregistered)',
    'skipped_cycle' => 'skipped (cycle)'
  }
  entries.each do |entry|
    referencer = entry[:collection] || entry['collection']
    mode = (entry[:mode] || entry['mode']).to_s
    puts(%( Referencer "#{referencer}" → #{labels.fetch(mode, mode)}))
  end
  puts('>>>>>> Cascade Done')
rescue StandardError => error
  headline = "Cascade — error=#{error.class}: #{error.message.to_s[0, 200]}"
  unless error.respond_to?(:status) || error.respond_to?(:body)
    warn(headline)
    return
  end

  status = begin
    error.status if error.respond_to?(:status)
  rescue StandardError
    nil
  end
  preview = begin
    payload = error.respond_to?(:body) ? error.body : nil
    text = payload.is_a?(Hash) ? payload.inspect : payload.to_s
    text[0, 500]
  rescue StandardError
    nil
  end

  parts = [headline]
  parts << "status=#{status}" if status
  parts << "body=#{preview}" if preview
  warn(parts.join(' '))
end
240
+ # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
241
+
242
# Best-effort pruning of old schema history for this collection.
#
# Accepts and ignores any positional arguments. A StandardError raised by
# the pruning call is swallowed and reported as nil.
#
# @return [Object, nil] result of SearchEngine::Schema.prune_history!, or
#   nil when it raised
def __se_retention_cleanup!(*_)
  begin
    SearchEngine::Schema.prune_history!(self)
  rescue StandardError
    nil
  end
end
247
+ end
248
+ end
249
+ end
250
+ end
251
+ end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SearchEngine
4
+ class Base
5
+ module IndexMaintenance
6
+ # Schema lifecycle helpers (ensure/apply/drop/prune).
7
+ module Schema
8
+ extend ActiveSupport::Concern
9
+
10
+ class_methods do
11
# Compiled schema for this model, as produced by SearchEngine::Schema.compile.
#
# @return [Object] whatever SearchEngine::Schema.compile returns for this class
def schema
  compiler = SearchEngine::Schema
  compiler.compile(self)
end
14
+
15
# Fetch the live schema of the physical collection behind this model.
#
# The logical name is resolved through the alias first. A nil or blank alias
# target is treated as "no alias" and the logical name is used instead, which
# matches how drop_collection! and recreate_collection! interpret alias
# lookups (previously a blank target was passed on as the collection name).
#
# @return [Object] result of client.retrieve_collection_schema
def current_schema
  client = SearchEngine.client
  logical = respond_to?(:collection) ? collection.to_s : name.to_s

  alias_target = client.resolve_alias(logical)
  resolved = alias_target && !alias_target.to_s.strip.empty?
  physical = resolved ? alias_target.to_s : logical

  client.retrieve_collection_schema(physical)
end
21
+
22
# Diff between the compiled schema and the live collection.
#
# @return [Object, nil] the :diff entry of SearchEngine::Schema.diff's result
def schema_diff
  SearchEngine::Schema.diff(self, client: SearchEngine.client)[:diff]
end
27
+
28
# Attempt an in-place schema update and report the outcome on stdout.
#
# @return [Object] the value returned by SearchEngine::Schema.update!;
#   truthy when the schema was updated in place
def update_collection!
  puts 'Update Collection — analyzing diff for in-place update...'
  outcome = SearchEngine::Schema.update!(self, client: SearchEngine.client)

  message = if outcome
              'Update Collection — schema updated in-place (PATCH)'
            else
              'Update Collection — in-place update not possible (no changes or incompatible)'
            end
  puts message
  outcome
end
41
+
42
# Delete the physical collection behind this model, if one exists.
#
# The physical name is the alias target when the alias resolves to a
# non-blank value; otherwise the logical name is used, provided a live
# schema can be retrieved for it. When neither exists, nothing is deleted.
#
# @return [nil]
def drop_collection!
  api = SearchEngine.client
  label = (respond_to?(:collection) ? collection : name).to_s

  # Resolve alias with a safer timeout for control-plane operations
  target = api.resolve_alias(label, timeout_ms: 10_000)
  resolved = target ? target.to_s : ''
  if resolved.strip.empty?
    physical = api.retrieve_collection_schema(label, timeout_ms: 10_000) ? label : nil
  else
    physical = resolved
  end

  unless physical
    puts('Drop Collection — skip (not present)')
    return
  end

  puts
  puts(%(>>>>>> Dropping Collection "#{label}"))
  puts("Drop Collection — processing (logical=#{label} physical=#{physical})")
  # Use an extended timeout to accommodate large collection drops
  api.delete_collection(physical, timeout_ms: 60_000)
  puts('Drop Collection — done')
  puts(%(>>>>>> Dropped Collection "#{label}"))
  nil
end
69
+
70
# Drop the existing physical collection (if any) and create a fresh one from
# the compiled schema.
#
# The physical name is the alias target when the alias resolves to a
# non-blank value; otherwise the logical name is used, provided a live
# schema can be retrieved for it. Lookups and the delete use the same
# explicit timeouts as drop_collection! (10s for control-plane lookups, 60s
# for the delete), so dropping a large collection is not cut short by the
# default request timeout.
#
# @return [nil]
def recreate_collection!
  client = SearchEngine.client
  logical = respond_to?(:collection) ? collection.to_s : name.to_s

  # Resolve alias with a safer timeout for control-plane operations
  alias_target = client.resolve_alias(logical, timeout_ms: 10_000)
  physical = if alias_target && !alias_target.to_s.strip.empty?
               alias_target.to_s
             else
               live = client.retrieve_collection_schema(logical, timeout_ms: 10_000)
               live ? logical : nil
             end

  if physical
    puts("Recreate Collection — dropping existing (logical=#{logical} physical=#{physical})")
    # Use an extended timeout to accommodate large collection drops
    client.delete_collection(physical, timeout_ms: 60_000)
  else
    puts('Recreate Collection — no existing collection (skip drop)')
  end

  # Named +compiled+ so the local does not shadow the +schema+ class method.
  compiled = SearchEngine::Schema.compile(self)
  puts("Recreate Collection — creating collection with schema (logical=#{logical})")
  client.create_collection(compiled)
  puts('Recreate Collection — done')
  nil
end
95
+
96
# Prune old schema history for this collection.
#
# The arguments are accepted only for call-site compatibility and are
# ignored. The full indexing flow invokes this helper with +logical:+ and
# +client:+ keywords; the previous signature required +_logical:+ and
# +_client:+ instead, so that call raised ArgumentError. Any positional or
# keyword arguments are now accepted, including the old underscore-prefixed
# names.
#
# @return [Object] result of SearchEngine::Schema.prune_history!
def __se_retention_cleanup!(*_args, **_opts)
  SearchEngine::Schema.prune_history!(self)
end
99
+
100
# Whether the diff reports the live collection as missing.
#
# @param diff [Hash] schema diff; only +:collection_options+ is inspected
# @return [Boolean] true when +diff[:collection_options]+ is a Hash whose
#   +:live+ entry equals +:missing+
def __se_schema_missing?(diff)
  options = diff[:collection_options]
  return false unless options.is_a?(Hash)

  options[:live] == :missing
end
104
+
105
# Whether the diff contains any difference between compiled and live schema.
#
# List-valued entries (+:added_fields+, +:removed_fields+,
# +:stale_references+) count when they hold at least one truthy element;
# hash-valued entries (+:changed_fields+, +:collection_options+) count when
# they are non-empty.
#
# @param diff [Hash] schema diff
# @return [Boolean]
def __se_schema_drift?(diff)
  list_hits = %i[added_fields removed_fields stale_references].map do |key|
    Array(diff[key]).any?
  end
  hash_hits = %i[changed_fields collection_options].map do |key|
    !(diff[key] || {}).to_h.empty?
  end

  (list_hits + hash_hits).any?
end
113
+ end
114
+ end
115
+ end
116
+ end
117
+ end