noiseless 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +28 -0
  3. data/README.md +214 -0
  4. data/lib/application_search.rb +15 -0
  5. data/lib/noiseless/adapter.rb +313 -0
  6. data/lib/noiseless/adapters/elasticsearch.rb +70 -0
  7. data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
  8. data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
  9. data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
  10. data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
  11. data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
  12. data/lib/noiseless/adapters/open_search.rb +208 -0
  13. data/lib/noiseless/adapters/postgresql.rb +171 -0
  14. data/lib/noiseless/adapters/typesense.rb +70 -0
  15. data/lib/noiseless/adapters.rb +14 -0
  16. data/lib/noiseless/ast/aggregation.rb +56 -0
  17. data/lib/noiseless/ast/bool.rb +16 -0
  18. data/lib/noiseless/ast/bulk.rb +18 -0
  19. data/lib/noiseless/ast/collapse.rb +16 -0
  20. data/lib/noiseless/ast/combined_fields.rb +33 -0
  21. data/lib/noiseless/ast/conversation.rb +29 -0
  22. data/lib/noiseless/ast/filter.rb +15 -0
  23. data/lib/noiseless/ast/hybrid.rb +35 -0
  24. data/lib/noiseless/ast/image_query.rb +29 -0
  25. data/lib/noiseless/ast/join.rb +31 -0
  26. data/lib/noiseless/ast/match.rb +15 -0
  27. data/lib/noiseless/ast/multi_match.rb +24 -0
  28. data/lib/noiseless/ast/paginate.rb +15 -0
  29. data/lib/noiseless/ast/prefix.rb +15 -0
  30. data/lib/noiseless/ast/range.rb +18 -0
  31. data/lib/noiseless/ast/root.rb +69 -0
  32. data/lib/noiseless/ast/search_after.rb +14 -0
  33. data/lib/noiseless/ast/sort.rb +15 -0
  34. data/lib/noiseless/ast/vector.rb +27 -0
  35. data/lib/noiseless/ast/wildcard.rb +15 -0
  36. data/lib/noiseless/ast.rb +30 -0
  37. data/lib/noiseless/bulk_importer.rb +195 -0
  38. data/lib/noiseless/callbacks.rb +138 -0
  39. data/lib/noiseless/connection_manager.rb +26 -0
  40. data/lib/noiseless/document_manager.rb +137 -0
  41. data/lib/noiseless/dsl.rb +107 -0
  42. data/lib/noiseless/generators/application_search_generator.rb +24 -0
  43. data/lib/noiseless/instrumentation.rb +174 -0
  44. data/lib/noiseless/introspection/console.rb +228 -0
  45. data/lib/noiseless/introspection/query_visualizer.rb +533 -0
  46. data/lib/noiseless/introspection.rb +221 -0
  47. data/lib/noiseless/mapping.rb +253 -0
  48. data/lib/noiseless/mapping_definition_processor.rb +231 -0
  49. data/lib/noiseless/model.rb +111 -0
  50. data/lib/noiseless/model_registry.rb +77 -0
  51. data/lib/noiseless/multi_search.rb +244 -0
  52. data/lib/noiseless/pagination.rb +375 -0
  53. data/lib/noiseless/query_builder.rb +284 -0
  54. data/lib/noiseless/railtie.rb +35 -0
  55. data/lib/noiseless/response/aggregations.rb +46 -0
  56. data/lib/noiseless/response/empty.rb +20 -0
  57. data/lib/noiseless/response/records.rb +94 -0
  58. data/lib/noiseless/response/results.rb +110 -0
  59. data/lib/noiseless/response/suggestions.rb +55 -0
  60. data/lib/noiseless/response.rb +98 -0
  61. data/lib/noiseless/response_factory.rb +32 -0
  62. data/lib/noiseless/runtime_reset_middleware.rb +15 -0
  63. data/lib/noiseless/search_index_update_job.rb +84 -0
  64. data/lib/noiseless/test_case.rb +230 -0
  65. data/lib/noiseless/test_helper.rb +295 -0
  66. data/lib/noiseless/version.rb +2 -2
  67. data/lib/noiseless.rb +130 -2
  68. data/lib/tasks/benchmark.rake +35 -0
  69. data/lib/tasks/release.rake +22 -0
  70. data/lib/tasks/test.rake +11 -0
  71. metadata +260 -14
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Noiseless
4
+ module Adapters
5
+ module ExecutionModules
6
+ # pgvector support for semantic/vector search in PostgreSQL
7
+ # Provides similarity search using embeddings
8
+ #
9
+ # Required:
10
+ # CREATE EXTENSION IF NOT EXISTS vector;
11
+ #
12
+ # Table setup:
13
+ # ALTER TABLE your_table ADD COLUMN embedding vector(1536);
14
+ # CREATE INDEX ON your_table USING ivfflat (embedding vector_cosine_ops);
15
+ #
16
+ module PgvectorSupport
17
# Perform semantic search using vector similarity.
#
# The pgvector literal is interpolated into the SELECT / WHERE / ORDER BY
# fragments instead of being bound, so every component of +embedding+ is
# coerced with Float() first; anything that is not a finite number is rejected
# before any SQL text is assembled.
#
# @param scope [ActiveRecord::Relation] The base scope to search
# @param embedding [Array<Numeric>] The query embedding vector
# @param column [Symbol] The column containing embeddings (default: :embedding)
# @param limit [Integer] Maximum results to return
# @param distance_threshold [Float] Maximum distance threshold (optional)
# @param distance_metric [Symbol] :cosine, :l2, or :inner_product
# @return [ActiveRecord::Relation] Scope with vector similarity ordering, or the
#   scope unchanged when pgvector is not available
# @raise [ArgumentError] if the embedding holds a non-numeric or non-finite value
#
def vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil,
                  distance_metric: :cosine)
  return scope unless pgvector_available?

  components = Array(embedding).map do |component|
    number = begin
      Float(component)
    rescue ArgumentError, TypeError
      raise ArgumentError, "embedding must contain only numeric values, got #{component.inspect}"
    end
    raise ArgumentError, "embedding must contain only finite values, got #{component.inspect}" unless number.finite?

    number
  end

  # Single distance expression reused for the projection, the threshold and the ordering.
  distance_sql = "#{quoted_column(column)} #{distance_operator(distance_metric)} '[#{components.join(',')}]'"

  scope = scope.select("#{scope.table_name}.*", "#{distance_sql} AS vector_distance")
  scope = scope.where("#{distance_sql} < ?", distance_threshold) if distance_threshold

  # Ascending distance = more similar first.
  scope.order(Arel.sql(distance_sql)).limit(limit)
end
52
+
53
# Hybrid search combining text and vector search.
#
# All values are inlined into the SQL fragments after being made safe: the text
# query goes through the connection's +quote+, the embedding components and the
# two weights go through Float(). Nothing is left as a "?" placeholder, because
# the score expressions live in the SELECT list where no bind values are applied.
#
# @param scope [ActiveRecord::Relation] Base scope
# @param text_query [String] Text query for pg_trgm search
# @param embedding [Array<Numeric>] Query embedding for vector search
# @param text_fields [Array<Symbol>] Fields to search with text (must not be empty)
# @param vector_column [Symbol] Column containing embeddings
# @param text_weight [Float] Weight for text similarity (0.0-1.0)
# @param vector_weight [Float] Weight for vector similarity (0.0-1.0)
# @param limit [Integer] Maximum results to return
# @return [ActiveRecord::Relation] Scope ordered by combined_score, or the scope
#   unchanged when pgvector is not available
# @raise [ArgumentError] if text_fields is empty, or the embedding / weights are not numeric
#
def hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding,
                  text_weight: 0.5, vector_weight: 0.5, limit: 20)
  return scope unless pgvector_available?

  fields = Array(text_fields)
  # An empty field list would otherwise produce "() / 0" in the generated SQL.
  raise ArgumentError, "text_fields must not be empty" if fields.empty?

  components = Array(embedding).map do |component|
    number = begin
      Float(component)
    rescue ArgumentError, TypeError
      raise ArgumentError, "embedding must contain only numeric values, got #{component.inspect}"
    end
    raise ArgumentError, "embedding must contain only finite values, got #{component.inspect}" unless number.finite?

    number
  end

  quoted_query = ActiveRecord::Base.connection.quote(text_query.to_s)
  text_factor = Float(text_weight)
  vector_factor = Float(vector_weight)
  embedding_column = quoted_column(vector_column)

  text_similarity = fields.map { |f| "similarity(#{quoted_column(f)}, #{quoted_query})" }.join(" + ")
  # Text similarity (0-1 per field, averaged), then weighted.
  text_score = "((#{text_similarity}) / #{fields.size}) * #{text_factor}"
  # Vector similarity (convert distance to similarity: 1 - distance for cosine), then weighted.
  vector_score = "(1 - (#{embedding_column} <=> '[#{components.join(',')}]')) * #{vector_factor}"

  scope.select(
    "#{scope.table_name}.*",
    "#{text_score} AS text_score",
    "#{vector_score} AS vector_score",
    "(#{text_score} + #{vector_score}) AS combined_score"
  ).where("(#{text_similarity}) > 0 OR #{embedding_column} IS NOT NULL")
       .order(Arel.sql("combined_score DESC"))
       .limit(limit)
end
91
+
92
# Execute a KNN (K-Nearest Neighbors) search using the cosine distance operator.
#
# Embedding components are coerced with Float() before being interpolated into
# the SQL fragments, and a nil +filters+ is treated like an empty Hash.
#
# @param model [Class] The ActiveRecord model
# @param embedding [Array<Numeric>] Query embedding
# @param k [Integer] Number of nearest neighbors
# @param column [Symbol] Embedding column
# @param filters [Hash, nil] Additional WHERE conditions
# @return [Hash, Array] The Hash built by format_knn_response; an empty Array
#   when pgvector is not available
# @raise [ArgumentError] if the embedding holds a non-numeric or non-finite value
#
def knn_search(model, embedding, k: 10, column: :embedding, filters: {})
  return [] unless pgvector_available?

  components = Array(embedding).map do |component|
    number = begin
      Float(component)
    rescue ArgumentError, TypeError
      raise ArgumentError, "embedding must contain only numeric values, got #{component.inspect}"
    end
    raise ArgumentError, "embedding must contain only finite values, got #{component.inspect}" unless number.finite?

    number
  end

  distance_sql = "#{quoted_column(column)} <=> '[#{components.join(',')}]'"

  scope = model.all
  scope = scope.where(filters) unless filters.nil? || filters.empty?

  results = scope.select("#{model.table_name}.*", "#{distance_sql} AS distance")
                 .order(Arel.sql(distance_sql))
                 .limit(k)

  format_knn_response(results, model)
end
117
+
118
# Store an embedding for a record.
#
# The vector is serialized as "[a,b,c]" text and written with update_column.
#
# @param record [ActiveRecord::Base] The record to update
# @param embedding [Array<Float>] The embedding vector
# @param column [Symbol] The column to store the embedding
# @return [Object, false] whatever update_column returns, or false when
#   pgvector is not available
#
def store_embedding(record, embedding, column: :embedding)
  if pgvector_available?
    serialized = "[" + embedding.join(",") + "]"
    record.update_column(column, serialized)
  else
    false
  end
end
130
+
131
# Batch store embeddings with a single UPDATE ... FROM (VALUES ...) statement.
#
# Identifiers are quoted through the connection, embedding components are
# coerced with Float() before being inlined, and the id cast follows the
# model's "id" column type (falling back to uuid when that column is unknown).
# Any failure, including an invalid embedding, is logged and reported as 0;
# this method does not raise.
#
# @param model [Class] The ActiveRecord model
# @param embeddings [Hash<String, Array<Numeric>>] Map of ID -> embedding
# @param column [Symbol] The column to store embeddings
# @return [Integer] number of embeddings submitted, or 0 when nothing was
#   written (pgvector unavailable, empty input, or an error)
#
def batch_store_embeddings(model, embeddings, column: :embedding)
  return 0 unless pgvector_available?
  # "VALUES" with no rows is not valid SQL, so there is nothing to run.
  return 0 if embeddings.nil? || embeddings.empty?

  connection = ActiveRecord::Base.connection

  rows = embeddings.map do |id, emb|
    components = Array(emb).map do |component|
      number = Float(component)
      raise ArgumentError, "embedding must contain only finite values, got #{component.inspect}" unless number.finite?

      number
    end
    "(#{connection.quote(id)}, '[#{components.join(',')}]'::vector)"
  end.join(",")

  table = connection.quote_table_name(model.table_name)
  id_column = model.columns_hash["id"]
  id_type = id_column ? id_column.sql_type : "uuid"

  sql = <<~SQL.squish
    UPDATE #{table}
    SET #{connection.quote_column_name(column)} = v.embedding
    FROM (VALUES #{rows}) AS v(id, embedding)
    WHERE #{table}.id = v.id::#{id_type}
  SQL

  connection.execute(sql)
  embeddings.size
rescue StandardError => e
  message = "Failed to batch store embeddings: #{e.message}"
  # Rails may not be loaded; fall back to stderr instead of raising from the rescue.
  if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
    Rails.logger.error(message)
  else
    warn(message)
  end
  0
end
158
+
159
# Find similar records to a given record.
#
# The record's own embedding is used as the query vector. When the attribute
# comes back as pgvector text ("[0.1,0.2]" - the format store_embedding writes)
# rather than an Array, it is split into its components first.
# NOTE(review): whether the attribute reads back as String or Array depends on
# how the column type is registered with ActiveRecord - confirm for your setup.
#
# @param record [ActiveRecord::Base] The reference record
# @param limit [Integer] Number of similar records
# @param column [Symbol] Embedding column
# @param exclude_self [Boolean] Exclude the reference record
# @return [ActiveRecord::Relation] an empty relation when the record has no
#   embedding or pgvector is not available
#
def find_similar(record, limit: 10, column: :embedding, exclude_self: true)
  # public_send: the column name comes from the caller, so only public readers are reachable.
  embedding = record.public_send(column)
  embedding = embedding.delete("[] ").split(",") if embedding.is_a?(String)

  return record.class.none if embedding.nil?
  return record.class.none if embedding.respond_to?(:empty?) && embedding.empty?
  return record.class.none unless pgvector_available?

  scope = record.class.where.not(column => nil)
  scope = scope.where.not(id: record.id) if exclude_self

  vector_search(scope, embedding, column: column, limit: limit)
end
176
+
177
# Check if pgvector is available.
#
# The answer is cached in @pgvector_available after the first lookup. A plain
# "||=" would repeat the available_extensions lookup on every call whenever the
# answer is false, so the cache is tested for nil instead.
#
# @return [Boolean] whether available_extensions includes "vector"
def pgvector_available?
  return @pgvector_available unless @pgvector_available.nil?

  @pgvector_available = available_extensions.include?("vector")
end
181
+
182
+ private
183
+
184
# Map a distance metric name to the SQL operator used in the distance expression.
#
# @param metric [Symbol] :l2 / :euclidean, :inner_product, or anything else
# @return [String] "<->" for L2/Euclidean, "<#>" for (negative) inner product,
#   "<=>" (cosine distance) for every other value
def distance_operator(metric)
  return "<->" if %i[l2 euclidean].include?(metric)
  return "<#>" if metric == :inner_product

  "<=>"
end
194
+
195
# Wrap KNN results in a search-engine style response Hash.
#
# Each hit's "_score" is 1.0 - distance. A record whose distance is nil (for
# example when its embedding column is NULL) gets a nil score instead of
# raising on "1.0 - nil"; records without a distance reader are treated as
# distance 0. "max_score" is the largest non-nil score, or nil when there is none.
#
# @param records [Enumerable] records responding to id and as_json
# @param model [Class] model responding to table_name
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_knn_response(records, model)
  hits = records.map do |record|
    distance = record.respond_to?(:distance) ? record.distance : 0
    {
      "_index" => model.table_name,
      "_id" => record.id.to_s,
      "_score" => distance.nil? ? nil : 1.0 - distance,
      "_source" => record.as_json(except: [:distance])
    }
  end

  scores = hits.map { |hit| hit["_score"] }.compact

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => {
      "total" => { "value" => hits.size, "relation" => "eq" },
      "max_score" => scores.max,
      "hits" => hits
    }
  }
end
216
+ end
217
+ end
218
+ end
219
+ end
@@ -0,0 +1,461 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pgvector_support"
4
+
5
+ module Noiseless
6
+ module Adapters
7
+ module ExecutionModules
8
+ # PostgreSQL execution module - translates noiseless AST to PostgreSQL queries
9
+ # Uses pg_trgm for fuzzy matching, unaccent for accent-insensitive search,
10
+ # and optionally pgvector for semantic search
11
+ module PostgresqlExecution
12
+ include PgvectorSupport
13
+
14
+ SIMILARITY_THRESHOLD = 0.3
15
+ DEFAULT_LIMIT = 20
16
+
17
+ private
18
+
19
# Run a search described by query_hash and return a search-engine style response.
#
# Resolves the model from query_hash[:indexes] / model_class, hands vector
# queries to execute_vector_search, and otherwise builds the scope, loads it and
# formats the records. Any StandardError is turned into error_response.
#
# @param query_hash [Hash] query description (:indexes, :vector, ...)
# @param model_class [Class, nil] explicit model to search
# @return [Hash] response Hash (empty_response when no model resolves)
def execute_search(query_hash, model_class: nil, **)
  begin
    model = resolve_model(query_hash[:indexes], model_class)

    if !model
      empty_response
    elsif query_hash[:vector]
      execute_vector_search(model, query_hash)
    else
      matched = build_search_scope(model, query_hash).to_a
      format_as_search_response(matched, model)
    end
  rescue StandardError => e
    error_response(e)
  end
end
33
+
34
# Run the vector branch of a search.
#
# Filter clauses from query_hash[:bool] are applied to the base scope before
# the similarity search; the loaded records are formatted with
# format_vector_response. Any StandardError becomes error_response.
#
# @param model [Class] model to search
# @param query_hash [Hash] query description; :vector holds the vector node
# @return [Hash] response Hash (empty_response without a vector node or pgvector)
def execute_vector_search(model, query_hash)
  vector_node = query_hash[:vector]
  return empty_response unless vector_node && pgvector_available?

  base = model.all
  filters = query_hash[:bool]&.filter || []
  filtered = apply_filter_clauses(base, filters)

  ranked = vector_search(
    filtered,
    vector_node.embedding,
    column: vector_node.field,
    limit: vector_node.k,
    distance_metric: vector_node.distance_metric
  )

  format_vector_response(ranked.to_a, model, vector_node)
rescue StandardError => e
  error_response(e)
end
58
+
59
# Wrap vector-search records in a search-engine style response Hash.
#
# "_score" is 1.0 - vector_distance. A nil distance (for example a NULL
# embedding) yields a nil score instead of raising on "1.0 - nil"; records
# without a vector_distance reader are treated as distance 0. "max_score" is
# the largest non-nil score.
# NOTE(review): "1 - distance" reads as a similarity for cosine distance; for
# the other metrics the value is only an ordering hint - confirm with callers.
#
# @param records [Enumerable] records responding to id and as_json
# @param model [Class] model responding to table_name
# @param _vector_node [Object] unused
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_vector_response(records, model, _vector_node)
  hits = records.map do |record|
    distance = record.respond_to?(:vector_distance) ? record.vector_distance : 0
    {
      "_index" => model.table_name,
      "_id" => record.id.to_s,
      "_score" => distance.nil? ? nil : 1.0 - distance, # Convert distance to similarity score
      "_source" => record.as_json(except: [:vector_distance])
    }
  end

  scores = hits.map { |hit| hit["_score"] }.compact

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => {
      "total" => { "value" => hits.size, "relation" => "eq" },
      "max_score" => scores.max,
      "hits" => hits
    }
  }
end
81
+
82
# Process a list of bulk actions one by one.
#
# "errors" is true when any item failed. An item counts as failed when it has
# an "error" key itself (unknown action type) or when the result nested under
# its action key ("index" / "delete") has one - that is where
# process_bulk_action puts the per-document results.
#
# @param actions [Array<Hash>] bulk actions
# @return [Hash] { "items" => [...], "errors" => Boolean }
def execute_bulk(actions, **)
  items = actions.map { |action| process_bulk_action(action) }

  failed = items.any? do |item|
    item["error"] || item.each_value.any? { |outcome| outcome.is_a?(Hash) && outcome["error"] }
  end

  { "items" => items, "errors" => failed }
end
89
+
90
# Index creation is a no-op for PostgreSQL (tables already exist); the call is
# simply acknowledged.
#
# @param _index_name [String, Symbol] ignored
# @return [Hash] { "acknowledged" => true }
def execute_create_index(_index_name, **)
  acknowledgement = { "acknowledged" => true }
  acknowledgement
end
94
+
95
# Index deletion is a no-op: tables are not dropped through the search
# adapter, so the call is simply acknowledged.
#
# @param _index_name [String, Symbol] ignored
# @return [Hash] { "acknowledged" => true }
def execute_delete_index(_index_name, **)
  acknowledgement = { "acknowledged" => true }
  acknowledgement
end
99
+
100
# Report whether the index maps to a model whose table exists.
#
# @param index_name [String, Symbol] index name used to resolve the model
# @return [Boolean] false when no model resolves, the table is missing, or any
#   StandardError is raised along the way
def execute_index_exists?(index_name)
  candidate = resolve_model([index_name])
  return false unless candidate.present?

  candidate.table_exists?
rescue StandardError
  false
end
106
+
107
# Create or update the row backing a document.
#
# Document keys are converted to Strings before being narrowed to the model's
# column names; column_names are Strings, so a symbol-keyed document would
# otherwise lose every attribute in the slice.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @param document [Hash] attributes to assign (String or Symbol keys)
# @return [Hash] result Hash with "result" of "created", "updated" or "error"
def execute_index_document(index, id, document, **)
  model = resolve_model([index])
  return { "_index" => index, "_id" => id, "result" => "error", "error" => "Model not found" } unless model

  attributes = document.transform_keys(&:to_s).slice(*model.column_names)

  record = model.find_or_initialize_by(id: id)
  record.assign_attributes(attributes)
  record.save!

  { "_index" => index, "_id" => id, "result" => record.previously_new_record? ? "created" : "updated" }
rescue StandardError => e
  { "_index" => index, "_id" => id, "result" => "error", "error" => e.message }
end
119
+
120
# Apply partial changes to an existing row.
#
# Change keys are converted to Strings before being narrowed to the model's
# column names, so symbol-keyed changes are not silently dropped by the slice.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @param changes [Hash] attributes to update (String or Symbol keys)
# @return [Hash] result Hash with "result" of "updated", "not_found" or "error"
def execute_update_document(index, id, changes, **)
  model = resolve_model([index])
  return { "_index" => index, "_id" => id, "result" => "error", "error" => "Model not found" } unless model

  attributes = changes.transform_keys(&:to_s).slice(*model.column_names)

  record = model.find(id)
  record.update!(attributes)

  { "_index" => index, "_id" => id, "result" => "updated" }
rescue ActiveRecord::RecordNotFound
  { "_index" => index, "_id" => id, "result" => "not_found" }
rescue StandardError => e
  { "_index" => index, "_id" => id, "result" => "error", "error" => e.message }
end
133
+
134
# Destroy the row backing a document.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @return [Hash] result Hash with "result" of "deleted", "not_found" or "error"
def execute_delete_document(index, id, **)
  model = resolve_model([index])

  if model
    model.destroy(id)
    { "_index" => index, "_id" => id, "result" => "deleted" }
  else
    { "_id" => id, "result" => "error", "error" => "Model not found" }
  end
rescue ActiveRecord::RecordNotFound
  { "_index" => index, "_id" => id, "result" => "not_found" }
rescue StandardError => e
  { "_index" => index, "_id" => id, "result" => "error", "error" => e.message }
end
145
+
146
# Report whether a row with the given id exists for the index's model.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key to look up
# @return [Boolean] false when no model resolves or a StandardError is raised
def execute_document_exists?(index, id)
  model = resolve_model([index])
  return false if model.nil?

  found = model.exists?(id: id)
  found || false
rescue StandardError
  false
end
152
+
153
# Report database health in a cluster-health shaped Hash.
#
# Runs "SELECT 1" on the ActiveRecord connection: success reports "green" with
# one node, any StandardError reports "red" together with the error message.
#
# @return [Hash] health Hash with "cluster_name" and "status"
def execute_cluster_health(**)
  health = { "cluster_name" => "postgresql" }

  begin
    ActiveRecord::Base.connection.execute("SELECT 1")
    health.merge("status" => "green", "number_of_nodes" => 1)
  rescue StandardError => e
    health.merge("status" => "red", "error" => e.message)
  end
end
168
+
169
+ # Query building methods
170
+
171
# Assemble the relation for a non-vector search.
#
# Applies, in order: must clauses (text matching), filter clauses (exact
# matches), sorting, and pagination.
#
# @param model [Class] model to search
# @param query_hash [Hash] query description (:bool, :sort, :paginate)
# @return [Object] the scope returned by apply_pagination
def build_search_scope(model, query_hash)
  base = model.all
  bool_node = query_hash[:bool]

  searched = apply_must_clauses(base, bool_node&.must || [], model)
  filtered = apply_filter_clauses(searched, bool_node&.filter || [])
  sorted = apply_sorting(filtered, query_hash[:sort] || [])

  apply_pagination(sorted, query_hash[:paginate])
end
186
+
187
# Fold every must node into the scope, dispatching on the node's AST class.
# Nodes of any other class leave the scope untouched.
#
# @param scope [Object] relation to narrow
# @param must_nodes [Array] AST nodes (Match, MultiMatch, Wildcard, Range, Prefix)
# @param model [Class] model, needed for column type checks
# @return [Object] the narrowed scope
def apply_must_clauses(scope, must_nodes, model)
  must_nodes.reduce(scope) do |current, node|
    case node
    when AST::Match then apply_match(current, node, model)
    when AST::MultiMatch then apply_multi_match(current, node, model)
    when AST::Wildcard then apply_wildcard(current, node)
    when AST::Range then apply_range(current, node)
    when AST::Prefix then apply_prefix(current, node)
    else current
    end
  end
end
209
+
210
# Add a fuzzy / substring condition for a single field.
#
# - pg_trgm and unaccent present, text column: trigram "%" match or ILIKE, both
#   accent-insensitive via unaccent().
# - pg_trgm present but unaccent missing: the same condition without unaccent(),
#   so the query does not reference a function that is not installed.
# - otherwise: plain ILIKE substring match.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to field and value
# @param model [Class] model, used to check the column type
# @return [Object] the narrowed scope
def apply_match(scope, node, model)
  field = node.field.to_s
  value = node.value.to_s
  column = quoted_column(field)
  pattern = "%#{sanitize_like(value)}%"

  # Fallback to ILIKE when trigram matching does not apply.
  return scope.where("#{column} ILIKE ?", pattern) unless trgm_available? && text_column?(model, field)

  if unaccent_available?
    scope.where(
      "unaccent(#{column}) % unaccent(?) OR unaccent(#{column}) ILIKE unaccent(?)",
      value,
      pattern
    )
  else
    scope.where("#{column} % ? OR #{column} ILIKE ?", value, pattern)
  end
end
227
+
228
# Add one OR-ed condition per field for a multi-field text query.
#
# Text columns use the trigram "%" match plus ILIKE when pg_trgm is present,
# wrapped in unaccent() only when the unaccent extension is also present; all
# other fields use a plain ILIKE substring match. Conditions and their bind
# values are built in a single pass so they cannot drift apart.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to query and fields
# @param model [Class] model, used to check column types
# @return [Object] the narrowed scope (unchanged when there are no fields)
def apply_multi_match(scope, node, model)
  query = node.query.to_s
  pattern = "%#{sanitize_like(query)}%"
  fuzzy = trgm_available?
  accent_insensitive = fuzzy && unaccent_available?

  conditions = []
  params = []

  node.fields.map(&:to_s).each do |field|
    column = quoted_column(field)

    if fuzzy && text_column?(model, field)
      conditions << if accent_insensitive
                      "(unaccent(#{column}) % unaccent(?) OR unaccent(#{column}) ILIKE unaccent(?))"
                    else
                      "(#{column} % ? OR #{column} ILIKE ?)"
                    end
      params.push(query, pattern)
    else
      conditions << "#{column} ILIKE ?"
      params << pattern
    end
  end

  return scope if conditions.empty?

  scope.where(conditions.join(" OR "), *params)
end
251
+
252
# Add a case-insensitive wildcard condition.
#
# Literal LIKE metacharacters in the value (%, _ and backslash) are escaped
# first, so only the OpenSearch-style wildcards are active: * -> %, ? -> _.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to field and value
# @return [Object] the narrowed scope
def apply_wildcard(scope, node)
  pattern = sanitize_like(node.value).tr("*?", "%_")

  scope.where("#{quoted_column(node.field.to_s)} ILIKE ?", pattern)
end
259
+
260
# Add a comparison for every bound set on the node, in the order
# gte, lte, gt, lt. Bounds that are nil (or false) are skipped.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to field, gte, lte, gt and lt
# @return [Object] the narrowed scope
def apply_range(scope, node)
  column = quoted_column(node.field.to_s)
  bounds = [[">=", node.gte], ["<=", node.lte], [">", node.gt], ["<", node.lt]]

  bounds.reduce(scope) do |current, (operator, bound)|
    bound ? current.where("#{column} #{operator} ?", bound) : current
  end
end
270
+
271
# Add a case-insensitive "starts with" condition; LIKE metacharacters in the
# value are escaped before the trailing % is appended.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to field and value
# @return [Object] the narrowed scope
def apply_prefix(scope, node)
  column = quoted_column(node.field.to_s)
  pattern = "#{sanitize_like(node.value)}%"

  scope.where("#{column} ILIKE ?", pattern)
end
274
+
275
# Fold every filter node into the scope.
#
# A node whose value is a Hash with a :geo_distance entry goes through
# apply_geo_filter; every other node becomes an equality condition.
#
# @param scope [Object] relation to narrow
# @param filter_nodes [Array] nodes responding to field and value
# @return [Object] the narrowed scope
def apply_filter_clauses(scope, filter_nodes)
  filter_nodes.reduce(scope) do |current, node|
    criterion = node.value
    geo = criterion.is_a?(Hash) && criterion[:geo_distance]

    geo ? apply_geo_filter(current, node) : current.where(node.field => criterion)
  end
end
290
+
291
# Add a distance filter using PostGIS ST_DWithin.
#
# The geo point is the first entry of the :geo_distance config whose value is
# a Hash carrying both :lat and :lon; without one the scope is returned
# unchanged. The column name is quoted through quoted_column before being
# placed in the SQL fragment; the point and the distance are bound.
# NOTE(review): the rescue only covers errors raised while this clause is being
# assembled. Whether a missing PostGIS extension surfaces here or later, when
# the relation is loaded, should be confirmed.
#
# @param scope [Object] relation to narrow
# @param node [Object] node responding to field and value
# @return [Object] the narrowed scope, or the original scope on any StandardError
def apply_geo_filter(scope, node)
  geo_config = node.value[:geo_distance]

  # Find the geo point in config
  geo_point = geo_config.find { |_key, candidate| candidate.is_a?(Hash) && candidate[:lat] && candidate[:lon] }&.last
  return scope unless geo_point

  scope.where(
    "ST_DWithin(#{quoted_column(node.field.to_s)}::geography, ST_SetSRID(ST_MakePoint(?, ?), 4326)::geography, ?)",
    geo_point[:lon],
    geo_point[:lat],
    parse_distance(geo_config[:distance])
  )
rescue StandardError
  scope
end
312
+
313
# Order the scope by the given sort nodes.
#
# A direction of "desc" (any letter case) sorts descending; every other
# direction sorts ascending. Column names are quoted.
#
# @param scope [Object] relation to order
# @param sort_nodes [Array] nodes responding to field and direction
# @return [Object] the ordered scope (unchanged when there are no sort nodes)
def apply_sorting(scope, sort_nodes)
  return scope if sort_nodes.empty?

  ordering = sort_nodes.map do |node|
    keyword = node.direction.to_s.upcase == "DESC" ? "DESC" : "ASC"
    [quoted_column(node.field.to_s), keyword].join(" ")
  end.join(", ")

  scope.order(Arel.sql(ordering))
end
323
+
324
# Apply LIMIT / OFFSET for the requested page.
#
# Defaults to page 1 and DEFAULT_LIMIT rows. Values are converted with to_i,
# the page is clamped to at least 1 and per_page to at least 0, so a page of 0
# or a negative value can no longer produce a negative OFFSET or LIMIT.
#
# @param scope [Object] relation to paginate
# @param paginate_node [Object, nil] node responding to page and per_page
# @return [Object] the paginated scope
def apply_pagination(scope, paginate_node)
  page = [(paginate_node&.page || 1).to_i, 1].max
  per_page = [(paginate_node&.per_page || DEFAULT_LIMIT).to_i, 0].max

  scope.limit(per_page).offset((page - 1) * per_page)
end
332
+
333
+ # Response formatting
334
+
335
# Wrap loaded records in a search-engine style response Hash.
#
# Every hit gets a fixed "_score" of 1.0; "max_score" is 1.0 when there is at
# least one hit and nil otherwise. The reported total is the number of records
# passed in.
#
# @param records [Array] records responding to id and as_json
# @param model [Class] model responding to table_name
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_as_search_response(records, model)
  total = records.size

  hits = records.map do |record|
    { "_index" => model.table_name, "_id" => record.id.to_s, "_score" => 1.0, "_source" => record.as_json }
  end

  hits_section = {
    "total" => { "value" => total, "relation" => "eq" },
    "max_score" => hits.empty? ? nil : 1.0,
    "hits" => hits
  }

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => hits_section
  }
end
358
+
359
# Build a successful response that contains no hits.
#
# @return [Hash] a fresh response Hash with zero hits and one successful shard
def empty_response
  no_hits = {
    "total" => { "value" => 0, "relation" => "eq" },
    "max_score" => nil,
    "hits" => []
  }
  shards = { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 }

  { "took" => 0, "timed_out" => false, "_shards" => shards, "hits" => no_hits }
end
371
+
372
# Build a failed response for an exception.
#
# The shard summary reports one failed shard, hits are empty, and "error"
# carries the exception's class name and message.
#
# @param error [Exception] the error to report
# @return [Hash] response Hash with an "error" entry
def error_response(error)
  no_hits = {
    "total" => { "value" => 0, "relation" => "eq" },
    "max_score" => nil,
    "hits" => []
  }
  shards = { "total" => 1, "successful" => 0, "skipped" => 0, "failed" => 1 }
  details = { "type" => error.class.name, "reason" => error.message }

  { "took" => 0, "timed_out" => false, "_shards" => shards, "hits" => no_hits, "error" => details }
end
385
+
386
+ # Helper methods
387
+
388
# Resolve the model class to search.
#
# An explicit model_class wins. Otherwise the first index name is looked up in
# @model_class_cache (when one is set) and finally inferred by classifying the
# index name. An inferred constant is only accepted when it is a class that
# inherits from ActiveRecord::Base, so an index name such as "objects" or
# "kernels" cannot resolve to an unrelated constant.
#
# @param indexes [Array, nil] index names; only the first is used
# @param model_class [Class, nil] explicit model
# @return [Class, nil] the model, or nil when nothing suitable resolves
def resolve_model(indexes, model_class = nil)
  return model_class if model_class

  index_name = indexes&.first
  return nil unless index_name

  # Try cached model first
  return @model_class_cache[index_name] if @model_class_cache&.key?(index_name)

  # Try to infer model from index name
  candidate = index_name.to_s.classify.constantize
  candidate.is_a?(Class) && candidate < ActiveRecord::Base ? candidate : nil
rescue NameError
  nil
end
403
+
404
# Check whether the pg_trgm extension is listed by available_extensions.
#
# The answer is cached in @trgm_available after the first lookup; the nil test
# (instead of "||=") makes sure a false answer is cached as well.
#
# @return [Boolean]
def trgm_available?
  return @trgm_available unless @trgm_available.nil?

  @trgm_available = available_extensions.include?("pg_trgm")
end
407
+
408
# Check whether the unaccent extension is listed by available_extensions.
#
# The answer is cached in @unaccent_available after the first lookup; the nil
# test (instead of "||=") makes sure a false answer is cached as well.
#
# @return [Boolean]
def unaccent_available?
  return @unaccent_available unless @unaccent_available.nil?

  @unaccent_available = available_extensions.include?("unaccent")
end
411
+
412
# Tell whether a field is backed by a :string or :text column.
#
# @param model [Class] model responding to columns_hash
# @param field [String, Symbol] column name
# @return [Boolean, nil] nil when the model has no such column
def text_column?(model, field)
  column = model.columns_hash[field.to_s]
  return column unless column

  %i[string text].include?(column.type)
end
416
+
417
# Quote a column name with the ActiveRecord::Base connection.
#
# @param field [String, Symbol] column name
# @return [String] whatever the connection's quote_column_name returns
def quoted_column(field)
  connection = ActiveRecord::Base.connection
  connection.quote_column_name(field)
end
420
+
421
# Escape the characters that are special inside a LIKE pattern (%, _ and
# backslash) by prefixing each with a backslash.
#
# @param value [Object] value to escape (converted with to_s)
# @return [String] the escaped text
def sanitize_like(value)
  # Replacement: "\\\\" is a literal backslash, "\0" is the matched character.
  value.to_s.gsub(/[%_\\]/, '\\\\\0')
end
425
+
426
# Convert an OpenSearch-style distance (e.g. "10km", "5mi", "300 ft") to meters.
#
# A value that is exactly "<number><unit>" is converted with the unit table
# below (short and long unit names, case-insensitive). Anything else falls back
# to the looser km / mi / m substring matching, and finally to to_f, so values
# without a recognised unit are treated as meters.
#
# @param distance [String, Numeric, nil] distance with an optional unit
# @return [Float] distance in meters
def parse_distance(distance)
  text = distance.to_s.strip

  meters_per_unit = {
    "km" => 1000.0, "kilometers" => 1000.0,
    "m" => 1.0, "meters" => 1.0,
    "cm" => 0.01, "centimeters" => 0.01,
    "mm" => 0.001, "millimeters" => 0.001,
    "mi" => 1609.34, "miles" => 1609.34,
    "yd" => 0.9144, "yards" => 0.9144,
    "ft" => 0.3048, "feet" => 0.3048,
    "in" => 0.0254, "inch" => 0.0254,
    "nm" => 1852.0, "nmi" => 1852.0, "nauticalmiles" => 1852.0
  }

  # Anchored match first, so "10mm" is not mistaken for "10m".
  exact = /\A(\d+(?:\.\d+)?)\s*([a-z]+)\z/i.match(text)
  if exact
    factor = meters_per_unit[exact[2].downcase]
    return exact[1].to_f * factor if factor
  end

  # Looser matching for anything the unit table does not cover.
  case text
  when /(\d+(?:\.\d+)?)\s*km/i
    ::Regexp.last_match(1).to_f * 1000
  when /(\d+(?:\.\d+)?)\s*mi/i
    ::Regexp.last_match(1).to_f * 1609.34
  when /(\d+(?:\.\d+)?)\s*m/i
    ::Regexp.last_match(1).to_f
  else
    distance.to_f
  end
end
439
+
440
# Execute one bulk action and wrap its result under the action name.
#
# An :index action writes the document found under :data; a :delete action
# removes the document. Anything else is reported as an unknown action type.
#
# @param action [Hash] { index: { _index:, _id:, data: } } or { delete: { _index:, _id: } }
# @return [Hash] { "index" => result }, { "delete" => result } or { "error" => message }
def process_bulk_action(action)
  if (indexing = action[:index])
    { "index" => execute_index_document(indexing[:_index], indexing[:_id], indexing[:data]) }
  elsif (deletion = action[:delete])
    { "delete" => execute_delete_document(deletion[:_index], deletion[:_id]) }
  else
    { "error" => "Unknown action type" }
  end
end
458
+ end
459
+ end
460
+ end
461
+ end