noiseless 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +313 -0
- data/lib/noiseless/adapters/elasticsearch.rb +70 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
- data/lib/noiseless/adapters/open_search.rb +208 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +70 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/filter.rb +15 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +15 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +15 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +15 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +130 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +260 -14
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module Adapters
|
|
5
|
+
module ExecutionModules
|
|
6
|
+
# pgvector support for semantic/vector search in PostgreSQL
|
|
7
|
+
# Provides similarity search using embeddings
|
|
8
|
+
#
|
|
9
|
+
# Required:
|
|
10
|
+
# CREATE EXTENSION IF NOT EXISTS vector;
|
|
11
|
+
#
|
|
12
|
+
# Table setup:
|
|
13
|
+
# ALTER TABLE your_table ADD COLUMN embedding vector(1536);
|
|
14
|
+
# CREATE INDEX ON your_table USING ivfflat (embedding vector_cosine_ops);
|
|
15
|
+
#
|
|
16
|
+
module PgvectorSupport
|
|
17
|
+
# Perform semantic search using vector similarity.
#
# Adds a computed +vector_distance+ column to the SELECT list, optionally keeps only
# rows below a distance threshold, and orders by ascending distance (closest first).
#
# @param scope [ActiveRecord::Relation] The base scope to search
# @param embedding [Array<Numeric>] The query embedding vector
# @param column [Symbol] The column containing embeddings (default: :embedding)
# @param limit [Integer] Maximum results to return
# @param distance_threshold [Float, nil] Keep only rows whose distance is below this value
# @param distance_metric [Symbol] :cosine, :l2, or :inner_product
# @return [ActiveRecord::Relation] Scope with vector similarity ordering; the scope is
#   returned untouched when pgvector is not available
# @raise [ArgumentError, TypeError] if an embedding component is not numeric
#
def vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil,
                  distance_metric: :cosine)
  return scope unless pgvector_available?

  # The vector is embedded as a SQL literal, so every component is forced through
  # Float(): anything that is not a number raises instead of reaching the database.
  vector_literal = "'[#{embedding.map { |component| Float(component) }.join(',')}]'"
  distance_sql = "#{quoted_column(column)} #{distance_operator(distance_metric)} #{vector_literal}"

  scope = scope.select("#{scope.table_name}.*", "#{distance_sql} AS vector_distance")
  scope = scope.where("#{distance_sql} < ?", distance_threshold) if distance_threshold

  # Ascending distance = most similar first
  scope.order(Arel.sql(distance_sql)).limit(limit)
end
|
|
52
|
+
|
|
53
|
+
# Hybrid search combining text and vector search.
#
# Selects three computed columns (+text_score+, +vector_score+, +combined_score+) and
# orders by +combined_score+ descending. Text similarity uses the SQL +similarity()+
# function averaged over the given fields; vector similarity is 1 - cosine distance.
#
# @param scope [ActiveRecord::Relation] Base scope
# @param text_query [String] Text query for pg_trgm search
# @param embedding [Array<Numeric>] Query embedding for vector search
# @param text_fields [Array<Symbol>] Fields to search with text
# @param vector_column [Symbol] Column containing embeddings
# @param text_weight [Float] Weight for text similarity (0.0-1.0)
# @param vector_weight [Float] Weight for vector similarity (0.0-1.0)
# @param limit [Integer] Maximum results to return
# @return [ActiveRecord::Relation] the scope is returned untouched when pgvector is unavailable
# @raise [ArgumentError, TypeError] if a weight or embedding component is not numeric
#
def hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding,
                  text_weight: 0.5, vector_weight: 0.5, limit: 20)
  return scope unless pgvector_available?

  # SELECT fragments cannot carry `?` binds, so the query text is quoted by the
  # connection and every numeric input is coerced before being embedded in SQL.
  quoted_query = ActiveRecord::Base.connection.quote(text_query.to_s)
  vector_literal = "'[#{embedding.map { |component| Float(component) }.join(',')}]'"
  text_w = Float(text_weight)
  vector_w = Float(vector_weight)
  vector_col = quoted_column(vector_column)

  fields = Array(text_fields)
  text_sum = fields.map { |field| "similarity(#{quoted_column(field)}, #{quoted_query})" }.join(" + ")
  # With no text fields the text component is simply zero (avoids "() / 0")
  text_avg = fields.empty? ? "0" : "((#{text_sum}) / #{fields.size})"
  # COALESCE keeps rows without an embedding from getting a NULL score, which
  # PostgreSQL would sort first under ORDER BY ... DESC
  vector_similarity = "COALESCE(1 - (#{vector_col} <=> #{vector_literal}), 0)"

  scope.select(
    "#{scope.table_name}.*",
    Arel.sql("#{text_avg} * #{text_w} AS text_score"),
    Arel.sql("#{vector_similarity} * #{vector_w} AS vector_score"),
    Arel.sql("(#{text_avg} * #{text_w} + #{vector_similarity} * #{vector_w}) AS combined_score")
  ).where("#{text_avg} > 0 OR #{vector_col} IS NOT NULL")
       .order(Arel.sql("combined_score DESC"))
       .limit(limit)
end
|
|
91
|
+
|
|
92
|
+
# Execute a KNN (K-Nearest Neighbors) search using cosine distance.
#
# @param model [Class] The ActiveRecord model
# @param embedding [Array<Numeric>] Query embedding
# @param k [Integer] Number of nearest neighbors
# @param column [Symbol] Embedding column
# @param filters [Hash, nil] Additional WHERE conditions passed to +where+
# @return [Hash, Array] the response built by +format_knn_response+, or an empty
#   Array when pgvector is not available
# @raise [ArgumentError, TypeError] if an embedding component is not numeric
#
def knn_search(model, embedding, k: 10, column: :embedding, filters: {})
  return [] unless pgvector_available?

  # Components go through Float() because the vector is embedded as a SQL literal
  vector_literal = "'[#{embedding.map { |component| Float(component) }.join(',')}]'"
  distance_sql = "#{quoted_column(column)} <=> #{vector_literal}"

  scope = model.all
  scope = scope.where(filters) unless filters.nil? || filters.empty?

  neighbours = scope.select("#{model.table_name}.*", "#{distance_sql} AS distance")
                    .order(Arel.sql(distance_sql))
                    .limit(k)

  format_knn_response(neighbours, model)
end
|
|
117
|
+
|
|
118
|
+
# Persist an embedding on a single record.
#
# The vector is serialised as "[v1,v2,...]" and written with +update_column+.
#
# @param record [ActiveRecord::Base] The record to update
# @param embedding [Array<Float>] The embedding vector
# @param column [Symbol] The column to store the embedding
# @return [Boolean] false when pgvector is unavailable, otherwise the result of +update_column+
#
def store_embedding(record, embedding, column: :embedding)
  if pgvector_available?
    serialized = "[" + embedding.join(",") + "]"
    record.update_column(column, serialized)
  else
    false
  end
end
|
|
130
|
+
|
|
131
|
+
# Batch store embeddings with a single UPDATE ... FROM (VALUES ...) statement.
#
# @param model [Class] The ActiveRecord model
# @param embeddings [Hash<Object, Array<Numeric>>] Map of ID -> embedding
# @param column [Symbol] The column to store embeddings
# @return [Integer] number of embeddings submitted, or 0 when pgvector is unavailable,
#   the input is empty, or the statement failed (the failure is logged)
#
def batch_store_embeddings(model, embeddings, column: :embedding)
  return 0 unless pgvector_available?
  # An empty VALUES list is invalid SQL, and there is nothing to write anyway
  return 0 if embeddings.nil? || embeddings.empty?

  connection = ActiveRecord::Base.connection
  rows = embeddings.map do |id, vector|
    # Float() guarantees only numeric text ends up inside the vector literal
    literal = vector.map { |component| Float(component) }.join(",")
    "(#{connection.quote(id)}, '[#{literal}]'::vector)"
  end

  table = connection.quote_table_name(model.table_name)
  primary_key = model.primary_key || "id"
  # Cast ids to the key column's own type instead of assuming UUID keys;
  # "uuid" stays as the fallback when the column type cannot be determined.
  key_type = model.columns_hash[primary_key]&.sql_type || "uuid"

  sql = [
    "UPDATE #{table}",
    "SET #{connection.quote_column_name(column)} = v.embedding",
    "FROM (VALUES #{rows.join(',')}) AS v(id, embedding)",
    "WHERE #{table}.#{connection.quote_column_name(primary_key)} = v.id::#{key_type}"
  ].join(" ")

  connection.execute(sql)
  embeddings.size
rescue StandardError => e
  Rails.logger.error("Failed to batch store embeddings: #{e.message}")
  0
end
|
|
158
|
+
|
|
159
|
+
# Find the records closest to a reference record's own embedding.
#
# @param record [ActiveRecord::Base] The reference record
# @param limit [Integer] Number of similar records
# @param column [Symbol] Embedding column
# @param exclude_self [Boolean] Exclude the reference record
# @return [ActiveRecord::Relation] an empty relation (+none+) when the record has no
#   embedding or pgvector is unavailable
#
def find_similar(record, limit: 10, column: :embedding, exclude_self: true)
  reference_vector = record.send(column)
  model = record.class
  return model.none if !reference_vector || !pgvector_available?

  # Only rows that actually carry an embedding can be compared
  candidates = model.where.not(column => nil)
  candidates = candidates.where.not(id: record.id) if exclude_self

  vector_search(candidates, reference_vector, column: column, limit: limit)
end
|
|
176
|
+
|
|
177
|
+
# Check if pgvector is available.
#
# The answer is looked up once via +available_extensions+ and memoized, including a
# negative answer (`||=` would repeat the lookup on every call when it is false).
#
# @return [Boolean] whether the "vector" extension is listed
def pgvector_available?
  return @pgvector_available if defined?(@pgvector_available)

  @pgvector_available = available_extensions.include?("vector")
end
|
|
181
|
+
|
|
182
|
+
private
|
|
183
|
+
|
|
184
|
+
# Map a distance metric name to the pgvector comparison operator.
#
# Accepts Symbols or Strings, so a metric that arrives as "l2" is not silently
# treated as cosine. Unknown or nil metrics fall back to cosine.
#
# @param metric [Symbol, String, nil] :l2 / :euclidean, :inner_product, or :cosine
# @return [String] "<->", "<#>" or "<=>"
def distance_operator(metric)
  case metric.to_s
  when "l2", "euclidean"
    "<->" # L2/Euclidean distance
  when "inner_product"
    "<#>" # Negative inner product
  else
    "<=>" # Cosine distance (default)
  end
end
|
|
194
|
+
|
|
195
|
+
# Wrap KNN result records in an OpenSearch-style response hash.
#
# Each hit's "_score" is 1.0 - distance. A record whose distance is nil (for example a
# row with a NULL embedding) gets a nil score instead of raising TypeError, and
# "max_score" is the largest non-nil score.
#
# @param records [Enumerable] records responding to +id+, +as_json+ and optionally +distance+
# @param model [Class] model whose +table_name+ is reported as "_index"
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_knn_response(records, model)
  index_name = model.table_name

  hits = records.map do |record|
    distance = record.respond_to?(:distance) ? record.distance : 0
    {
      "_index" => index_name,
      "_id" => record.id.to_s,
      "_score" => distance.nil? ? nil : 1.0 - distance,
      "_source" => record.as_json(except: [:distance])
    }
  end

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => {
      "total" => { "value" => hits.size, "relation" => "eq" },
      "max_score" => hits.map { |hit| hit["_score"] }.compact.max,
      "hits" => hits
    }
  }
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "pgvector_support"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
module ExecutionModules
|
|
8
|
+
# PostgreSQL execution module - translates noiseless AST to PostgreSQL queries
|
|
9
|
+
# Uses pg_trgm for fuzzy matching, unaccent for accent-insensitive search,
|
|
10
|
+
# and optionally pgvector for semantic search
|
|
11
|
+
module PostgresqlExecution
|
|
12
|
+
include PgvectorSupport
|
|
13
|
+
|
|
14
|
+
SIMILARITY_THRESHOLD = 0.3
|
|
15
|
+
DEFAULT_LIMIT = 20
|
|
16
|
+
|
|
17
|
+
private
|
|
18
|
+
|
|
19
|
+
# Run a search described by +query_hash+ and return a response hash.
#
# Dispatches to the vector path when the query carries a :vector node; otherwise the
# scope is built, loaded and formatted. Any StandardError is converted into an
# error response rather than raised.
#
# @param query_hash [Hash] query description (reads :indexes, :vector, and whatever
#   +build_search_scope+ reads)
# @param model_class [Class, nil] explicit model; otherwise resolved from :indexes
# @return [Hash] search, empty, or error response
def execute_search(query_hash, model_class: nil, **)
  begin
    target = resolve_model(query_hash[:indexes], model_class)

    if !target
      empty_response
    elsif query_hash[:vector]
      execute_vector_search(target, query_hash)
    else
      loaded = build_search_scope(target, query_hash).to_a
      format_as_search_response(loaded, target)
    end
  rescue StandardError => e
    error_response(e)
  end
end
|
|
33
|
+
|
|
34
|
+
# Run the vector branch of a search.
#
# Applies the query's filter clauses to the model's base scope, delegates to
# +vector_search+ with the vector node's settings, and formats the loaded records.
# Any StandardError becomes an error response.
#
# @param model [Class] the ActiveRecord model to search
# @param query_hash [Hash] reads :vector (embedding, field, k, distance_metric) and :bool
# @return [Hash] vector, empty, or error response
def execute_vector_search(model, query_hash)
  begin
    knn = query_hash[:vector]
    return empty_response if !knn || !pgvector_available?

    # Filters narrow the candidate set before similarity ordering is applied
    filters = query_hash[:bool]&.filter || []
    filtered = apply_filter_clauses(model.all, filters)

    ranked = vector_search(
      filtered,
      knn.embedding,
      column: knn.field,
      limit: knn.k,
      distance_metric: knn.distance_metric
    )

    format_vector_response(ranked.to_a, model, knn)
  rescue StandardError => e
    error_response(e)
  end
end
|
|
58
|
+
|
|
59
|
+
# Wrap vector-search records in an OpenSearch-style response hash.
#
# "_score" is 1.0 - vector_distance. A nil distance (for example a row whose embedding
# column is NULL) yields a nil score instead of a TypeError that would fail the whole
# search; "max_score" is the largest non-nil score.
#
# @param records [Enumerable] records responding to +id+, +as_json+ and optionally +vector_distance+
# @param model [Class] model whose +table_name+ is reported as "_index"
# @param _vector_node [Object] unused
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_vector_response(records, model, _vector_node)
  index_name = model.table_name

  hits = records.map do |record|
    distance = record.respond_to?(:vector_distance) ? record.vector_distance : 0
    {
      "_index" => index_name,
      "_id" => record.id.to_s,
      "_score" => distance.nil? ? nil : 1.0 - distance, # Convert distance to similarity score
      "_source" => record.as_json(except: [:vector_distance])
    }
  end

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => {
      "total" => { "value" => hits.size, "relation" => "eq" },
      "max_score" => hits.map { |hit| hit["_score"] }.compact.max,
      "hits" => hits
    }
  }
end
|
|
81
|
+
|
|
82
|
+
# Process a list of bulk actions one by one.
#
# Each item is whatever +process_bulk_action+ returns: either a wrapper such as
# { "index" => result } or a bare { "error" => ... }. The "errors" flag is true when
# an item carries an "error" at the top level OR inside its wrapped result (the
# wrapped case was previously missed, so failed index/delete actions went unreported).
#
# @param actions [Array<Hash>] bulk action descriptions
# @return [Hash] { "items" => Array<Hash>, "errors" => Boolean }
def execute_bulk(actions, **)
  items = actions.map { |action| process_bulk_action(action) }

  failed = items.any? do |item|
    item["error"] || item.each_value.any? { |result| result.is_a?(Hash) && result["error"] }
  end

  { "items" => items, "errors" => failed }
end
|
|
89
|
+
|
|
90
|
+
# Index creation is a no-op for this adapter; the call is simply acknowledged.
#
# @param _index_name [String] ignored
# @return [Hash] { "acknowledged" => true }
def execute_create_index(_index_name, **)
  acknowledgement = { "acknowledged" => true }
  acknowledgement
end
|
|
94
|
+
|
|
95
|
+
# Index deletion is a no-op for this adapter (tables are never dropped here);
# the call is simply acknowledged.
#
# @param _index_name [String] ignored
# @return [Hash] { "acknowledged" => true }
def execute_delete_index(_index_name, **)
  acknowledgement = { "acknowledged" => true }
  acknowledgement
end
|
|
99
|
+
|
|
100
|
+
# Report whether an "index" exists, i.e. a model resolves for the name and its
# table exists. Any StandardError is treated as "does not exist".
#
# @param index_name [String, Symbol] index name to resolve to a model
# @return [Boolean]
def execute_index_exists?(index_name)
  begin
    candidate = resolve_model([index_name])
    return false unless candidate.present?

    candidate.table_exists?
  rescue StandardError
    false
  end
end
|
|
106
|
+
|
|
107
|
+
# Create or update the row backing a document.
#
# Only keys that match a column name are assigned. Keys are normalized to Strings
# first, because +column_names+ are Strings and a symbol-keyed document would
# otherwise be sliced down to nothing and saved without any of its data.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @param document [Hash] attributes to assign
# @return [Hash] result hash; "result" is "created", "updated" or "error"
def execute_index_document(index, id, document, **)
  model = resolve_model([index])
  return { "_id" => id, "result" => "error", "error" => "Model not found" } unless model

  attributes = document.transform_keys(&:to_s).slice(*model.column_names)
  record = model.find_or_initialize_by(id: id)
  record.assign_attributes(attributes)
  record.save!

  { "_index" => index, "_id" => id, "result" => record.previously_new_record? ? "created" : "updated" }
rescue StandardError => e
  { "_index" => index, "_id" => id, "result" => "error", "error" => e.message }
end
|
|
119
|
+
|
|
120
|
+
# Apply a partial update to the row backing a document.
#
# Only keys that match a column name are written. Keys are normalized to Strings
# first, because +column_names+ are Strings and symbol-keyed changes would otherwise
# be discarded while the call still reports "updated".
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @param changes [Hash] attributes to update
# @return [Hash] result hash; "result" is "updated", "not_found" or "error"
def execute_update_document(index, id, changes, **)
  model = resolve_model([index])
  return { "_id" => id, "result" => "error", "error" => "Model not found" } unless model

  attributes = changes.transform_keys(&:to_s).slice(*model.column_names)
  model.find(id).update!(attributes)

  { "_index" => index, "_id" => id, "result" => "updated" }
rescue ActiveRecord::RecordNotFound
  { "_index" => index, "_id" => id, "result" => "not_found" }
rescue StandardError => e
  { "_index" => index, "_id" => id, "result" => "error", "error" => e.message }
end
|
|
133
|
+
|
|
134
|
+
# Destroy the row backing a document.
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key of the row
# @return [Hash] result hash; "result" is "deleted", "not_found" or "error"
def execute_delete_document(index, id, **)
  envelope = { "_index" => index, "_id" => id }

  begin
    target = resolve_model([index])
    return { "_id" => id, "result" => "error", "error" => "Model not found" } unless target

    target.destroy(id)
    envelope.merge("result" => "deleted")
  rescue ActiveRecord::RecordNotFound
    envelope.merge("result" => "not_found")
  rescue StandardError => e
    envelope.merge("result" => "error", "error" => e.message)
  end
end
|
|
145
|
+
|
|
146
|
+
# Report whether a row with the given id exists for the index's model.
# An unresolvable model or any StandardError counts as "does not exist".
#
# @param index [String, Symbol] index name used to resolve the model
# @param id [Object] primary key to look for
# @return [Boolean]
def execute_document_exists?(index, id)
  begin
    target = resolve_model([index])
    return false if target.nil?

    target.exists?(id: id) || false
  rescue StandardError
    false
  end
end
|
|
152
|
+
|
|
153
|
+
# Report database health in a cluster-health-like shape.
#
# Issues "SELECT 1" on the ActiveRecord connection: success reports "green" with a
# single node, any StandardError reports "red" with the error message.
#
# @return [Hash]
def execute_cluster_health(**)
  report = { "cluster_name" => "postgresql" }

  begin
    ActiveRecord::Base.connection.execute("SELECT 1")
    report.merge("status" => "green", "number_of_nodes" => 1)
  rescue StandardError => e
    report.merge("status" => "red", "error" => e.message)
  end
end
|
|
168
|
+
|
|
169
|
+
# Query building methods
|
|
170
|
+
|
|
171
|
+
# Translate a query hash into a relation.
#
# Clauses are layered in a fixed order: must (text matching), filter (exact matches),
# sort, then pagination.
#
# @param model [Class] the ActiveRecord model to query
# @param query_hash [Hash] reads :bool (must / filter), :sort and :paginate
# @return [ActiveRecord::Relation]
def build_search_scope(model, query_hash)
  matched = apply_must_clauses(model.all, query_hash[:bool]&.must || [], model)
  filtered = apply_filter_clauses(matched, query_hash[:bool]&.filter || [])
  sorted = apply_sorting(filtered, query_hash[:sort] || [])

  apply_pagination(sorted, query_hash[:paginate])
end
|
|
186
|
+
|
|
187
|
+
# Fold every must-node into the scope, dispatching on the node's AST class.
# Node types without a handler leave the scope unchanged.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param must_nodes [Array] AST nodes (Match, MultiMatch, Wildcard, Range, Prefix)
# @param model [Class] model passed to the match handlers
# @return [ActiveRecord::Relation]
def apply_must_clauses(scope, must_nodes, model)
  return scope if must_nodes.empty?

  must_nodes.inject(scope) do |relation, clause|
    case clause
    when AST::Match then apply_match(relation, clause, model)
    when AST::MultiMatch then apply_multi_match(relation, clause, model)
    when AST::Wildcard then apply_wildcard(relation, clause)
    when AST::Range then apply_range(relation, clause)
    when AST::Prefix then apply_prefix(relation, clause)
    else relation
    end
  end
end
|
|
209
|
+
|
|
210
|
+
# Add a fuzzy/substring match on a single field.
#
# - pg_trgm available and the column is textual: trigram similarity (%) OR ILIKE,
#   wrapped in unaccent() only when that extension is actually installed (previously
#   unaccent() was emitted whenever pg_trgm was present, which fails at query time
#   on databases without it).
# - otherwise: plain ILIKE substring match.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#field, #value] match node
# @param model [Class] model used to check the column type
# @return [ActiveRecord::Relation]
def apply_match(scope, node, model)
  field = node.field.to_s
  term = node.value.to_s
  column = quoted_column(field)
  like_pattern = "%#{sanitize_like(term)}%"

  # Fallback to ILIKE when trigram matching does not apply
  return scope.where("#{column} ILIKE ?", like_pattern) unless trgm_available? && text_column?(model, field)

  if unaccent_available?
    scope.where(
      "unaccent(#{column}) % unaccent(?) OR unaccent(#{column}) ILIKE unaccent(?)",
      term,
      like_pattern
    )
  else
    scope.where("#{column} % ? OR #{column} ILIKE ?", term, like_pattern)
  end
end
|
|
227
|
+
|
|
228
|
+
# Add a match of one query string against several fields, OR-ed together.
#
# Per field: trigram similarity OR ILIKE when pg_trgm is available and the column is
# textual (wrapped in unaccent() only when that extension is installed), otherwise a
# plain ILIKE substring match. Conditions and bind values are built in one pass so
# they cannot drift apart.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#query, #fields] multi-match node
# @param model [Class] model used to check column types
# @return [ActiveRecord::Relation] the scope itself when the node lists no fields
def apply_multi_match(scope, node, model)
  term = node.query.to_s
  like_pattern = "%#{sanitize_like(term)}%"
  fuzzy = trgm_available?
  strip_accents = fuzzy && unaccent_available?

  clauses = []
  binds = []

  node.fields.map(&:to_s).each do |field|
    column = quoted_column(field)

    if fuzzy && text_column?(model, field)
      clauses << if strip_accents
                   "(unaccent(#{column}) % unaccent(?) OR unaccent(#{column}) ILIKE unaccent(?))"
                 else
                   "(#{column} % ? OR #{column} ILIKE ?)"
                 end
      binds.push(term, like_pattern)
    else
      clauses << "#{column} ILIKE ?"
      binds << like_pattern
    end
  end

  return scope if clauses.empty?

  scope.where(clauses.join(" OR "), *binds)
end
|
|
251
|
+
|
|
252
|
+
# Add a wildcard match, translating OpenSearch wildcards to SQL LIKE wildcards
# (* -> %, ? -> _).
#
# Characters that are already special to LIKE (%, _ and \) are escaped BEFORE the
# translation, so a literal "%" or "_" in the search value is matched literally
# instead of acting as an unintended wildcard.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#field, #value] wildcard node
# @return [ActiveRecord::Relation]
def apply_wildcard(scope, node)
  pattern = sanitize_like(node.value).tr("*?", "%_")

  scope.where("#{quoted_column(node.field.to_s)} ILIKE ?", pattern)
end
|
|
259
|
+
|
|
260
|
+
# Add range comparisons for whichever bounds the node defines.
# Bounds are applied in the order gte, lte, gt, lt; absent (nil/false) bounds are skipped.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#field, #gte, #lte, #gt, #lt] range node
# @return [ActiveRecord::Relation]
def apply_range(scope, node)
  column = quoted_column(node.field.to_s)
  bounds = { ">=" => node.gte, "<=" => node.lte, ">" => node.gt, "<" => node.lt }

  bounds.inject(scope) do |relation, (operator, bound)|
    bound ? relation.where("#{column} #{operator} ?", bound) : relation
  end
end
|
|
270
|
+
|
|
271
|
+
# Add a case-insensitive "starts with" match on a field.
# LIKE metacharacters in the value are escaped before the trailing % is appended.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#field, #value] prefix node
# @return [ActiveRecord::Relation]
def apply_prefix(scope, node)
  column = quoted_column(node.field.to_s)
  starts_with = sanitize_like(node.value) + "%"

  scope.where("#{column} ILIKE ?", starts_with)
end
|
|
274
|
+
|
|
275
|
+
# Fold every filter node into the scope.
#
# A node whose value is a Hash with a :geo_distance entry goes to the geo filter;
# every other node becomes an equality condition (+where(field => value)+).
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param filter_nodes [Array<#field, #value>] filter nodes
# @return [ActiveRecord::Relation]
def apply_filter_clauses(scope, filter_nodes)
  return scope if filter_nodes.empty?

  filter_nodes.inject(scope) do |relation, node|
    criterion = node.value
    geo = criterion.is_a?(Hash) && criterion[:geo_distance]

    geo ? apply_geo_filter(relation, node) : relation.where(node.field => criterion)
  end
end
|
|
290
|
+
|
|
291
|
+
# Add a radius filter around a geo point using PostGIS ST_DWithin.
#
# The geo point is the first entry of the :geo_distance config whose value is a Hash
# with :lat and :lon; without one the scope is returned unchanged. The field name is
# quoted like in every other clause builder instead of being interpolated raw.
#
# @param scope [ActiveRecord::Relation] relation to narrow
# @param node [#field, #value] filter node whose value contains :geo_distance
# @return [ActiveRecord::Relation]
def apply_geo_filter(scope, node)
  # Requires PostGIS
  geo_config = node.value[:geo_distance]
  distance = geo_config[:distance]
  column = quoted_column(node.field.to_s)

  # Find the geo point in config
  geo_point = geo_config.find { |_k, v| v.is_a?(Hash) && v[:lat] && v[:lon] }&.last
  return scope unless geo_point

  # Use PostGIS ST_DWithin for efficient geo filtering
  scope.where(
    "ST_DWithin(#{column}::geography, ST_SetSRID(ST_MakePoint(?, ?), 4326)::geography, ?)",
    geo_point[:lon],
    geo_point[:lat],
    parse_distance(distance)
  )
rescue StandardError
  # Best effort: a malformed geo config skips the filter. NOTE(review): the relation is
  # built lazily, so a missing PostGIS extension would surface when the query runs,
  # not here — confirm whether that case needs handling by the caller.
  scope
end
|
|
312
|
+
|
|
313
|
+
# Add an ORDER BY built from the sort nodes.
# Any direction other than "desc" (case-insensitive) sorts ascending.
#
# @param scope [ActiveRecord::Relation] relation to order
# @param sort_nodes [Array<#field, #direction>] sort nodes, highest priority first
# @return [ActiveRecord::Relation]
def apply_sorting(scope, sort_nodes)
  return scope if sort_nodes.empty?

  ordering = sort_nodes.map do |node|
    keyword = node.direction.to_s.upcase
    keyword = "ASC" unless keyword == "DESC"
    [quoted_column(node.field.to_s), keyword].join(" ")
  end

  scope.order(Arel.sql(ordering.join(", ")))
end
|
|
323
|
+
|
|
324
|
+
# Apply LIMIT/OFFSET for the requested page.
#
# Defaults to page 1 with DEFAULT_LIMIT rows. The page is clamped to at least 1 and
# per_page to at least 0, so a page of 0 or below can no longer produce a negative
# OFFSET; both values are coerced with +to_i+ so numeric strings are accepted.
#
# @param scope [ActiveRecord::Relation] relation to paginate
# @param paginate_node [#page, #per_page, nil] pagination node
# @return [ActiveRecord::Relation]
def apply_pagination(scope, paginate_node)
  page = [(paginate_node&.page || 1).to_i, 1].max
  per_page = [(paginate_node&.per_page || DEFAULT_LIMIT).to_i, 0].max

  scope.limit(per_page).offset((page - 1) * per_page)
end
|
|
332
|
+
|
|
333
|
+
# Response formatting
|
|
334
|
+
|
|
335
|
+
# Wrap loaded records in an OpenSearch-style response hash.
#
# Every hit gets a constant score of 1.0; "total" is the number of records passed in.
#
# @param records [Array] records responding to +id+ and +as_json+
# @param model [Class] model whose +table_name+ is reported as "_index"
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_as_search_response(records, model)
  hit_count = records.size

  documents = records.map do |row|
    hit = { "_index" => model.table_name, "_id" => row.id.to_s }
    hit["_score"] = 1.0
    hit["_source"] = row.as_json
    hit
  end

  hits_section = {
    "total" => { "value" => hit_count, "relation" => "eq" },
    "max_score" => (1.0 if documents.any?),
    "hits" => documents
  }

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => hits_section
  }
end
|
|
358
|
+
|
|
359
|
+
# Build a successful response that contains no hits.
#
# @return [Hash] response with zero total, nil max_score and an empty hit list
def empty_response
  no_hits = {
    "total" => { "value" => 0, "relation" => "eq" },
    "max_score" => nil,
    "hits" => []
  }
  shards = { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 }

  { "took" => 0, "timed_out" => false, "_shards" => shards, "hits" => no_hits }
end
|
|
371
|
+
|
|
372
|
+
# Build a response describing a failed search: no hits, one failed shard, and an
# "error" entry carrying the exception's class name and message.
#
# @param error [Exception] the failure to report
# @return [Hash]
def error_response(error)
  no_hits = {
    "total" => { "value" => 0, "relation" => "eq" },
    "max_score" => nil,
    "hits" => []
  }
  shards = { "total" => 1, "successful" => 0, "skipped" => 0, "failed" => 1 }
  failure = { "type" => error.class.name, "reason" => error.message }

  { "took" => 0, "timed_out" => false, "_shards" => shards, "hits" => no_hits, "error" => failure }
end
|
|
385
|
+
|
|
386
|
+
# Helper methods
|
|
387
|
+
|
|
388
|
+
# Work out which model class backs an index.
#
# Resolution order: an explicitly supplied class, then an entry in
# @model_class_cache (when that cache exists and has the key), then the constant
# obtained by classifying the first index name. Returns nil when no index name is
# given or the constant lookup raises NameError.
#
# @param indexes [Array, nil] index names; only the first is used
# @param model_class [Class, nil] explicit model, returned as-is when given
# @return [Class, nil]
def resolve_model(indexes, model_class = nil)
  begin
    if model_class
      model_class
    elsif (name = indexes&.first)
      cache = @model_class_cache
      cache&.key?(name) ? cache[name] : name.to_s.classify.constantize
    end
  rescue NameError
    nil
  end
end
|
|
403
|
+
|
|
404
|
+
# Whether the pg_trgm extension is listed by +available_extensions+.
#
# The answer is memoized, including a negative one (`||=` would repeat the lookup
# on every call when the extension is missing).
#
# @return [Boolean]
def trgm_available?
  return @trgm_available if defined?(@trgm_available)

  @trgm_available = available_extensions.include?("pg_trgm")
end
|
|
407
|
+
|
|
408
|
+
# Whether the unaccent extension is listed by +available_extensions+.
#
# The answer is memoized, including a negative one (`||=` would repeat the lookup
# on every call when the extension is missing).
#
# @return [Boolean]
def unaccent_available?
  return @unaccent_available if defined?(@unaccent_available)

  @unaccent_available = available_extensions.include?("unaccent")
end
|
|
411
|
+
|
|
412
|
+
# Whether a field maps to a :string or :text column of the model.
#
# @param model [Class] model exposing +columns_hash+
# @param field [String, Symbol] column name
# @return [Boolean, nil] nil when the model has no such column
def text_column?(model, field)
  definition = model.columns_hash[field.to_s]
  return definition unless definition

  [:string, :text].include?(definition.type)
end
|
|
416
|
+
|
|
417
|
+
# Quote an identifier for use in hand-built SQL, using the ActiveRecord connection.
#
# @param field [String, Symbol] column name
# @return [String] the quoted column name
def quoted_column(field)
  connection = ActiveRecord::Base.connection
  connection.quote_column_name(field)
end
|
|
420
|
+
|
|
421
|
+
# Escape the characters that are special inside a LIKE pattern (%, _ and the
# backslash itself) by prefixing each with a backslash.
#
# @param value [Object] value to escape; converted with +to_s+
# @return [String]
def sanitize_like(value)
  escapes = { "%" => "\\%", "_" => "\\_", "\\" => "\\\\" }
  value.to_s.gsub(/[%_\\]/, escapes)
end
|
|
425
|
+
|
|
426
|
+
# Convert an OpenSearch-style distance (e.g. "10km", "5mi", "250 m") to metres.
#
# The unit is read as a whole word, so "5mm" or "10 kilometers" are no longer
# mistaken for metres. Supported units: km, mi, m, cm, mm, yd, ft, in, nmi/NM and
# their long names (case-insensitive). A bare number, or a value with an
# unrecognised unit, is interpreted as metres via +to_f+.
#
# @param distance [String, Numeric, nil] distance with optional unit
# @return [Float] distance in metres
def parse_distance(distance)
  meters_per_unit = {
    "km" => 1000.0, "kilometer" => 1000.0, "kilometers" => 1000.0,
    # 1609.34 is kept (rather than 1609.344) so existing mile results do not change
    "mi" => 1609.34, "mile" => 1609.34, "miles" => 1609.34,
    "m" => 1.0, "meter" => 1.0, "meters" => 1.0,
    "cm" => 0.01, "centimeter" => 0.01, "centimeters" => 0.01,
    "mm" => 0.001, "millimeter" => 0.001, "millimeters" => 0.001,
    "yd" => 0.9144, "yard" => 0.9144, "yards" => 0.9144,
    "ft" => 0.3048, "foot" => 0.3048, "feet" => 0.3048,
    "in" => 0.0254, "inch" => 0.0254, "inches" => 0.0254,
    "nmi" => 1852.0, "nm" => 1852.0, "nauticalmiles" => 1852.0
  }

  match = distance.to_s.match(/(\d+(?:\.\d+)?)\s*([a-z]+)/i)
  factor = match && meters_per_unit[match[2].downcase]
  return distance.to_f unless factor

  match[1].to_f * factor
end
|
|
439
|
+
|
|
440
|
+
# Execute one bulk action and wrap its result under the action's name.
#
# Recognises :index (uses :_index, :_id, :data) and :delete (uses :_index, :_id);
# anything else yields an "Unknown action type" error entry.
#
# @param action [Hash] a single bulk action
# @return [Hash] { "index" => result }, { "delete" => result } or { "error" => message }
def process_bulk_action(action)
  if (payload = action[:index])
    { "index" => execute_index_document(payload[:_index], payload[:_id], payload[:data]) }
  elsif (payload = action[:delete])
    { "delete" => execute_delete_document(payload[:_index], payload[:_id]) }
  else
    { "error" => "Unknown action type" }
  end
end
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
end
|
|
461
|
+
end
|