noiseless 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +313 -0
- data/lib/noiseless/adapters/elasticsearch.rb +70 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
- data/lib/noiseless/adapters/open_search.rb +208 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +70 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/filter.rb +15 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +15 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +15 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +15 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +130 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +260 -14
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
require "uri"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
module ExecutionModules
|
|
8
|
+
module TypesenseExecution
|
|
9
|
+
# Closes every HTTP client held in @clients.
# Does nothing (and returns nil) when no clients have been set up.
def close
  return if @clients.nil?

  @clients.each_value { |client| client.close }
end
|
|
12
|
+
|
|
13
|
+
private
|
|
14
|
+
|
|
15
|
+
# Override AST to Hash conversion for Typesense query format
|
|
16
|
+
# Flattens a search AST root into the Hash of Typesense search parameters.
#
# Keys are inserted in a fixed order (q, query_by, filter_by, sort_by, page,
# per_page, then the optional features) because execute_search serialises the
# Hash in insertion order.
#
# @param ast_node [Object] AST root responding to bool, sort, paginate, collapse,
#   aggregations, vector/hybrid/image/conversation accessors and joins
# @return [Hash{Symbol => Object}] Typesense query parameters
def ast_to_hash(ast_node)
  bool = ast_node.bool
  params = {}

  # Text query, searched fields, filters and ordering; blank strings are left out.
  text_params = {
    q: build_search_query(bool),
    query_by: build_query_by_fields(bool),
    filter_by: build_filter_expression(bool),
    sort_by: build_sort_expression(ast_node.sort)
  }
  text_params.each { |key, value| params[key] = value unless value.empty? }

  params.merge!(build_pagination_params(ast_node.paginate))

  # Field collapsing maps onto group_by with a single hit per group.
  if (collapse = ast_node.collapse)
    params[:group_by] = collapse.field
    params[:group_limit] = 1
    # Typesense v30+: improve found accuracy for grouped results up to this threshold.
    max_candidates = collapse.max_concurrent_group_searches
    params[:group_max_candidates] = max_candidates if max_candidates
  end

  # Only :terms aggregations translate to facets.
  if ast_node.aggregations.any?
    facets = ast_node.aggregations.filter_map { |agg| agg.field if agg.type == :terms }
    params[:facet_by] = facets.join(",") if facets.any?
  end

  # Pure vector search: "field_name:([vector], k:N)".
  if ast_node.vector_search?
    vec = ast_node.vector
    embedding = vec.embedding.join(",")
    params[:vector_query] = "#{vec.field}:([#{embedding}], k:#{vec.k})"
  end

  # Hybrid search overrides q and vector_query with the hybrid node's values.
  if ast_node.hybrid_search?
    hybrid = ast_node.hybrid
    vec = hybrid.vector
    embedding = vec.embedding.join(",")
    params[:q] = hybrid.text_query
    params[:vector_query] = "#{vec.field}:([#{embedding}], k:#{vec.k}, alpha:#{hybrid.vector_weight})"
  end

  # Image search passes the image data straight through in vector_query.
  if ast_node.image_search?
    image = ast_node.image_query
    params[:vector_query] = "#{image.field}:(#{image.image_data}, k:#{image.k})"
  end

  # Conversational/RAG search.
  if ast_node.conversational?
    conversation = ast_node.conversation
    params[:conversation] = true
    params[:conversation_model_id] = conversation.model_id
    params[:conversation_id] = conversation.conversation_id if conversation.conversation_id
    params[:system_prompt] = conversation.system_prompt if conversation.system_prompt
  end

  # Joins become "$collection(field, field)" entries in include_fields.
  if ast_node.has_joins?
    joined = ast_node.joins.map do |join_node|
      "$#{join_node.collection}(#{join_node.include_fields.join(', ')})"
    end
    params[:include_fields] = joined.join(", ")
  end

  # Union-search related options (Typesense v30+); false is a valid value, only nil is skipped.
  %i[remove_duplicates facet_sample_slope pinned_hits].each do |option|
    value = ast_node.public_send(option)
    params[option] = value unless value.nil?
  end

  params
end
|
|
109
|
+
|
|
110
|
+
# Builds the free-text query string from the `must` clauses of a bool node.
#
# Match nodes (and any other node exposing #value) contribute "field:value",
# MultiMatch nodes contribute their raw query, Range nodes contribute nothing
# because they are emitted as filters instead.
#
# @param bool_node [Object] node responding to #must
# @return [String] space-separated query terms ("" when there are none)
def build_search_query(bool_node)
  terms = []

  bool_node.must.each do |clause|
    term =
      case clause
      when AST::Match then "#{clause.field}:#{clause.value}"
      when AST::MultiMatch then clause.query
      when AST::Range then nil
      else
        "#{clause.field}:#{clause.value}" if clause.respond_to?(:value)
      end

    terms << term if term
  end

  terms.join(" ")
end
|
|
128
|
+
|
|
129
|
+
# Collects the fields of every MultiMatch clause for the query_by parameter.
#
# @param bool_node [Object] node responding to #must
# @return [String] unique field names joined with "," ("" when there are none)
def build_query_by_fields(bool_node)
  bool_node.must
           .grep(AST::MultiMatch)
           .filter_map(&:fields)
           .flatten
           .uniq
           .join(",")
end
|
|
140
|
+
|
|
141
|
+
# Builds the filter_by expression from a bool node.
#
# Filter clauses become "field:=value"; Range clauses found in `must` become one
# comparison per bound that is set (gt, gte, lt, lte, in that order). A Range
# with no bounds contributes nothing, so the result never carries a dangling
# " && " separator.
#
# @param bool_node [Object] node responding to #filter and #must
# @return [String] conditions joined with " && " ("" when there are none)
def build_filter_expression(bool_node)
  exact_filters = bool_node.filter.map { |filter| "#{filter.field}:=#{filter.value}" }

  range_filters = bool_node.must.grep(AST::Range).flat_map do |range|
    bounds = { ">" => range.gt, ">=" => range.gte, "<" => range.lt, "<=" => range.lte }
    bounds.filter_map { |operator, bound| "#{range.field}:#{operator}#{bound}" if bound }
  end

  (exact_filters + range_filters).join(" && ")
end
|
|
159
|
+
|
|
160
|
+
# Renders sort nodes as a "field:direction" list for sort_by.
# Any direction other than :desc is treated as ascending.
#
# @param sort_nodes [Enumerable] nodes responding to #field and #direction
# @return [String] comma-separated sort clauses
def build_sort_expression(sort_nodes)
  sort_nodes
    .map { |node| "#{node.field}:#{node.direction == :desc ? 'desc' : 'asc'}" }
    .join(",")
end
|
|
168
|
+
|
|
169
|
+
# Returns the page/per_page parameters for a paginate node.
# Falls back to page 1 with 20 results when no node is given.
#
# @param paginate_node [Object, nil] node responding to #page and #per_page
# @return [Hash{Symbol => Object}]
def build_pagination_params(paginate_node)
  if paginate_node
    { page: paginate_node.page, per_page: paginate_node.per_page }
  else
    { page: 1, per_page: 20 }
  end
end
|
|
177
|
+
|
|
178
|
+
# Runs a search and reshapes the Typesense response into an Elasticsearch-like Hash.
#
# Query values are form-encoded with URI.encode_www_form_component. The previous
# CGI.escape call depended on "cgi" having been loaded by something else; when it
# was not, the NameError was swallowed by the rescue below and every search came
# back empty.
#
# @param query_hash [Hash] Typesense search parameters (see ast_to_hash)
# @param collections [Array<String>] only the first collection is searched
# @return [Hash] took/timed_out/_shards/hits, plus :error when the request failed
def execute_search(query_hash, collections: [], **_opts)
  # NOTE(review): Typesense documents /multi_search as a POST endpoint taking a
  # `searches` body — confirm this GET fallback behaves as intended.
  collection_path = collections.any? ? "/collections/#{collections.first}/documents/search" : "/multi_search"

  params = query_hash.map { |key, value| "#{key}=#{URI.encode_www_form_component(value.to_s)}" }.join("&")

  response = get_request("#{collection_path}?#{params}")
  result = JSON.parse(response.read)

  hits = (result["hits"] || []).map do |hit|
    {
      _index: collections.first || "typesense",
      _type: "_doc",
      _id: hit["document"]["id"],
      _score: hit["text_match"] || 1.0,
      _source: hit["document"]
    }
  end

  {
    took: result["search_time_ms"] || 0,
    timed_out: false,
    _shards: { total: 1, successful: 1, skipped: 0, failed: 0 },
    hits: {
      total: { value: result["found"] || 0, relation: "eq" },
      max_score: nil,
      hits: hits
    }
  }
rescue StandardError => e
  # Return an empty response on error to stay compatible with callers that
  # expect the Elasticsearch shape; the failure is reported under :error.
  {
    took: 0,
    timed_out: false,
    _shards: { total: 0, successful: 0, skipped: 0, failed: 0 },
    hits: {
      total: { value: 0, relation: "eq" },
      max_score: nil,
      hits: []
    },
    error: {
      type: e.class.name,
      reason: e.message
    }
  }
ensure
  response&.close
end
|
|
226
|
+
|
|
227
|
+
# Applies Elasticsearch-style bulk actions one request at a time.
#
# Supports :index (POST to the collection's documents) and :delete actions; any
# other action yields a 400 item. The index response is closed even when its
# body cannot be parsed. Any failure aborts the whole batch and returns
# `errors: true` with an empty item list.
#
# @param actions [Array<Hash>] actions keyed by :index or :delete with :_index, :_id
#   (and :data for index)
# @return [Hash] `{ items: [...] }`, or `{ items: [], errors: true, error: {...} }`
def execute_bulk(actions, **_opts)
  results = actions.map do |action|
    if (index = action[:index])
      path = "/collections/#{index[:_index]}/documents"
      body = JSON.generate(index[:data].merge(id: index[:_id]))

      response = post_request(path, body)
      created =
        begin
          JSON.parse(response.read)
        ensure
          response.close
        end

      { index: { _id: created["id"], status: 201, result: "created" } }
    elsif (delete = action[:delete])
      delete_request("/collections/#{delete[:_index]}/documents/#{delete[:_id]}").close

      { delete: { _id: delete[:_id], status: 200, result: "deleted" } }
    else
      { error: { status: 400, error: "Unsupported action" } }
    end
  end

  { items: results }
rescue StandardError => e
  { items: [], errors: true, error: { type: e.class.name, reason: e.message } }
end
|
|
262
|
+
|
|
263
|
+
# Creates a Typesense collection (the counterpart of an index).
#
# Each entry under mappings["properties"] becomes a schema field whose type is
# translated by map_type_to_typesense; without mappings the collection is
# created with an empty field list.
#
# @param collection_name [String]
# @param mappings [Hash, nil] Elasticsearch-style mappings with String keys
# @return [Hash] `{ acknowledged: true, index: name }` or `{ acknowledged: false, error: {...} }`
def execute_create_index(collection_name, mappings: nil, **_opts)
  properties = (mappings && mappings["properties"]) || {}

  fields = properties.map do |field_name, field_config|
    {
      name: field_name,
      type: map_type_to_typesense(field_config["type"] || "string"),
      facet: field_config["facet"] || false
    }
  end

  schema = { name: collection_name, fields: fields }
  response = post_request("/collections", JSON.generate(schema))
  created = JSON.parse(response.read)

  { acknowledged: true, index: created["name"] }
rescue StandardError => e
  { acknowledged: false, error: { type: e.class.name, reason: e.message } }
ensure
  response&.close
end
|
|
291
|
+
|
|
292
|
+
# Deletes a collection.
#
# @param collection_name [String]
# @return [Hash] `{ acknowledged: true }`, or `{ acknowledged: false, error: {...} }`
#   when the request or the body parsing raised
def execute_delete_index(collection_name, **_opts)
  response = delete_request("/collections/#{collection_name}")
  # The parsed body is discarded; parsing only surfaces malformed responses as errors.
  JSON.parse(response.read)

  { acknowledged: true }
rescue StandardError => e
  failure = { type: e.class.name, reason: e.message }
  { acknowledged: false, error: failure }
ensure
  response&.close
end
|
|
302
|
+
|
|
303
|
+
# Checks whether a collection exists by issuing a HEAD request for it.
#
# @param collection_name [String]
# @return [Boolean] the response's success flag; false when the request raised
def execute_index_exists?(collection_name)
  collection_path = "/collections/#{collection_name}"
  response = head_request(collection_path)
  response.success?
rescue StandardError
  false
ensure
  response&.close
end
|
|
311
|
+
|
|
312
|
+
# Stores a document in a collection, forcing its id to the given value.
#
# @param collection [String]
# @param id [Object] merged into the document under :id
# @param document [Hash]
# @return [Hash] `{ _index:, _id:, result: "created" }` or a result: "error" Hash
def execute_index_document(collection, id, document, **_opts)
  payload = JSON.generate(document.merge(id: id))
  response = post_request("/collections/#{collection}/documents", payload)
  indexed = JSON.parse(response.read)

  { _index: collection, _id: indexed["id"], result: "created" }
rescue StandardError => e
  failure = { type: e.class.name, reason: e.message }
  { _index: collection, _id: id, result: "error", error: failure }
ensure
  response&.close
end
|
|
325
|
+
|
|
326
|
+
# Updates a document by fetching it, merging the changes and writing it back.
#
# The fetched document has String keys (it comes from JSON.parse), so the keys
# of `changes` and the id are normalised to Strings before merging. Merging
# symbol keys directly produced duplicate members ("title" twice, "id" twice)
# in the generated JSON body.
#
# NOTE(review): Typesense also documents a PATCH endpoint for partial document
# updates — confirm whether this fetch-merge-PUT flow is still required.
#
# @param collection [String]
# @param id [Object]
# @param changes [Hash] attributes to overwrite (String or Symbol keys)
# @return [Hash] `{ _index:, _id:, result: "updated" }` or a result: "error" Hash
def execute_update_document(collection, id, changes, **_opts)
  document_path = "/collections/#{collection}/documents/#{id}"

  get_response = get_request(document_path)
  document =
    begin
      JSON.parse(get_response.read)
    ensure
      # Release the GET response even when its body is not valid JSON.
      get_response.close
    end

  updated_document = document.merge(changes.transform_keys(&:to_s)).merge("id" => id)

  response = put_request(document_path, JSON.generate(updated_document))
  result = JSON.parse(response.read)

  { _index: collection, _id: result["id"], result: "updated" }
rescue StandardError => e
  { _index: collection, _id: id, result: "error", error: { type: e.class.name, reason: e.message } }
ensure
  response&.close
end
|
|
344
|
+
|
|
345
|
+
# Deletes a single document from a collection.
#
# @param collection [String]
# @param id [Object]
# @return [Hash] `{ _index:, _id:, result: "deleted" }`, or a result: "error" Hash
#   when the request raised
def execute_delete_document(collection, id, **_opts)
  document_path = "/collections/#{collection}/documents/#{id}"
  response = delete_request(document_path)

  { _index: collection, _id: id, result: "deleted" }
rescue StandardError => e
  failure = { type: e.class.name, reason: e.message }
  { _index: collection, _id: id, result: "error", error: failure }
ensure
  response&.close
end
|
|
354
|
+
|
|
355
|
+
# Checks whether a document exists by issuing a HEAD request for it.
#
# @param collection [String]
# @param id [Object]
# @return [Boolean] the response's success flag; false when the request raised
def execute_document_exists?(collection, id)
  document_path = "/collections/#{collection}/documents/#{id}"
  response = head_request(document_path)
  response.success?
rescue StandardError
  false
ensure
  response&.close
end
|
|
363
|
+
|
|
364
|
+
# Reads /health and reports it in an Elasticsearch-like cluster health shape.
#
# The "ok" flag of the health payload maps to status "green" (truthy) or "red";
# node and shard counts are fixed placeholder values.
#
# @return [Hash] health summary; carries :error and status "red" when the request failed
def execute_cluster_health(**_opts)
  response = get_request("/health")
  ok = JSON.parse(response.read)["ok"]

  {
    cluster_name: "typesense",
    status: ok ? "green" : "red",
    timed_out: false,
    number_of_nodes: 1,
    number_of_data_nodes: 1,
    active_primary_shards: 0,
    active_shards: 0,
    typesense_ok: ok
  }
rescue StandardError => e
  failure = { type: e.class.name, reason: e.message }

  {
    cluster_name: "unknown",
    status: "red",
    timed_out: false,
    number_of_nodes: 0,
    number_of_data_nodes: 0,
    active_primary_shards: 0,
    active_shards: 0,
    error: failure
  }
ensure
  response&.close
end
|
|
393
|
+
|
|
394
|
+
# HTTP helpers using Async::HTTP with connection pooling
|
|
395
|
+
# Sends a GET for `path` with the default headers through a pooled client.
# Returns whatever the client's #get returns.
def get_request(path)
  with_client { |client| client.get(path, default_headers) }
end
|
|
400
|
+
|
|
401
|
+
# Sends a POST with `body` through a pooled client.
# A content-type header is appended only when a body is given.
#
# @param path [String]
# @param body [String, nil]
# @param content_type [String]
def post_request(path, body, content_type: "application/json")
  headers = default_headers
  headers += [["content-type", content_type]] if body

  with_client { |client| client.post(path, headers, body) }
end
|
|
408
|
+
|
|
409
|
+
# Sends a PUT with `body` through a pooled client.
# A content-type header is appended only when a body is given.
#
# @param path [String]
# @param body [String, nil]
# @param content_type [String]
def put_request(path, body, content_type: "application/json")
  headers = default_headers
  headers += [["content-type", content_type]] if body

  with_client { |client| client.put(path, headers, body) }
end
|
|
416
|
+
|
|
417
|
+
# Sends a DELETE for `path` with the default headers through a pooled client.
# Returns whatever the client's #delete returns.
def delete_request(path)
  with_client { |client| client.delete(path, default_headers) }
end
|
|
422
|
+
|
|
423
|
+
# Sends a HEAD for `path` with the default headers through a pooled client.
# Returns whatever the client's #head returns.
def head_request(path)
  with_client { |client| client.head(path, default_headers) }
end
|
|
428
|
+
|
|
429
|
+
# Yields the client of a randomly chosen host (simple load balancing) and
# returns the block's result.
def with_client
  yield @clients[@hosts.sample]
end
|
|
436
|
+
|
|
437
|
+
# Builds the headers sent with every request: accept, user-agent and, when
# @connection_params carries an :api_key, the X-TYPESENSE-API-KEY header.
#
# @return [Array<Array<String>>] a fresh array of [name, value] pairs
def default_headers
  api_key = @connection_params && @connection_params[:api_key]
  base = [
    ["accept", "application/json"],
    ["user-agent", "Noiseless/#{Noiseless::VERSION} (Ruby/#{RUBY_VERSION})"]
  ]

  api_key ? base + [["X-TYPESENSE-API-KEY", api_key]] : base
end
|
|
451
|
+
|
|
452
|
+
# rubocop:disable Lint/DuplicateBranch
|
|
453
|
+
# Translates an Elasticsearch field type name into a Typesense field type.
#
# @param elasticsearch_type [String]
# @return [String] "string", "int64", "float" or "bool"
def map_type_to_typesense(elasticsearch_type)
  case elasticsearch_type
  when "text", "keyword" then "string"
  when "long", "integer", "short", "byte", "date" then "int64" # date uses Unix timestamps
  when "double", "float", "half_float", "scaled_float" then "float"
  when "boolean" then "bool"
  else "string" # unknown types default to string
  end
end
|
|
468
|
+
# rubocop:enable Lint/DuplicateBranch
|
|
469
|
+
end
|
|
470
|
+
end
|
|
471
|
+
end
|
|
472
|
+
end
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "execution_modules/opensearch_execution"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
class OpenSearch < Adapter
|
|
8
|
+
include ExecutionModules::OpensearchExecution
|
|
9
|
+
|
|
10
|
+
# Sets up one Async::HTTP client per host before delegating to the base adapter.
#
# With no hosts given, a single localhost endpoint is used on the port from
# ENV["OPENSEARCH_PORT"] (9200 when unset).
#
# @param hosts [Array<String>, String] endpoint URLs
# @param connection_params [Hash] stored as-is and forwarded to the superclass
def initialize(hosts: [], **connection_params)
  @hosts = Array(hosts)
  @hosts = ["http://localhost:#{ENV.fetch('OPENSEARCH_PORT', 9200)}"] if @hosts.empty?
  @connection_params = connection_params

  @clients = @hosts.to_h do |host|
    [host, Async::HTTP::Client.new(Async::HTTP::Endpoint.parse(host))]
  end

  super(hosts: @hosts, **connection_params)
end
|
|
26
|
+
|
|
27
|
+
# OpenSearch-specific features
|
|
28
|
+
# Converts the AST to a query Hash, then runs a point-in-time search inside an
# Async block.
#
# @param ast_node [Object] search AST root
# @param pit_id [String] point-in-time identifier
# @param options [Hash] extra keywords forwarded to execute_point_in_time_search
# @return [Object] whatever `Async` returns for the block
def point_in_time_search(ast_node, pit_id:, **options)
  query_hash = ast_to_hash(ast_node)

  Async { execute_point_in_time_search(query_hash, pit_id: pit_id, **options) }
end
|
|
34
|
+
|
|
35
|
+
# Runs a stored search template inside an Async block.
#
# @param template_id [String]
# @param params [Hash] template parameters
# @param options [Hash] extra keywords forwarded to execute_search_template
# @return [Object] whatever `Async` returns for the block
def search_template(template_id:, params: {}, **options)
  Async { execute_search_template(template_id: template_id, params: params, **options) }
end
|
|
40
|
+
|
|
41
|
+
# Cluster health API - needed for Rails healthcheck
|
|
42
|
+
# Memoized ClusterAPI facade bound to this adapter.
def cluster
  return @cluster if @cluster

  @cluster = ClusterAPI.new(self)
end
|
|
45
|
+
|
|
46
|
+
# Indices API - needed for index management operations
|
|
47
|
+
# Memoized IndicesAPI facade bound to this adapter.
def indices
  return @indices if @indices

  @indices = IndicesAPI.new(self)
end
|
|
50
|
+
|
|
51
|
+
# Search Pipelines API - OpenSearch 3.x feature
|
|
52
|
+
# Memoized PipelinesAPI facade bound to this adapter.
def pipelines
  return @pipelines if @pipelines

  @pipelines = PipelinesAPI.new(self)
end
|
|
55
|
+
|
|
56
|
+
# Query Rules API - OpenSearch 3.x feature
|
|
57
|
+
# Memoized RulesAPI facade bound to this adapter.
def rules
  return @rules if @rules

  @rules = RulesAPI.new(self)
end
|
|
60
|
+
|
|
61
|
+
# Raw search for CommonShare compatibility
|
|
62
|
+
# Runs an already-built query body, bypassing the AST, inside an Async block.
#
# @param query_body [Hash] raw query
# @param indexes [Array<String>]
# @param options [Hash] extra keywords forwarded to execute_search
# @return [Object] whatever `Async` returns for the block
def search_raw(query_body, indexes: [], **options)
  Async { execute_search(query_body, indexes: indexes, **options) }
end
|
|
67
|
+
|
|
68
|
+
# Facade exposing cluster-level calls on top of an adapter.
class ClusterAPI
  # @param adapter [Object] adapter providing execute_cluster_health
  def initialize(adapter)
    @adapter = adapter
  end

  # Runs the adapter's cluster health check synchronously.
  #
  # @param options [Hash] keywords forwarded to execute_cluster_health
  # @return [Object] the adapter's health result
  def health(**options)
    Sync { @adapter.send(:execute_cluster_health, **options) }
  end
end
|
|
79
|
+
|
|
80
|
+
# Facade exposing index-level calls on top of an adapter.
#
# Adapter calls go through `send` inside `Sync`, matching the other API facades
# in this file, so they work when the adapter's execute_* methods are private.
class IndicesAPI
  # @param adapter [Object] adapter providing execute_index_exists? and execute_refresh_index
  def initialize(adapter)
    @adapter = adapter
  end

  # Returns a minimal description of an existing index.
  #
  # @param index [String]
  # @return [Hash] `{ index => {} }`
  # @raise [RuntimeError] "Index not found" when the adapter reports no such index
  def get(index:)
    exists = Sync { @adapter.send(:execute_index_exists?, index) }
    raise("Index not found") unless exists

    { index => {} }
  end

  # Returns a basic, empty stats structure for the index (no request is made).
  def stats(index:)
    { "indices" => { index => {} } }
  end

  # Refreshes the index so documents become searchable immediately.
  def refresh(index:)
    Sync { @adapter.send(:execute_refresh_index, index) }
  end
end
|
|
99
|
+
|
|
100
|
+
# Search Pipelines API for OpenSearch 3.x
|
|
101
|
+
# Pipelines can include request and response processors for neural search, reranking, etc.
|
|
102
|
+
# Facade for search pipeline management; every call is dispatched to the
# adapter's matching execute_* method inside `Sync`.
class PipelinesAPI
  # @param adapter [Object] adapter providing the execute_*_pipeline methods
  def initialize(adapter)
    @adapter = adapter
  end

  # Create or update a search pipeline.
  #
  # @param name [String] Pipeline name
  # @param request_processors [Array<Hash>] Request phase processors
  # @param response_processors [Array<Hash>] Response phase processors
  # @param description [String, nil] Optional description
  def create(name, request_processors: [], response_processors: [], description: nil)
    dispatch(:execute_create_pipeline, name,
             request_processors: request_processors,
             response_processors: response_processors,
             description: description)
  end
  alias_method :put, :create

  # Get a specific pipeline.
  def get(name)
    dispatch(:execute_get_pipeline, name)
  end

  # List all pipelines.
  def list
    dispatch(:execute_list_pipelines)
  end
  alias_method :all, :list

  # Delete a pipeline.
  def delete(name)
    dispatch(:execute_delete_pipeline, name)
  end

  # Check if a pipeline exists.
  def exists?(name)
    dispatch(:execute_pipeline_exists?, name)
  end

  private

  # Invokes an adapter operation (possibly private) synchronously.
  def dispatch(operation, *args, **keywords)
    Sync { @adapter.send(operation, *args, **keywords) }
  end
end
|
|
153
|
+
|
|
154
|
+
# Query Rules API for OpenSearch 3.x
|
|
155
|
+
# Rules allow pinning, boosting, or hiding specific results based on query patterns
|
|
156
|
+
# Facade for query rule management; every call is dispatched to the adapter's
# matching execute_* method inside `Sync`.
class RulesAPI
  # @param adapter [Object] adapter providing the execute_*_rule(s) methods
  def initialize(adapter)
    @adapter = adapter
  end

  # Create or update a rule.
  #
  # @param feature_type [String] Feature type (e.g., 'pinned_queries')
  # @param rule_id [String] Unique rule identifier
  # @param attributes [Hash] Rule matching attributes
  # @param feature_value [Hash] The feature value to apply
  def create(feature_type, rule_id, attributes:, feature_value:)
    dispatch(:execute_create_rule, feature_type, rule_id,
             attributes: attributes,
             feature_value: feature_value)
  end
  alias_method :put, :create

  # Get a specific rule.
  def get(feature_type, rule_id)
    dispatch(:execute_get_rule, feature_type, rule_id)
  end

  # List rules for a feature type, optionally continuing after `search_after`.
  def list(feature_type, search_after: nil)
    dispatch(:execute_list_rules, feature_type, search_after: search_after)
  end
  alias_method :all, :list

  # Delete a rule.
  def delete(feature_type, rule_id)
    dispatch(:execute_delete_rule, feature_type, rule_id)
  end

  # Check if a rule exists.
  def exists?(feature_type, rule_id)
    dispatch(:execute_rule_exists?, feature_type, rule_id)
  end

  private

  # Invokes an adapter operation (possibly private) synchronously.
  def dispatch(operation, *args, **keywords)
    Sync { @adapter.send(operation, *args, **keywords) }
  end
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|