noiseless 0.0.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +339 -0
- data/lib/noiseless/adapters/cluster_api.rb +18 -0
- data/lib/noiseless/adapters/elasticsearch.rb +30 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +68 -0
- data/lib/noiseless/adapters/execution_modules/es_compatible_execution.rb +83 -0
- data/lib/noiseless/adapters/execution_modules/http_transport.rb +83 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +209 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +425 -0
- data/lib/noiseless/adapters/indices_api.rb +26 -0
- data/lib/noiseless/adapters/open_search.rb +168 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +36 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/field_value_node.rb +16 -0
- data/lib/noiseless/ast/filter.rb +8 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +8 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +8 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +8 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +146 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +265 -14
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "es_compatible_execution"
|
|
5
|
+
|
|
6
|
+
module Noiseless
|
|
7
|
+
module Adapters
|
|
8
|
+
module ExecutionModules
|
|
9
|
+
module ElasticsearchExecution
|
|
10
|
+
include EsCompatibleExecution
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
# POSTs the JSON-encoded query to "/<indexes>/_search", or to "/_search" when
# no indexes are given, and returns the result of parse_json_response!.
# The HTTP response is closed whether or not parsing succeeds.
def execute_search(query_hash, indexes: [], **_opts)
  search_path = if indexes.any?
                  "/#{indexes.join(',')}/_search"
                else
                  "/_search"
                end
  payload = JSON.generate(query_hash)

  reply = post_request(search_path, payload)
  parse_json_response!(reply, error_class: Noiseless::SearchError, context: "search")
ensure
  reply&.close
end
|
|
23
|
+
|
|
24
|
+
# PUTs "/<index_name>" with a JSON body holding whichever of +mappings+ and
# +settings+ were supplied; sends no body when neither is given.
# Returns the result of parse_json_response! and always closes the response.
def execute_create_index(index_name, mappings: nil, settings: nil, **_opts)
  sections = { mappings: mappings, settings: settings }.select { |_name, value| value }
  payload = sections.empty? ? nil : JSON.generate(sections)

  reply = put_request("/#{index_name}", payload)
  parse_json_response!(reply, context: "create index #{index_name}")
ensure
  reply&.close
end
|
|
34
|
+
|
|
35
|
+
# Indexes +document+ as JSON. With an id it PUTs to "/<index>/_doc/<id>";
# without one it POSTs to "/<index>/_doc". Returns the result of
# parse_json_response! and always closes the response.
def execute_index_document(index, id, document, **_opts)
  payload = JSON.generate(document)

  reply = if id
            put_request("/#{index}/_doc/#{id}", payload)
          else
            post_request("/#{index}/_doc", payload)
          end
  parse_json_response!(reply, context: "index document #{index}/#{id}")
ensure
  reply&.close
end
|
|
44
|
+
|
|
45
|
+
# GETs "/_cluster/health" and returns the parsed JSON body.
# Any StandardError (request or parse failure) is converted into a
# string-keyed "red" placeholder that carries the error class and message.
def execute_cluster_health(**_opts)
  reply = get_request("/_cluster/health")
  JSON.parse(reply.read)
rescue StandardError => error
  failure = { "type" => error.class.name, "reason" => error.message }
  {
    "cluster_name" => "unknown",
    "status" => "red",
    "timed_out" => false,
    "number_of_nodes" => 0,
    "number_of_data_nodes" => 0,
    "active_primary_shards" => 0,
    "active_shards" => 0,
    "error" => failure
  }
ensure
  reply&.close
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "http_transport"
|
|
5
|
+
|
|
6
|
+
module Noiseless
|
|
7
|
+
module Adapters
|
|
8
|
+
module ExecutionModules
|
|
9
|
+
# Document and index operations shared by the wire-compatible
|
|
10
|
+
# Elasticsearch and OpenSearch HTTP APIs.
|
|
11
|
+
module EsCompatibleExecution
|
|
12
|
+
include HttpTransport
|
|
13
|
+
|
|
14
|
+
private
|
|
15
|
+
|
|
16
|
+
# Serialises +actions+ as NDJSON and POSTs them to "/_bulk".
# An action with an :index entry becomes two lines (metadata with _index/_id,
# then the :data payload); any other action is emitted verbatim as one line.
# Returns the result of parse_json_response! and always closes the response.
def execute_bulk(actions, **_opts)
  lines = actions.flat_map do |action|
    index_op = action[:index]
    next [JSON.generate(action)] unless index_op

    metadata = { index: { _index: index_op[:_index], _id: index_op[:_id] } }
    [JSON.generate(metadata), JSON.generate(index_op[:data])]
  end
  payload = lines.map { |line| "#{line}\n" }.join

  reply = post_request("/_bulk", payload, content_type: "application/x-ndjson")
  parse_json_response!(reply, context: "bulk")
ensure
  reply&.close
end
|
|
32
|
+
|
|
33
|
+
# Sends DELETE "/<index_name>" and returns the result of parse_json_response!.
# The response is closed in all cases.
def execute_delete_index(index_name, **_opts)
  target = "/#{index_name}"
  reply = delete_request(target)
  parse_json_response!(reply, context: "delete index #{index_name}")
ensure
  reply&.close
end
|
|
39
|
+
|
|
40
|
+
# POSTs (without a body) to "/<index_name>/_refresh" and returns the result of
# parse_json_response!. The response is closed in all cases.
def execute_refresh_index(index_name)
  target = "/#{index_name}/_refresh"
  reply = post_request(target, nil)
  parse_json_response!(reply, context: "refresh index #{index_name}")
ensure
  reply&.close
end
|
|
46
|
+
|
|
47
|
+
# Issues HEAD "/<index_name>" and returns the response's success? value.
# Any StandardError raised along the way is reported as false.
def execute_index_exists?(index_name)
  reply = head_request("/#{index_name}")
  reply.success?
rescue StandardError
  false
ensure
  reply&.close
end
|
|
55
|
+
|
|
56
|
+
# POSTs {"doc": changes} to "/<index>/_update/<id>" (a partial-document
# update) and returns the result of parse_json_response!.
# The response is closed in all cases.
def execute_update_document(index, id, changes, **_opts)
  payload = JSON.generate(doc: changes)
  target = "/#{index}/_update/#{id}"

  reply = post_request(target, payload)
  parse_json_response!(reply, context: "update document #{index}/#{id}")
ensure
  reply&.close
end
|
|
64
|
+
|
|
65
|
+
# Sends DELETE "/<index>/_doc/<id>" and returns the result of
# parse_json_response!. The response is closed in all cases.
def execute_delete_document(index, id, **_opts)
  target = "/#{index}/_doc/#{id}"
  reply = delete_request(target)
  parse_json_response!(reply, context: "delete document #{index}/#{id}")
ensure
  reply&.close
end
|
|
71
|
+
|
|
72
|
+
# Issues HEAD "/<index>/_doc/<id>" and returns the response's success? value.
# Any StandardError raised along the way is reported as false.
def execute_document_exists?(index, id)
  reply = head_request("/#{index}/_doc/#{id}")
  reply.success?
rescue StandardError
  false
ensure
  reply&.close
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module Adapters
|
|
5
|
+
module ExecutionModules
|
|
6
|
+
# Shared Async::HTTP connection handling for HTTP-based adapters.
|
|
7
|
+
# Host classes must provide a private +default_port+ method.
|
|
8
|
+
module HttpTransport
|
|
9
|
+
# Stores the host list (falling back to http://localhost:<default_port> when
# none is given), builds one Async::HTTP client per host, and then forwards
# the resolved hosts and remaining options to the next initializer.
def initialize(hosts: [], **connection_params)
  given_hosts = Array(hosts)
  @hosts = if given_hosts.empty?
             ["http://localhost:#{default_port}"]
           else
             given_hosts
           end
  @connection_params = connection_params

  # One client per configured host, keyed by the host string.
  @clients = @hosts.each_with_object({}) do |host, clients|
    clients[host] = Async::HTTP::Client.new(Async::HTTP::Endpoint.parse(host))
  end

  super(hosts: @hosts, **connection_params)
end
|
|
24
|
+
|
|
25
|
+
# Closes every per-host client; does nothing when no clients were created.
def close
  return if @clients.nil?

  @clients.each_value { |client| client.close }
end
|
|
28
|
+
|
|
29
|
+
private
|
|
30
|
+
|
|
31
|
+
# HTTP helpers using Async::HTTP with connection pooling
|
|
32
|
+
# Performs a GET for +path+ with the default headers on the client yielded
# by with_client and returns that client's response.
def get_request(path)
  with_client { |client| client.get(path, default_headers) }
end
|
|
37
|
+
|
|
38
|
+
# Performs a POST of +body+ to +path+. A content-type header is added to the
# default headers only when a body is present.
def post_request(path, body, content_type: "application/json")
  headers = default_headers
  headers += [["content-type", content_type]] if body

  with_client { |client| client.post(path, headers, body) }
end
|
|
45
|
+
|
|
46
|
+
# Performs a PUT of +body+ to +path+. A content-type header is added to the
# default headers only when a body is present.
def put_request(path, body, content_type: "application/json")
  headers = default_headers
  headers += [["content-type", content_type]] if body

  with_client { |client| client.put(path, headers, body) }
end
|
|
53
|
+
|
|
54
|
+
# Performs a DELETE for +path+ with the default headers on the client yielded
# by with_client and returns that client's response.
def delete_request(path)
  with_client { |client| client.delete(path, default_headers) }
end
|
|
59
|
+
|
|
60
|
+
# Performs a HEAD for +path+ with the default headers on the client yielded
# by with_client and returns that client's response.
def head_request(path)
  with_client { |client| client.head(path, default_headers) }
end
|
|
65
|
+
|
|
66
|
+
# Yields the client of one randomly chosen configured host and returns the
# block's value.
def with_client
  yield @clients[@hosts.sample]
end
|
|
73
|
+
|
|
74
|
+
# Header pairs sent with every request: a JSON accept header and a user-agent
# naming the gem version and the running Ruby version.
def default_headers
  agent = "Noiseless/#{Noiseless::VERSION} (Ruby/#{RUBY_VERSION})"

  [
    ["accept", "application/json"],
    ["user-agent", agent]
  ]
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "es_compatible_execution"
|
|
5
|
+
|
|
6
|
+
module Noiseless
|
|
7
|
+
module Adapters
|
|
8
|
+
module ExecutionModules
|
|
9
|
+
module OpensearchExecution
|
|
10
|
+
include EsCompatibleExecution
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
# POSTs the JSON-encoded query to "/<indexes>/_search", targeting "_all" when
# no indexes are given, and returns the result of parse_json_response!.
# The response is closed in all cases.
def execute_search(query_hash, indexes: [], **_opts)
  index_path = if indexes.any?
                 indexes.join(",")
               else
                 "_all"
               end
  payload = JSON.generate(query_hash)

  reply = post_request("/#{index_path}/_search", payload)
  parse_json_response!(reply, error_class: Noiseless::SearchError, context: "search #{index_path}")
ensure
  reply&.close
end
|
|
24
|
+
|
|
25
|
+
# PUTs "/<index_name>" with a JSON body made of any extra keyword options plus
# whichever of +mappings+ and +settings+ were supplied; sends no body when the
# result is empty. Returns the result of parse_json_response! and always
# closes the response.
def execute_create_index(index_name, mappings: nil, settings: nil, **opts)
  definition = opts.dup
  { mappings: mappings, settings: settings }.each do |section, value|
    definition[section] = value if value
  end
  payload = definition.empty? ? nil : JSON.generate(definition)

  reply = put_request("/#{index_name}", payload)
  parse_json_response!(reply, context: "create index #{index_name}")
ensure
  reply&.close
end
|
|
35
|
+
|
|
36
|
+
# Indexes +document+ (serialised as JSON) into +index+.
#
# With an +id+ the document is PUT to "/<index>/_doc/<id>". Without one it is
# POSTed to "/<index>/_doc" so the server assigns the id; previously a nil id
# produced a PUT to "/<index>/_doc/" with an empty id segment.
#
# Returns the result of parse_json_response! and always closes the response.
def execute_index_document(index, id, document, **_opts)
  body = JSON.generate(document)

  response = if id
               put_request("/#{index}/_doc/#{id}", body)
             else
               post_request("/#{index}/_doc", body)
             end
  parse_json_response!(response, context: "index document #{index}/#{id}")
ensure
  response&.close
end
|
|
45
|
+
|
|
46
|
+
# GETs "/_cluster/health" and returns the parsed JSON body.
# Any StandardError (request or parse failure) is converted into a "red"
# placeholder that carries the error class and message.
# NOTE(review): the placeholder uses symbol keys whereas JSON.parse yields
# string keys - confirm callers handle both shapes.
def execute_cluster_health(**_opts)
  reply = get_request("/_cluster/health")
  JSON.parse(reply.read)
rescue StandardError => error
  zeroed_counters = %i[
    number_of_nodes number_of_data_nodes active_primary_shards active_shards
    relocating_shards initializing_shards unassigned_shards
  ].to_h { |counter| [counter, 0] }

  { cluster_name: "unknown", status: "red", timed_out: false }
    .merge(zeroed_counters)
    .merge(error: { type: error.class.name, reason: error.message })
ensure
  reply&.close
end
|
|
66
|
+
|
|
67
|
+
# OpenSearch-specific features
|
|
68
|
+
# Runs +query_hash+ against "/_search" with a pit: { id: pit_id } entry merged
# in, and returns the result of parse_json_response!.
# The response is closed in all cases.
def execute_point_in_time_search(query_hash, pit_id:, **_opts)
  payload = JSON.generate(query_hash.merge(pit: { id: pit_id }))

  reply = post_request("/_search", payload)
  parse_json_response!(reply, error_class: Noiseless::SearchError, context: "point-in-time search")
ensure
  reply&.close
end
|
|
78
|
+
|
|
79
|
+
# POSTs { id: template_id, params: params } to "/_search/template" and returns
# the result of parse_json_response!. The response is closed in all cases.
def execute_search_template(template_id:, params: {}, **_opts)
  payload = JSON.generate(id: template_id, params: params)

  reply = post_request("/_search/template", payload)
  parse_json_response!(reply, error_class: Noiseless::SearchError, context: "search template #{template_id}")
ensure
  reply&.close
end
|
|
92
|
+
|
|
93
|
+
# ============================================
|
|
94
|
+
# Search Pipeline API (OpenSearch 3.x)
|
|
95
|
+
# ============================================
|
|
96
|
+
|
|
97
|
+
# PUTs a search pipeline definition to "/_search/pipeline/<name>"; nil entries
# are dropped from the body. Returns the parsed JSON response, or an
# { acknowledged: false, error: ... } hash when any StandardError is raised.
def execute_create_pipeline(name, request_processors:, response_processors:, description: nil)
  definition = {
    description: description,
    request_processors: request_processors,
    response_processors: response_processors
  }.reject { |_key, value| value.nil? }

  reply = put_request("/_search/pipeline/#{name}", JSON.generate(definition))
  JSON.parse(reply.read)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { acknowledged: false, error: failure }
ensure
  reply&.close
end
|
|
111
|
+
|
|
112
|
+
# GETs "/_search/pipeline/<name>" and returns the parsed JSON body, or an
# { error: ... } hash when any StandardError is raised.
def execute_get_pipeline(name)
  reply = get_request("/_search/pipeline/#{name}")
  raw = reply.read
  JSON.parse(raw)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { error: failure }
ensure
  reply&.close
end
|
|
120
|
+
|
|
121
|
+
# GETs "/_search/pipeline" and returns the parsed JSON body, or an
# { error: ... } hash when any StandardError is raised.
def execute_list_pipelines
  reply = get_request("/_search/pipeline")
  raw = reply.read
  JSON.parse(raw)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { error: failure }
ensure
  reply&.close
end
|
|
129
|
+
|
|
130
|
+
# Sends DELETE "/_search/pipeline/<name>" and returns the parsed JSON body, or
# an { acknowledged: false, error: ... } hash when any StandardError is raised.
def execute_delete_pipeline(name)
  reply = delete_request("/_search/pipeline/#{name}")
  raw = reply.read
  JSON.parse(raw)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { acknowledged: false, error: failure }
ensure
  reply&.close
end
|
|
138
|
+
|
|
139
|
+
# Issues HEAD "/_search/pipeline/<name>" and returns the response's success?
# value. Any StandardError raised along the way is reported as false.
def execute_pipeline_exists?(name)
  reply = head_request("/_search/pipeline/#{name}")
  reply.success?
rescue StandardError
  false
ensure
  reply&.close
end
|
|
147
|
+
|
|
148
|
+
# ============================================
|
|
149
|
+
# Query Rules API (OpenSearch 3.x)
|
|
150
|
+
# ============================================
|
|
151
|
+
|
|
152
|
+
# PUTs a rule to "/_rules/<feature_type>/<rule_id>" whose body places
# +attributes+ under match_criteria.query alongside +feature_value+.
# Returns the parsed JSON response, or an { acknowledged: false, error: ... }
# hash when any StandardError is raised.
def execute_create_rule(feature_type, rule_id, attributes:, feature_value:)
  match_criteria = { query: attributes }
  payload = JSON.generate(match_criteria: match_criteria, feature_value: feature_value)

  reply = put_request("/_rules/#{feature_type}/#{rule_id}", payload)
  JSON.parse(reply.read)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { acknowledged: false, error: failure }
ensure
  reply&.close
end
|
|
167
|
+
|
|
168
|
+
# GETs "/_rules/<feature_type>/<rule_id>" and returns the parsed JSON body, or
# an { error: ... } hash when any StandardError is raised.
def execute_get_rule(feature_type, rule_id)
  reply = get_request("/_rules/#{feature_type}/#{rule_id}")
  raw = reply.read
  JSON.parse(raw)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { error: failure }
ensure
  reply&.close
end
|
|
176
|
+
|
|
177
|
+
# GETs "/_rules/<feature_type>", appending "?search_after=<value>" when a
# +search_after+ value is given. Returns the parsed JSON body, or a
# { rules: [], error: ... } hash when any StandardError is raised.
# NOTE(review): search_after is interpolated without URL-encoding - confirm
# the values callers pass are already safe for a query string.
def execute_list_rules(feature_type, search_after: nil)
  base_path = "/_rules/#{feature_type}"
  path = search_after ? "#{base_path}?search_after=#{search_after}" : base_path

  reply = get_request(path)
  JSON.parse(reply.read)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { rules: [], error: failure }
ensure
  reply&.close
end
|
|
188
|
+
|
|
189
|
+
# Sends DELETE "/_rules/<feature_type>/<rule_id>" and returns the parsed JSON
# body, or an { acknowledged: false, error: ... } hash when any StandardError
# is raised.
def execute_delete_rule(feature_type, rule_id)
  reply = delete_request("/_rules/#{feature_type}/#{rule_id}")
  raw = reply.read
  JSON.parse(raw)
rescue StandardError => error
  failure = { type: error.class.name, reason: error.message }
  { acknowledged: false, error: failure }
ensure
  reply&.close
end
|
|
197
|
+
|
|
198
|
+
# Issues HEAD "/_rules/<feature_type>/<rule_id>" and returns the response's
# success? value. Any StandardError raised along the way is reported as false.
def execute_rule_exists?(feature_type, rule_id)
  reply = head_request("/_rules/#{feature_type}/#{rule_id}")
  reply.success?
rescue StandardError
  false
ensure
  reply&.close
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module Adapters
|
|
5
|
+
module ExecutionModules
|
|
6
|
+
# pgvector support for semantic/vector search in PostgreSQL
|
|
7
|
+
# Provides similarity search using embeddings
|
|
8
|
+
#
|
|
9
|
+
# Required:
|
|
10
|
+
# CREATE EXTENSION IF NOT EXISTS vector;
|
|
11
|
+
#
|
|
12
|
+
# Table setup:
|
|
13
|
+
# ALTER TABLE your_table ADD COLUMN embedding vector(1536);
|
|
14
|
+
# CREATE INDEX ON your_table USING ivfflat (embedding vector_cosine_ops);
|
|
15
|
+
#
|
|
16
|
+
module PgvectorSupport
|
|
17
|
+
# Perform semantic search using vector similarity.
#
# Returns +scope+ untouched when pgvector is not available. Otherwise the
# scope selects all table columns plus a +vector_distance+ column, optionally
# filters by +distance_threshold+, orders by ascending distance and applies
# +limit+.
#
# @param scope [ActiveRecord::Relation] The base scope to search
# @param embedding [Array<Numeric>] The query embedding vector
# @param column [Symbol] The column containing embeddings (default: :embedding)
# @param limit [Integer] Maximum results to return
# @param distance_threshold [Float] Maximum distance threshold (optional)
# @param distance_metric [Symbol] :cosine, :l2, or :inner_product
# @return [ActiveRecord::Relation] Scope with vector similarity ordering
# @raise [ArgumentError, TypeError] if an embedding element is not numeric
#
def vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil,
                  distance_metric: :cosine)
  return scope unless pgvector_available?

  # The vector is embedded in the SQL text as a literal, so every element is
  # forced through Float() first: anything that is not a number is rejected
  # instead of being interpolated into the query.
  vector_string = "[#{embedding.map { |value| Float(value) }.join(',')}]"
  distance_sql = "#{quoted_column(column)} #{distance_operator(distance_metric)} '#{vector_string}'"

  scope = scope.select("#{scope.table_name}.*", "#{distance_sql} AS vector_distance")
  scope = scope.where("#{distance_sql} < ?", distance_threshold) if distance_threshold

  # Ascending distance = most similar first.
  scope.order(Arel.sql(distance_sql)).limit(limit)
end
|
|
52
|
+
|
|
53
|
+
# Hybrid search combining text and vector search.
#
# Returns +scope+ untouched when pgvector is not available. Otherwise the
# scope selects all table columns plus three computed columns:
# +text_score+ (sum of similarity() over +text_fields+, averaged, times
# +text_weight+), +vector_score+ ((1 - cosine distance) times
# +vector_weight+) and +combined_score+ (their sum), ordered by
# +combined_score+ descending.
#
# The text query is quoted through the connection and inlined, because
# +select+ cannot bind "?" placeholders; numeric inputs are coerced with
# Float() before being interpolated.
#
# @param scope [ActiveRecord::Relation] Base scope
# @param text_query [String] Text query passed to similarity()
# @param embedding [Array<Numeric>] Query embedding for vector search
# @param text_fields [Array<Symbol>] Fields to search with text
# @param vector_column [Symbol] Column containing embeddings
# @param text_weight [Float] Weight for text similarity (0.0-1.0)
# @param vector_weight [Float] Weight for vector similarity (0.0-1.0)
# @param limit [Integer] Maximum results to return
# @return [ActiveRecord::Relation]
# @raise [ArgumentError, TypeError] if the embedding or a weight is not numeric
#
def hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding,
                  text_weight: 0.5, vector_weight: 0.5, limit: 20)
  return scope unless pgvector_available?

  # Only validated numbers may reach the SQL text.
  vector_string = "[#{embedding.map { |value| Float(value) }.join(',')}]"
  text_factor = Float(text_weight)
  vector_factor = Float(vector_weight)

  fields = Array(text_fields)
  quoted_query = ActiveRecord::Base.connection.quote(text_query)
  # With no text fields the text part contributes 0 rather than producing
  # "() / 0" in the generated SQL.
  text_sum = if fields.empty?
               "0"
             else
               fields.map { |field| "similarity(#{quoted_column(field)}, #{quoted_query})" }.join(" + ")
             end
  text_score = "((#{text_sum}) / #{[fields.size, 1].max}) * #{text_factor}"

  quoted_vector_column = quoted_column(vector_column)
  # Convert distance to similarity: 1 - distance for cosine.
  vector_score = "(1 - (#{quoted_vector_column} <=> '#{vector_string}')) * #{vector_factor}"

  scope.select(
    "#{scope.table_name}.*",
    Arel.sql("#{text_score} AS text_score"),
    Arel.sql("#{vector_score} AS vector_score"),
    Arel.sql("(#{text_score} + #{vector_score}) AS combined_score")
  ).where(Arel.sql("(#{text_sum}) > 0 OR #{quoted_vector_column} IS NOT NULL"))
       .order(Arel.sql("combined_score DESC"))
       .limit(limit)
end
|
|
91
|
+
|
|
92
|
+
# Execute a KNN (K-Nearest Neighbors) search ordered by cosine distance.
#
# @param model [Class] The ActiveRecord model
# @param embedding [Array<Numeric>] Query embedding
# @param k [Integer] Number of nearest neighbors
# @param column [Symbol] Embedding column
# @param filters [Hash] Additional WHERE conditions
# @return [Hash, Array] the hash built by format_knn_response, or an empty
#   array when pgvector is not available
# @raise [ArgumentError, TypeError] if an embedding element is not numeric
#
def knn_search(model, embedding, k: 10, column: :embedding, filters: {})
  return [] unless pgvector_available?

  # The vector is embedded in the SQL text as a literal, so every element is
  # forced through Float() first; non-numeric input is rejected.
  vector_string = "[#{embedding.map { |value| Float(value) }.join(',')}]"
  distance_sql = "#{quoted_column(column)} <=> '#{vector_string}'"

  scope = model.all
  scope = scope.where(filters) if filters.any?

  results = scope.select("#{model.table_name}.*", "#{distance_sql} AS distance")
                 .order(Arel.sql(distance_sql))
                 .limit(k)

  format_knn_response(results, model)
end
|
|
117
|
+
|
|
118
|
+
# Store an embedding for a record.
#
# Writes the embedding, rendered as "[v1,v2,...]", with +update_column+ and
# returns that call's result. Returns false when pgvector is not available.
#
# @param record [ActiveRecord::Base] The record to update
# @param embedding [Array<Float>] The embedding vector
# @param column [Symbol] The column to store the embedding
#
def store_embedding(record, embedding, column: :embedding)
  return false unless pgvector_available?

  record.update_column(column, "[#{embedding.join(',')}]")
end
|
|
130
|
+
|
|
131
|
+
# Batch store embeddings with a single UPDATE ... FROM (VALUES ...) statement.
#
# Returns 0 without touching the database when pgvector is unavailable or
# +embeddings+ is empty. Identifiers are quoted through the model's
# connection, embedding elements are coerced with Float(), and the id is cast
# to the primary key column's SQL type (falling back to uuid when the type
# cannot be determined) so both UUID and integer keys work.
#
# Any StandardError is logged through Rails.logger and reported as 0.
#
# @param model [Class] The ActiveRecord model
# @param embeddings [Hash<String, Array<Numeric>>] Map of ID -> embedding
# @param column [Symbol] The column to store embeddings
# @return [Integer] number of embeddings submitted, or 0 on failure
#
def batch_store_embeddings(model, embeddings, column: :embedding)
  return 0 unless pgvector_available?
  # "VALUES" with no rows is a SQL syntax error, so there is nothing to send.
  return 0 if embeddings.empty?

  connection = model.connection
  values = embeddings.map do |id, emb|
    "(#{connection.quote(id)}, '[#{emb.map { |value| Float(value) }.join(',')}]'::vector)"
  end.join(",")

  table = connection.quote_table_name(model.table_name)
  primary_key = model.primary_key || "id"
  id_type = model.columns_hash[primary_key]&.sql_type || "uuid"

  sql = [
    "UPDATE #{table}",
    "SET #{connection.quote_column_name(column)} = v.embedding",
    "FROM (VALUES #{values}) AS v(id, embedding)",
    "WHERE #{table}.#{connection.quote_column_name(primary_key)} = v.id::#{id_type}"
  ].join(" ")

  connection.execute(sql)
  embeddings.size
rescue StandardError => e
  Rails.logger.error("Failed to batch store embeddings: #{e.message}")
  0
end
|
|
158
|
+
|
|
159
|
+
# Find records similar to a given record.
#
# Reads the reference embedding from +record+ via +column+ and delegates to
# vector_search over records whose +column+ is not NULL. Returns an empty
# relation (+none+) when the record has no embedding or pgvector is
# unavailable.
#
# @param record [ActiveRecord::Base] The reference record
# @param limit [Integer] Number of similar records
# @param column [Symbol] Embedding column
# @param exclude_self [Boolean] Exclude the reference record
# @return [ActiveRecord::Relation]
#
def find_similar(record, limit: 10, column: :embedding, exclude_self: true)
  model = record.class
  reference = record.send(column)
  return model.none if !reference || !pgvector_available?

  with_embedding = model.where.not(column => nil)
  candidates = exclude_self ? with_embedding.where.not(id: record.id) : with_embedding

  vector_search(candidates, reference, column: column, limit: limit)
end
|
|
176
|
+
|
|
177
|
+
# Check if pgvector is available.
#
# The answer is looked up once via available_extensions and then cached,
# including a negative answer: `||=` would re-run the lookup on every call
# whenever the extension is missing, because false is never memoized by it.
#
# @return [Boolean]
def pgvector_available?
  return @pgvector_available unless @pgvector_available.nil?

  @pgvector_available = available_extensions.include?("vector")
end
|
|
181
|
+
|
|
182
|
+
private
|
|
183
|
+
|
|
184
|
+
# Maps a distance metric to its SQL operator: "<->" for :l2 / :euclidean,
# "<#>" for :inner_product (negative inner product), and "<=>" (cosine
# distance) for anything else.
def distance_operator(metric)
  {
    l2: "<->",
    euclidean: "<->",
    inner_product: "<#>"
  }.fetch(metric, "<=>")
end
|
|
194
|
+
|
|
195
|
+
# Wraps KNN result records in an Elasticsearch-style response hash.
#
# Each hit's "_score" is 1.0 - distance. A record that does not respond to
# +distance+ is treated as distance 0 (score 1.0). A record whose distance is
# nil gets a nil score instead of raising TypeError on `1.0 - nil`.
# "max_score" is the largest non-nil score, or nil when there is none.
#
# @param records [Enumerable] result records (respond to id and as_json)
# @param model [Class] model whose table_name is reported as "_index"
# @return [Hash] response with "took", "timed_out", "_shards" and "hits"
def format_knn_response(records, model)
  hits = records.map do |record|
    distance = record.respond_to?(:distance) ? record.distance : 0

    {
      "_index" => model.table_name,
      "_id" => record.id.to_s,
      "_score" => distance.nil? ? nil : 1.0 - distance,
      "_source" => record.as_json(except: [:distance])
    }
  end

  {
    "took" => 0,
    "timed_out" => false,
    "_shards" => { "total" => 1, "successful" => 1, "skipped" => 0, "failed" => 0 },
    "hits" => {
      "total" => { "value" => hits.size, "relation" => "eq" },
      "max_score" => hits.filter_map { |hit| hit["_score"] }.max,
      "hits" => hits
    }
  }
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|