noiseless 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +313 -0
- data/lib/noiseless/adapters/elasticsearch.rb +70 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
- data/lib/noiseless/adapters/open_search.rb +208 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +70 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/filter.rb +15 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +15 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +15 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +15 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +130 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +260 -14
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3568cdfb6fe26188f2ccb0287265f3adfd128b8a056f06636496c8644a87cad3
|
|
4
|
+
data.tar.gz: f0b29fa15d18a083950553337b9295e480c3966204dc614df5b9b4999b073742
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cda0f5f8ebdfa6d4f35f6caa9d8c49d179d0fd0d1bdbe928ac9605d3da67e91635b0a15827fcc74c5b2196b4381050a11051a97c1f6a8b75890a20f88d32be60
|
|
7
|
+
data.tar.gz: f379b20348f2c293255aa56815e458a0668ad77aa0fe01026ed21239a751a815264bbccd87bee54dbad6b55496940f543c186f008621eaa5e767e455a6f9d156
|
data/LICENSE.txt
CHANGED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Abdelkader Boudih
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
CHANGED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# Noiseless
|
|
2
|
+
|
|
3
|
+
Async-first search abstraction for Rails with multi-backend support (OpenSearch, Elasticsearch, Typesense, PostgreSQL).
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Chainable DSL** — fluent query builder with runtime validation
|
|
8
|
+
- **Multi-backend** — OpenSearch, Elasticsearch, Typesense, PostgreSQL adapters
|
|
9
|
+
- **Async-first** — built on Ruby 3.4+ fiber scheduler with non-blocking I/O
|
|
10
|
+
- **HTTP/2 connection pooling** — persistent connections via `Async::Pool`
|
|
11
|
+
- **Rails integration** — Railtie with log subscriber and controller runtime tracking
|
|
12
|
+
- **Lazy loading** — adapters loaded on-demand, test files excluded from production
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```ruby
|
|
17
|
+
gem "noiseless"
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
`noiseless` is a Rails gem. It requires Ruby >= 3.4 and Rails >= 8.1.
|
|
21
|
+
|
|
22
|
+
## Configuration
|
|
23
|
+
|
|
24
|
+
Create `config/noiseless.yml`:
|
|
25
|
+
|
|
26
|
+
```yaml
|
|
27
|
+
development:
|
|
28
|
+
default: primary
|
|
29
|
+
connections:
|
|
30
|
+
primary:
|
|
31
|
+
adapter: elasticsearch
|
|
32
|
+
hosts:
|
|
33
|
+
- http://localhost:9201
|
|
34
|
+
opensearch:
|
|
35
|
+
adapter: open_search
|
|
36
|
+
hosts:
|
|
37
|
+
- http://localhost:9202
|
|
38
|
+
typesense:
|
|
39
|
+
adapter: typesense
|
|
40
|
+
hosts:
|
|
41
|
+
- http://localhost:8109
|
|
42
|
+
postgresql:
|
|
43
|
+
adapter: postgresql
|
|
44
|
+
|
|
45
|
+
production:
|
|
46
|
+
default: primary
|
|
47
|
+
connections:
|
|
48
|
+
primary:
|
|
49
|
+
adapter: open_search
|
|
50
|
+
hosts:
|
|
51
|
+
- <%= ENV['OPENSEARCH_URL'] %>
|
|
52
|
+
typesense:
|
|
53
|
+
adapter: typesense
|
|
54
|
+
hosts:
|
|
55
|
+
- <%= ENV['TYPESENSE_URL'] %>
|
|
56
|
+
postgresql:
|
|
57
|
+
adapter: postgresql
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Usage
|
|
61
|
+
|
|
62
|
+
### Defining a Search
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
class Company::Search < Noiseless::Model
|
|
66
|
+
index_name 'companies'
|
|
67
|
+
|
|
68
|
+
def by_name(name)
|
|
69
|
+
multi_match(name, [:name, :name_aliases])
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def suppliers_only
|
|
73
|
+
filter(:company_type, 'supplier')
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Executing Searches
|
|
79
|
+
|
|
80
|
+
All `.execute` calls return `Async::Task` objects. Use `Sync` to wait for results, or use the `_sync` convenience methods:
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
# Convenience method (recommended for simple cases)
|
|
84
|
+
results = Company::Search.new.by_name('tech').execute_sync
|
|
85
|
+
|
|
86
|
+
# Class-level convenience
|
|
87
|
+
results = Company::Search.search_sync do |s|
|
|
88
|
+
s.match(:name, 'tech')
|
|
89
|
+
s.limit(10)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Explicit Sync block
|
|
93
|
+
results = Sync do
|
|
94
|
+
Company::Search.new
|
|
95
|
+
.by_name('technology')
|
|
96
|
+
.suppliers_only
|
|
97
|
+
.limit(20)
|
|
98
|
+
.execute
|
|
99
|
+
.wait
|
|
100
|
+
end
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Concurrent Searches
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
Async do |task|
|
|
107
|
+
companies_task = Company::Search.new.match(:name, 'tech').execute
|
|
108
|
+
products_task = Product::Search.new.match(:name, 'tech').execute
|
|
109
|
+
|
|
110
|
+
companies = companies_task.wait
|
|
111
|
+
products = products_task.wait
|
|
112
|
+
end
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
For best performance, run independent searches concurrently within a single `Async` block rather than creating separate `Sync` blocks per search.
|
|
116
|
+
|
|
117
|
+
### Advanced Queries
|
|
118
|
+
|
|
119
|
+
```ruby
|
|
120
|
+
results = Company::Search.new
|
|
121
|
+
.match(:name, 'electronics')
|
|
122
|
+
.filter(:status, 'active')
|
|
123
|
+
.geo_distance(:location, lat: 40.7128, lon: -74.0060, distance: '50km')
|
|
124
|
+
.sort(:created_at, :desc)
|
|
125
|
+
.paginate(page: 1, per_page: 10)
|
|
126
|
+
.execute_sync
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Rails Integration
|
|
130
|
+
|
|
131
|
+
```ruby
|
|
132
|
+
class CompaniesController < ApplicationController
|
|
133
|
+
def search
|
|
134
|
+
@results = Company::Search.new
|
|
135
|
+
.by_name(params[:q])
|
|
136
|
+
.limit(20)
|
|
137
|
+
.execute_sync
|
|
138
|
+
|
|
139
|
+
render json: @results
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Testing
|
|
145
|
+
|
|
146
|
+
Add to `test/test_helper.rb`:
|
|
147
|
+
|
|
148
|
+
```ruby
|
|
149
|
+
require 'noiseless/test_helper'
|
|
150
|
+
require 'noiseless/test_case'
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### With Noiseless::TestCase (automatic VCR cassettes)
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
class CompanySearchTest < Noiseless::TestCase
|
|
157
|
+
def test_search_by_name
|
|
158
|
+
# Cassette auto-named: company_search/search_by_name
|
|
159
|
+
search = Company::Search.new.by_name('test')
|
|
160
|
+
assert_search_results(search)
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### With manual VCR control
|
|
166
|
+
|
|
167
|
+
```ruby
|
|
168
|
+
class CompanySearchTest < ActiveSupport::TestCase
|
|
169
|
+
include Noiseless::TestHelper
|
|
170
|
+
|
|
171
|
+
def test_custom_search
|
|
172
|
+
noiseless_cassette(record: :new_episodes) do
|
|
173
|
+
results = Company::Search.new.by_name('test').execute_sync
|
|
174
|
+
assert results.any?
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Running Tests Locally
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
docker compose up -d postgres elasticsearch opensearch typesense
|
|
184
|
+
bin/test
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
`bin/test` expects all four local services from `docker-compose.yml`, including PostgreSQL on `:5432`.
|
|
188
|
+
Default ports match `docker-compose.yml`: PostgreSQL `:5432`, Elasticsearch `:9201`, OpenSearch `:9202`, Typesense `:8109`. Override via env vars:
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
ELASTICSEARCH_PORT=9200 OPENSEARCH_PORT=9201 TYPESENSE_PORT=8108 bin/test
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
For a release smoke test that does not require the dummy app or local services:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
bundle exec rake release:check
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Debug Mode
|
|
201
|
+
|
|
202
|
+
```ruby
|
|
203
|
+
ENV['NOISELESS_VERBOSE'] = 'true'
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Contributing
|
|
207
|
+
|
|
208
|
+
1. Follow Rails conventions for code organization
|
|
209
|
+
2. Test helpers must remain separate from core functionality
|
|
210
|
+
3. Add tests for new features using the provided test utilities
|
|
211
|
+
|
|
212
|
+
## License
|
|
213
|
+
|
|
214
|
+
BSD 3-Clause License — See [LICENSE.txt](LICENSE.txt)
|
data/lib/application_search.rb
CHANGED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Abstract base class for all search models
|
|
4
|
+
class ApplicationSearch < Noiseless::Model
  class << self
    # Flags the receiving class as abstract. The flag is stored in a class-level
    # instance variable of the receiver itself.
    def abstract!
      @abstract = true
    end

    # @return [Boolean] true only when abstract! was called on this exact class
    def abstract?
      @abstract == true
    end
  end

  # ApplicationSearch is itself the abstract base; concrete search models
  # such as Product::Search inherit from it.
  abstract!
end
|
data/lib/noiseless/adapter.rb
CHANGED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "async"
|
|
4
|
+
require_relative "introspection"
|
|
5
|
+
|
|
6
|
+
module Noiseless
|
|
7
|
+
class Adapter
|
|
8
|
+
include Instrumentation
|
|
9
|
+
include Introspection
|
|
10
|
+
|
|
11
|
+
def initialize(hosts: [], **connection_params)
|
|
12
|
+
@hosts = hosts
|
|
13
|
+
@connection_params = connection_params.dup
|
|
14
|
+
@connection_params.delete(:async)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def async_context?
|
|
18
|
+
true
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Convert AST to Hash/JSON before execution
|
|
22
|
+
def search(ast_node, model_class: nil, response_type: nil, **)
|
|
23
|
+
query_hash = ast_to_hash(ast_node)
|
|
24
|
+
|
|
25
|
+
Async do
|
|
26
|
+
raw_response = instrument(:search, indexes: ast_node.indexes, query: query_hash) do
|
|
27
|
+
execute_search(query_hash, indexes: ast_node.indexes, **)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
ResponseFactory.create(
|
|
31
|
+
raw_response,
|
|
32
|
+
model_class: model_class,
|
|
33
|
+
response_type: response_type,
|
|
34
|
+
query_hash: query_hash
|
|
35
|
+
)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def bulk(actions, **)
|
|
40
|
+
Async do
|
|
41
|
+
instrument(:bulk, actions_count: actions.size) do
|
|
42
|
+
execute_bulk(actions, **)
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def create_index(index_name, **)
|
|
48
|
+
Async do
|
|
49
|
+
instrument(:create_index, index: index_name) do
|
|
50
|
+
execute_create_index(index_name, **)
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def delete_index(index_name, **)
|
|
56
|
+
Async do
|
|
57
|
+
instrument(:delete_index, index: index_name) do
|
|
58
|
+
execute_delete_index(index_name, **)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def index_exists?(index_name)
|
|
64
|
+
Async do
|
|
65
|
+
execute_index_exists?(index_name)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def index_document(index:, id:, document:, **)
|
|
70
|
+
Async do
|
|
71
|
+
instrument(:index_document, index: index, id: id) do
|
|
72
|
+
execute_index_document(index, id, document, **)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def update_document(index:, id:, changes:, **)
|
|
78
|
+
Async do
|
|
79
|
+
instrument(:update_document, index: index, id: id, changes_count: changes.size) do
|
|
80
|
+
execute_update_document(index, id, changes, **)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def delete_document(index:, id:, **)
|
|
86
|
+
Async do
|
|
87
|
+
instrument(:delete_document, index: index, id: id) do
|
|
88
|
+
execute_delete_document(index, id, **)
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def document_exists?(index:, id:)
|
|
94
|
+
Async do
|
|
95
|
+
execute_document_exists?(index, id)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Raw search method for backward compatibility
|
|
100
|
+
def search_raw(query_body, indexes: [], **)
|
|
101
|
+
Async do
|
|
102
|
+
instrument(:search, indexes: indexes, query: query_body) do
|
|
103
|
+
execute_search(query_body, indexes: indexes, **)
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
private
|
|
109
|
+
|
|
110
|
+
# Convert AST to Hash - override in subclasses for adapter-specific format
|
|
111
|
+
def ast_to_hash(ast_node)
|
|
112
|
+
result = {}
|
|
113
|
+
|
|
114
|
+
query_hash = build_query_hash(ast_node.bool)
|
|
115
|
+
result[:query] = query_hash unless query_hash.empty?
|
|
116
|
+
|
|
117
|
+
sort_hash = build_sort_hash(ast_node.sort)
|
|
118
|
+
result[:sort] = sort_hash unless sort_hash.empty?
|
|
119
|
+
|
|
120
|
+
# Handle search_after (cursor pagination) vs offset pagination
|
|
121
|
+
if ast_node.search_after
|
|
122
|
+
result[:search_after] = ast_node.search_after.values
|
|
123
|
+
result[:size] = ast_node.paginate&.per_page || 20
|
|
124
|
+
else
|
|
125
|
+
pagination = build_pagination_hash(ast_node.paginate)
|
|
126
|
+
result[:from] = pagination[:from]
|
|
127
|
+
result[:size] = pagination[:size]
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Field collapsing
|
|
131
|
+
result[:collapse] = build_collapse_hash(ast_node.collapse) if ast_node.collapse
|
|
132
|
+
|
|
133
|
+
# Aggregations
|
|
134
|
+
result[:aggs] = build_aggregations_hash(ast_node.aggregations) if ast_node.aggregations.any?
|
|
135
|
+
|
|
136
|
+
# Vector/kNN search (OpenSearch/Elasticsearch compatible)
|
|
137
|
+
result[:knn] = build_knn_query(ast_node.vector) if ast_node.vector_search?
|
|
138
|
+
|
|
139
|
+
# Hybrid search (combines text + vector with RRF or weighted scoring)
|
|
140
|
+
if ast_node.hybrid_search?
|
|
141
|
+
hybrid_config = build_hybrid_query(ast_node.hybrid)
|
|
142
|
+
result.merge!(hybrid_config)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Search pipeline (OpenSearch only)
|
|
146
|
+
result[:search_pipeline] = ast_node.pipeline if ast_node.has_pipeline?
|
|
147
|
+
|
|
148
|
+
result
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Builds the kNN clause for a vector search node.
#
# @param vector_node [#field, #embedding, #k] vector search description
# @return [Hash] :field (as String), :query_vector, :k and :num_candidates,
#   where num_candidates is ten times k
def build_knn_query(vector_node)
  knn = {}
  knn[:field] = vector_node.field.to_s
  knn[:query_vector] = vector_node.embedding
  knn[:k] = vector_node.k
  knn[:num_candidates] = vector_node.k * 10
  knn
end
|
|
159
|
+
|
|
160
|
+
# Build hybrid query using RRF (Reciprocal Rank Fusion) for OpenSearch/Elasticsearch
|
|
161
|
+
# Builds the request fragment for a hybrid (text + vector) search.
#
# The text leg is a field-less multi_match, which the backend resolves against
# the index's default query fields. The previous implementation matched on the
# `_all` meta-field, which no longer exists in Elasticsearch 7+ (nor in
# OpenSearch), so the text leg could never match a document.
#
# @param hybrid_node [#text_query, #vector] hybrid search description; the
#   vector part must respond to #k and is handed to build_knn_query
# @return [Hash] with :query, :knn and :rank keys; the RRF window is twice k
def build_hybrid_query(hybrid_node)
  vector_node = hybrid_node.vector
  text_clause = { multi_match: { query: hybrid_node.text_query } }

  {
    query: { bool: { should: [text_clause] } },
    knn: build_knn_query(vector_node),
    rank: { rrf: { window_size: vector_node.k * 2 } }
  }
end
|
|
182
|
+
|
|
183
|
+
# Converts the bool node into a `bool` query hash.
#
# Must clauses come from build_must_clause (nil results are dropped). Each
# filter becomes a `term` clause, or a `terms` clause when its value is an
# Array, because a `term` query accepts a single value only.
#
# @param bool_node [#must, #filter] collections of must nodes and filter nodes
# @return [Hash] `{ bool: { must: [...], filter: [...] } }` with empty sections
#   omitted, or `{}` when there is nothing to query (never `{ bool: {} }`)
def build_query_hash(bool_node)
  must_clauses = bool_node.must.filter_map { |node| build_must_clause(node) }
  filter_clauses = bool_node.filter.map do |f|
    clause_type = f.value.is_a?(Array) ? :terms : :term
    { clause_type => { f.field => f.value } }
  end
  return {} if must_clauses.empty? && filter_clauses.empty?

  sections = { must: must_clauses, filter: filter_clauses }
  { bool: sections.reject { |_, clauses| clauses.empty? } }
end
|
|
195
|
+
|
|
196
|
+
# Translates sort nodes into a list of sort clauses.
#
# @param sort_nodes [Array<#field, #direction>] sort descriptions, may be empty
# @return [Array<Hash>] one `{ field => { order: direction } }` entry per node
def build_sort_hash(sort_nodes)
  sort_nodes.each_with_object([]) do |sort_node, clauses|
    clauses << { sort_node.field => { order: sort_node.direction } }
  end
end
|
|
201
|
+
|
|
202
|
+
# Converts a pagination node into offset pagination parameters.
#
# A missing node yields the first page of 20 hits. A missing per_page falls
# back to 20, and a missing or non-positive page is treated as page 1 so the
# computed offset can never be negative.
#
# @param paginate_node [#page, #per_page, nil] pagination description
# @return [Hash] `{ from: Integer, size: Integer }`
def build_pagination_hash(paginate_node)
  return { from: 0, size: 20 } unless paginate_node

  per_page = paginate_node.per_page || 20
  page = [paginate_node.page || 1, 1].max

  { from: (page - 1) * per_page, size: per_page }
end
|
|
210
|
+
|
|
211
|
+
# Override in subclasses
|
|
212
|
+
def execute_search(_query_hash, **_opts)
|
|
213
|
+
{
|
|
214
|
+
took: 1,
|
|
215
|
+
hits: {
|
|
216
|
+
total: { value: 0 },
|
|
217
|
+
hits: []
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def execute_bulk(actions, **_opts)
|
|
223
|
+
{
|
|
224
|
+
items: actions.map { |_action| { index: { status: 201 } } }
|
|
225
|
+
}
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def execute_create_index(_index_name, **_opts)
|
|
229
|
+
{ acknowledged: true }
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
def execute_delete_index(_index_name, **_opts)
|
|
233
|
+
{ acknowledged: true }
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def execute_index_exists?(_index_name)
|
|
237
|
+
true
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
def execute_index_document(index, id, _document, **_opts)
|
|
241
|
+
{ _index: index, _id: id, result: "created" }
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
def execute_update_document(index, id, _changes, **_opts)
|
|
245
|
+
{ _index: index, _id: id, result: "updated" }
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def execute_delete_document(index, id, **_opts)
|
|
249
|
+
{ _index: index, _id: id, result: "deleted" }
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
def execute_document_exists?(_index, _id)
|
|
253
|
+
true
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def build_must_clause(node)
|
|
257
|
+
case node
|
|
258
|
+
when AST::Match
|
|
259
|
+
{ match: { node.field => node.value } }
|
|
260
|
+
when AST::MultiMatch
|
|
261
|
+
{ multi_match: { query: node.query, fields: node.fields }.merge(node.options) }
|
|
262
|
+
when AST::CombinedFields
|
|
263
|
+
{ combined_fields: { query: node.query, fields: node.fields }.merge(node.options) }
|
|
264
|
+
when AST::Wildcard
|
|
265
|
+
{ wildcard: { node.field => node.value } }
|
|
266
|
+
when AST::Range
|
|
267
|
+
range_options = {
|
|
268
|
+
gte: node.gte,
|
|
269
|
+
lte: node.lte,
|
|
270
|
+
gt: node.gt,
|
|
271
|
+
lt: node.lt
|
|
272
|
+
}.compact
|
|
273
|
+
{ range: { node.field => range_options } }
|
|
274
|
+
when AST::Prefix
|
|
275
|
+
{ prefix: { node.field => node.value } }
|
|
276
|
+
else
|
|
277
|
+
node.to_hash
|
|
278
|
+
end
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
# Builds the field-collapsing section of the request.
#
# @param collapse_node [#field, #inner_hits, #max_concurrent_group_searches]
# @return [Hash] always contains :field; :inner_hits and
#   :max_concurrent_group_searches are included only when truthy on the node
def build_collapse_hash(collapse_node)
  optional = {
    inner_hits: collapse_node.inner_hits,
    max_concurrent_group_searches: collapse_node.max_concurrent_group_searches
  }

  { field: collapse_node.field }.merge(optional.select { |_, setting| setting })
end
|
|
290
|
+
|
|
291
|
+
# Maps each aggregation's name to its definition hash.
#
# @param aggregations [Enumerable<#name, #type, #field, #options, #sub_aggregations>]
# @return [Hash] `{ name => definition }`; empty when no aggregations are given
def build_aggregations_hash(aggregations)
  aggregations.to_h { |agg| [agg.name, build_single_aggregation(agg)] }
end

# Builds the definition of one aggregation, recursing into sub-aggregations.
#
# @param agg [#type, #field, #options, #sub_aggregations]
# @return [Hash] `{ type => body }`, plus an :aggs entry when the aggregation
#   has sub-aggregations; body holds :field (when set) merged with the options
def build_single_aggregation(agg)
  body = agg.field ? { field: agg.field } : {}
  body.merge!(agg.options)

  definition = { agg.type => body }
  nested = agg.sub_aggregations
  definition[:aggs] = build_aggregations_hash(nested) if nested.any?
  definition
end
|
|
312
|
+
end
|
|
313
|
+
end
|
data/lib/noiseless/adapters/elasticsearch.rb
CHANGED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "execution_modules/elasticsearch_execution"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
class Elasticsearch < Adapter
|
|
8
|
+
include ExecutionModules::ElasticsearchExecution
|
|
9
|
+
|
|
10
|
+
def initialize(hosts: [], **connection_params)
|
|
11
|
+
# Ensure we always have at least one host
|
|
12
|
+
hosts_array = Array(hosts)
|
|
13
|
+
default_port = ENV["ELASTICSEARCH_PORT"] || 9200
|
|
14
|
+
@hosts = hosts_array.empty? ? ["http://localhost:#{default_port}"] : hosts_array
|
|
15
|
+
@connection_params = connection_params
|
|
16
|
+
|
|
17
|
+
# Initialize HTTP clients for each host
|
|
18
|
+
@clients = {}
|
|
19
|
+
@hosts.each do |host|
|
|
20
|
+
endpoint = Async::HTTP::Endpoint.parse(host)
|
|
21
|
+
@clients[host] = Async::HTTP::Client.new(endpoint)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
super(hosts: @hosts, **connection_params)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Cluster health API - needed for Rails healthcheck
|
|
28
|
+
def cluster
|
|
29
|
+
@cluster ||= ClusterAPI.new(self)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Indices API - needed for index management operations
|
|
33
|
+
def indices
|
|
34
|
+
@indices ||= IndicesAPI.new(self)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
class ClusterAPI
|
|
38
|
+
def initialize(adapter)
|
|
39
|
+
@adapter = adapter
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def health(**)
|
|
43
|
+
Sync do
|
|
44
|
+
@adapter.send(:execute_cluster_health, **)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Small facade exposing index-level operations on top of an adapter.
#
# The adapter's execute_* hooks are declared in the private section of the
# base adapter, so every call goes through `send` rather than an explicit
# receiver call (which would raise NoMethodError for a private method).
class IndicesAPI
  # @param adapter [Object] adapter providing the execute_* hooks
  def initialize(adapter)
    @adapter = adapter
  end

  # @param index [String, Symbol] index name
  # @return [Hash] `{ index => {} }` when the index exists
  # @raise [RuntimeError] "Index not found" when the adapter reports it missing
  def get(index:)
    raise "Index not found" unless @adapter.send(:execute_index_exists?, index)

    { index => {} }
  end

  # Returns a basic, empty stats structure for the index.
  #
  # @param index [String, Symbol] index name
  # @return [Hash] `{ "indices" => { index => {} } }`
  def stats(index:)
    { "indices" => { index => {} } }
  end

  # Refreshes the index so that documents become immediately searchable.
  #
  # @param index [String, Symbol] index name
  # @return [Object] whatever the adapter's execute_refresh_index returns
  def refresh(index:)
    @adapter.send(:execute_refresh_index, index)
  end
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|