noiseless 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +28 -0
- data/README.md +214 -0
- data/lib/application_search.rb +15 -0
- data/lib/noiseless/adapter.rb +313 -0
- data/lib/noiseless/adapters/elasticsearch.rb +70 -0
- data/lib/noiseless/adapters/execution_modules/elasticsearch_execution.rb +188 -0
- data/lib/noiseless/adapters/execution_modules/opensearch_execution.rb +377 -0
- data/lib/noiseless/adapters/execution_modules/pgvector_support.rb +219 -0
- data/lib/noiseless/adapters/execution_modules/postgresql_execution.rb +461 -0
- data/lib/noiseless/adapters/execution_modules/typesense_execution.rb +472 -0
- data/lib/noiseless/adapters/open_search.rb +208 -0
- data/lib/noiseless/adapters/postgresql.rb +171 -0
- data/lib/noiseless/adapters/typesense.rb +70 -0
- data/lib/noiseless/adapters.rb +14 -0
- data/lib/noiseless/ast/aggregation.rb +56 -0
- data/lib/noiseless/ast/bool.rb +16 -0
- data/lib/noiseless/ast/bulk.rb +18 -0
- data/lib/noiseless/ast/collapse.rb +16 -0
- data/lib/noiseless/ast/combined_fields.rb +33 -0
- data/lib/noiseless/ast/conversation.rb +29 -0
- data/lib/noiseless/ast/filter.rb +15 -0
- data/lib/noiseless/ast/hybrid.rb +35 -0
- data/lib/noiseless/ast/image_query.rb +29 -0
- data/lib/noiseless/ast/join.rb +31 -0
- data/lib/noiseless/ast/match.rb +15 -0
- data/lib/noiseless/ast/multi_match.rb +24 -0
- data/lib/noiseless/ast/paginate.rb +15 -0
- data/lib/noiseless/ast/prefix.rb +15 -0
- data/lib/noiseless/ast/range.rb +18 -0
- data/lib/noiseless/ast/root.rb +69 -0
- data/lib/noiseless/ast/search_after.rb +14 -0
- data/lib/noiseless/ast/sort.rb +15 -0
- data/lib/noiseless/ast/vector.rb +27 -0
- data/lib/noiseless/ast/wildcard.rb +15 -0
- data/lib/noiseless/ast.rb +30 -0
- data/lib/noiseless/bulk_importer.rb +195 -0
- data/lib/noiseless/callbacks.rb +138 -0
- data/lib/noiseless/connection_manager.rb +26 -0
- data/lib/noiseless/document_manager.rb +137 -0
- data/lib/noiseless/dsl.rb +107 -0
- data/lib/noiseless/generators/application_search_generator.rb +24 -0
- data/lib/noiseless/instrumentation.rb +174 -0
- data/lib/noiseless/introspection/console.rb +228 -0
- data/lib/noiseless/introspection/query_visualizer.rb +533 -0
- data/lib/noiseless/introspection.rb +221 -0
- data/lib/noiseless/mapping.rb +253 -0
- data/lib/noiseless/mapping_definition_processor.rb +231 -0
- data/lib/noiseless/model.rb +111 -0
- data/lib/noiseless/model_registry.rb +77 -0
- data/lib/noiseless/multi_search.rb +244 -0
- data/lib/noiseless/pagination.rb +375 -0
- data/lib/noiseless/query_builder.rb +284 -0
- data/lib/noiseless/railtie.rb +35 -0
- data/lib/noiseless/response/aggregations.rb +46 -0
- data/lib/noiseless/response/empty.rb +20 -0
- data/lib/noiseless/response/records.rb +94 -0
- data/lib/noiseless/response/results.rb +110 -0
- data/lib/noiseless/response/suggestions.rb +55 -0
- data/lib/noiseless/response.rb +98 -0
- data/lib/noiseless/response_factory.rb +32 -0
- data/lib/noiseless/runtime_reset_middleware.rb +15 -0
- data/lib/noiseless/search_index_update_job.rb +84 -0
- data/lib/noiseless/test_case.rb +230 -0
- data/lib/noiseless/test_helper.rb +295 -0
- data/lib/noiseless/version.rb +2 -2
- data/lib/noiseless.rb +130 -2
- data/lib/tasks/benchmark.rake +35 -0
- data/lib/tasks/release.rake +22 -0
- data/lib/tasks/test.rake +11 -0
- metadata +260 -14
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "execution_modules/postgresql_execution"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
# PostgreSQL adapter for noiseless - uses pg_trgm, unaccent, and pgvector
|
|
8
|
+
# Provides search capabilities using native PostgreSQL extensions as:
|
|
9
|
+
# - Fallback when OpenSearch/Elasticsearch is unavailable
|
|
10
|
+
# - Simple queries that don't need full search cluster overhead
|
|
11
|
+
# - Semantic/vector search via pgvector
|
|
12
|
+
#
|
|
13
|
+
# Required extensions:
|
|
14
|
+
# CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
15
|
+
# CREATE EXTENSION IF NOT EXISTS unaccent;
|
|
16
|
+
# CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
|
|
17
|
+
# CREATE EXTENSION IF NOT EXISTS vector; -- for pgvector
|
|
18
|
+
#
|
|
19
|
+
class Postgresql < Adapter
  include ExecutionModules::PostgresqlExecution

  attr_reader :model_class_cache

  # Builds the adapter and, unless told otherwise, warns about missing extensions.
  #
  # @param hosts [Object, nil] accepted for signature parity with the other
  #   adapters and ignored; the superclass always receives an empty host list
  # @param connection_params [Hash] stored as-is and forwarded to the superclass;
  #   a truthy +:skip_extension_check+ entry disables the extension check
  def initialize(hosts: nil, **connection_params) # rubocop:disable Lint/UnusedMethodArgument
    @connection_params = connection_params
    @model_class_cache = {}

    # Verify extensions on initialization (optional, can be disabled)
    verify_extensions! unless connection_params[:skip_extension_check]

    super(hosts: [], **connection_params)
  end

  # Always false for this adapter. Note that #search below still wraps its
  # work in an +Async+ block.
  def async_context?
    false
  end

  # Override AST conversion: instead of a search-engine request body, return
  # the raw AST parts that the PostgreSQL execution module consumes.
  #
  # @param ast_node [#bool, #sort, #paginate, #indexes, #vector]
  # @return [Hash] with keys :bool, :sort, :paginate, :indexes and :vector
  def ast_to_hash(ast_node)
    {
      bool: ast_node.bool,
      sort: ast_node.sort,
      paginate: ast_node.paginate,
      indexes: ast_node.indexes, # maps to table/model
      vector: ast_node.vector    # for pgvector semantic search
    }
  end

  # Override search: runs +execute_search+ under instrumentation and wraps the
  # raw result with +ResponseFactory+. No HTTP call is made here, but the work
  # is still performed inside an +Async+ block, so the return value is whatever
  # +Async+ returns (presumably an Async task - confirm how Adapter#search
  # callers consume it).
  #
  # The keyword rest parameter is named (not anonymous +**+) because it is
  # forwarded from inside nested blocks, which Ruby 3.3.0 rejects for anonymous
  # parameters.
  #
  # @param ast_node [Object] query AST root
  # @param model_class [Class, nil] passed to execution and to the response factory
  # @param response_type [Object, nil] passed to the response factory
  # @param options [Hash] extra keywords forwarded to +execute_search+
  def search(ast_node, model_class: nil, response_type: nil, **options)
    query_hash = ast_to_hash(ast_node)

    Async do
      raw_response = instrument(:search, indexes: ast_node.indexes, query: query_hash) do
        execute_search(query_hash, model_class: model_class, **options)
      end

      ResponseFactory.create(
        raw_response,
        model_class: model_class,
        response_type: response_type,
        query_hash: build_pagination_from_ast(ast_node)
      )
    end
  end

  # Register model for this adapter by remembering it under its index name.
  #
  # @param model_class [Class]
  # @param index_name [Object] cache key
  # @return [Class] the registered model class
  def register_model(model_class, index_name:)
    @model_class_cache[index_name] = model_class
  end

  # Cluster-style health API. The reported status depends on which
  # PostgreSQL extensions are installed (see ClusterAPI#health).
  def cluster
    @cluster ||= ClusterAPI.new(self)
  end

  # Index-style API. Every operation returns a static structure; nothing is
  # sent to the database by these calls.
  def indices
    @indices ||= IndicesAPI.new(self)
  end

  # Exposes a search-cluster shaped health report for the PostgreSQL adapter.
  class ClusterAPI
    def initialize(adapter)
      @adapter = adapter
    end

    # @return [Hash] health document; "status" is "green" when every required
    #   extension is installed and "yellow" otherwise. Keyword arguments are
    #   accepted and ignored.
    def health(**)
      {
        "cluster_name" => "postgresql",
        "status" => @adapter.extensions_available? ? "green" : "yellow",
        "number_of_nodes" => 1,
        "active_primary_shards" => 1,
        "extensions" => @adapter.available_extensions
      }
    end
  end

  # Exposes search-index shaped responses for the PostgreSQL adapter.
  class IndicesAPI
    def initialize(adapter)
      @adapter = adapter
    end

    # @return [Hash] placeholder index description with empty mappings/settings
    def get(index:)
      { index => { "mappings" => {}, "settings" => {} } }
    end

    # @return [Hash] placeholder stats structure keyed by the index name
    def stats(index:)
      { "indices" => { index => {} } }
    end

    # No-op for PostgreSQL - queries always see latest data.
    # +index+ is optional so the call shape matches the Typesense adapter's
    # +refresh(index: nil)+.
    #
    # @return [Hash] a static "one shard succeeded" result
    def refresh(index: nil) # rubocop:disable Lint/UnusedMethodArgument
      { "_shards" => { "total" => 1, "successful" => 1, "failed" => 0 } }
    end
  end

  # @return [Boolean] whether every required extension is installed.
  #   Memoised with +defined?+ so that a +false+ answer is cached too
  #   (+||=+ would recompute it on every call).
  def extensions_available?
    return @extensions_available if defined?(@extensions_available)

    @extensions_available = check_extensions
  end

  # @return [Array<String>] names of the known extensions found in the
  #   database; memoised after the first lookup
  def available_extensions
    @available_extensions ||= detect_extensions
  end

  private

  # Logs a warning listing required extensions that are not installed.
  # Never raises for missing extensions.
  def verify_extensions!
    missing = required_extensions - available_extensions
    return if missing.empty?

    Rails.logger.warn(
      "Noiseless PostgreSQL adapter: Missing extensions: #{missing.join(', ')}. " \
      "Some search features may be limited."
    )
  end

  def required_extensions
    %w[pg_trgm unaccent]
  end

  def check_extensions
    required_extensions.all? { |ext| available_extensions.include?(ext) }
  end

  # Queries pg_extension for the extensions this adapter knows about.
  # Any StandardError is logged and reported as "no extensions" instead of
  # propagating.
  def detect_extensions
    result = ActiveRecord::Base.connection.execute(<<~SQL.squish)
      SELECT extname FROM pg_extension
      WHERE extname IN ('pg_trgm', 'unaccent', 'fuzzystrmatch', 'vector', 'btree_gin', 'btree_gist')
    SQL
    result.pluck("extname")
  rescue StandardError => e
    Rails.logger.error("Failed to detect PostgreSQL extensions: #{e.message}")
    []
  end

  # Translates the AST pagination node into from/size form.
  #
  # @return [Hash] +{ from:, size: }+; defaults to the first 20 results when
  #   the AST carries no pagination node
  def build_pagination_from_ast(ast_node)
    paginate = ast_node.paginate
    return { from: 0, size: 20 } unless paginate

    {
      from: (paginate.page - 1) * paginate.per_page,
      size: paginate.per_page
    }
  end
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "execution_modules/typesense_execution"
|
|
4
|
+
|
|
5
|
+
module Noiseless
|
|
6
|
+
module Adapters
|
|
7
|
+
class Typesense < Adapter
  include ExecutionModules::TypesenseExecution

  # Builds one HTTP client per host.
  #
  # @param hosts [Array<String>, String, nil] host URLs; when empty, falls back
  #   to +http://localhost:<port>+ where the port comes from the
  #   +TYPESENSE_PORT+ environment variable (default 8108)
  # @param connection_params [Hash] stored as-is and forwarded to the superclass
  def initialize(hosts: [], **connection_params)
    # Ensure we always have at least one host; the default port is only
    # resolved when it is actually needed.
    @hosts = Array(hosts)
    @hosts = ["http://localhost:#{ENV.fetch('TYPESENSE_PORT', 8108)}"] if @hosts.empty?
    @connection_params = connection_params

    # Initialize HTTP clients for each host
    @clients = {}
    @hosts.each do |host|
      endpoint = Async::HTTP::Endpoint.parse(host)
      @clients[host] = Async::HTTP::Client.new(endpoint)
    end

    super(hosts: @hosts, **connection_params)
  end

  # Cluster health API - needed for Rails healthcheck
  def cluster
    @cluster ||= ClusterAPI.new(self)
  end

  # Indices API - needed for index management operations
  def indices
    @indices ||= IndicesAPI.new(self)
  end

  # Cluster-shaped facade over the adapter's health call.
  class ClusterAPI
    def initialize(adapter)
      @adapter = adapter
    end

    # Runs the adapter's +execute_cluster_health+ inside a +Sync+ block and
    # returns its result. The method is invoked through +send+, so it may be
    # private on the adapter.
    #
    # The keyword rest parameter is named (not anonymous +**+) because it is
    # forwarded from inside a block, which Ruby 3.3.0 rejects for anonymous
    # parameters.
    def health(**options)
      Sync do
        @adapter.send(:execute_cluster_health, **options)
      end
    end
  end

  # Index-shaped facade over the adapter.
  class IndicesAPI
    def initialize(adapter)
      @adapter = adapter
    end

    # @return [Hash] +{ index => {} }+ when the adapter reports the index exists
    # @raise [RuntimeError] "Index not found" otherwise
    # NOTE(review): unlike ClusterAPI#health this calls the adapter method
    # directly and outside a Sync block - confirm +execute_index_exists?+ is
    # public and safe to call without a reactor.
    def get(index:)
      @adapter.execute_index_exists?(index) ? { index => {} } : raise("Index not found")
    end

    # @return [Hash] basic (empty) stats structure keyed by the index name
    def stats(index:)
      { "indices" => { index => {} } }
    end

    # Typesense doesn't require explicit refresh - documents are immediately available
    #
    # @return [Hash] a static "one shard succeeded" result
    def refresh(index: nil) # rubocop:disable Lint/UnusedMethodArgument
      { "_shards" => { "total" => 1, "successful" => 1, "failed" => 0 } }
    end
  end
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module Adapters
  # Instantiates the adapter class named by +name+.
  #
  # The name is camelized (+:open_search+ -> +OpenSearch+) and resolved
  # strictly inside this namespace. +camelize+ is used instead of +classify+
  # because +classify+ also singularizes, which mangles names that end in "s".
  # Passing +false+ to +const_get+ keeps unrelated top-level constants
  # (e.g. +::String+) from being picked up as adapters.
  #
  # @param name [String, Symbol] underscored adapter name
  # @param hosts [Array] forwarded to the adapter constructor
  # @param params [Hash] extra keywords forwarded to the adapter constructor
  # @return [Object] a new adapter instance
  # @raise [NameError] when no adapter class with that name exists here
  def self.lookup(name, hosts: [], **params)
    class_name = name.to_s.camelize

    # Zeitwerk will load the adapter class on demand
    adapter_class = const_get(class_name, false)
    adapter_class.new(hosts: hosts, **params)
  end
end
|
|
14
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class Aggregation < Node
  # Aggregation types that compute a single metric over matching documents.
  METRIC_TYPES = %i[avg sum min max cardinality value_count stats extended_stats percentiles].freeze
  # Aggregation types that group documents into buckets.
  BUCKET_TYPES = %i[terms histogram date_histogram range date_range filter filters nested].freeze

  attr_reader :name, :type, :field, :options, :sub_aggregations

  # @param name [#to_s] aggregation name (stored as a String)
  # @param type [#to_sym] aggregation type (stored as a Symbol)
  # @param field [#to_s, nil] target field (stored as a String when given)
  # @param sub_aggregations [Array] nested aggregations, kept by reference
  # @param options [Hash] any further keywords, stored untouched
  def initialize(name, type, field: nil, sub_aggregations: [], **options)
    super()
    @name, @type = name.to_s, type.to_sym
    @field = field&.to_s
    @options, @sub_aggregations = options, sub_aggregations
  end

  # @return [Boolean] true when the type is listed in METRIC_TYPES
  def metric?
    METRIC_TYPES.include?(type)
  end

  # @return [Boolean] true when the type is listed in BUCKET_TYPES
  def bucket?
    BUCKET_TYPES.include?(type)
  end

  # Appends a nested aggregation.
  #
  # @return [Array] the updated list of sub-aggregations
  def add_sub_aggregation(aggregation)
    sub_aggregations.push(aggregation)
  end
end
|
|
32
|
+
|
|
33
|
+
class AggregationBuilder
  attr_reader :aggregations

  def initialize
    @aggregations = []
  end

  # Declares an aggregation and records it on this builder.
  #
  # When a block is given it is instance-evaluated against a fresh builder,
  # and whatever that builder collected becomes the sub-aggregations.
  #
  # @param name [Object] aggregation name
  # @param type [Object] aggregation type
  # @param field [Object, nil] target field
  # @return [Aggregation] the aggregation that was just added
  def agg(name, type, field: nil, **, &)
    nested =
      if block_given?
        child = AggregationBuilder.new
        child.instance_eval(&)
        child.aggregations
      else
        []
      end

    Aggregation.new(name, type, field: field, sub_aggregations: nested, **).tap do |node|
      @aggregations.push(node)
    end
  end

  alias aggregation agg
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class Bool < Node
  attr_reader :must, :filter, :should

  # Holds the three clause lists of a boolean query.
  #
  # @param must [Array] clauses stored under +must+
  # @param filter [Array] clauses stored under +filter+
  # @param should [Array] clauses stored under +should+
  def initialize(must: [], filter: [], should: [])
    super()
    @must, @filter, @should = must, filter, should
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class Bulk < Node
  attr_reader :operations

  # @param operations [Object] the bulk operations, stored as given
  def initialize(operations)
    super()
    @operations = operations
  end

  # Two bulk nodes are equal when +other+ is an instance of this node's class
  # (or a subclass) and both carry equal operations.
  def ==(other)
    return false unless other.is_a?(self.class)

    operations == other.operations
  end
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class Collapse < Node
  attr_reader :field, :inner_hits, :max_concurrent_group_searches

  # @param field [#to_s] field to collapse on (stored as a String)
  # @param inner_hits [Object, nil] stored as given
  # @param max_concurrent_group_searches [Object, nil] stored as given
  def initialize(field, inner_hits: nil, max_concurrent_group_searches: nil)
    super()
    @field = field.to_s
    @inner_hits, @max_concurrent_group_searches = inner_hits, max_concurrent_group_searches
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class CombinedFields < Node
  attr_reader :query, :fields, :operator, :minimum_should_match, :zero_terms_query,
              :auto_generate_synonyms_phrase_query

  # @param query [Object] the query text, stored as given
  # @param fields [Object] one field or a list of fields; stored as an Array of Strings
  # @param operator [Object, nil]
  # @param minimum_should_match [Object, nil]
  # @param zero_terms_query [Object, nil]
  # @param auto_generate_synonyms_phrase_query [Boolean, nil]
  def initialize(query, fields, operator: nil, minimum_should_match: nil, zero_terms_query: nil,
                 auto_generate_synonyms_phrase_query: nil)
    super()
    @query = query
    @fields = Array(fields).map(&:to_s)
    @operator, @minimum_should_match, @zero_terms_query = operator, minimum_should_match, zero_terms_query
    @auto_generate_synonyms_phrase_query = auto_generate_synonyms_phrase_query
  end

  # Collects the optional settings that were actually supplied.
  #
  # The first three entries are dropped when falsy (nil or false); the synonyms
  # flag is dropped only when nil, so an explicit +false+ is kept.
  #
  # @return [Hash] symbol-keyed options
  def options
    {
      operator: @operator || nil,
      minimum_should_match: @minimum_should_match || nil,
      zero_terms_query: @zero_terms_query || nil,
      auto_generate_synonyms_phrase_query: @auto_generate_synonyms_phrase_query
    }.compact
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
# Conversational search node for RAG (Retrieval Augmented Generation)
|
|
6
|
+
# Typesense and Elasticsearch support conversational/RAG search
|
|
7
|
+
class Conversation < Node
  attr_reader :model_id, :conversation_id, :system_prompt

  # @param model_id [String] The LLM model identifier
  # @param conversation_id [String, nil] ID for multi-turn conversations (optional)
  # @param system_prompt [String, nil] Custom system prompt (optional)
  def initialize(model_id:, conversation_id: nil, system_prompt: nil)
    super()
    @model_id, @conversation_id, @system_prompt = model_id, conversation_id, system_prompt
  end

  # @return [Boolean] true when a conversation ID was supplied
  def multi_turn?
    !conversation_id.nil?
  end

  # @return [Boolean] true when a system prompt was supplied
  def custom_prompt?
    !system_prompt.nil?
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
# Hybrid search node combining text and vector search
|
|
6
|
+
# Supports weighted combination of BM25 text scores and kNN vector scores
|
|
7
|
+
class Hybrid < Node
  attr_reader :text_query, :vector, :text_weight, :vector_weight

  # @param text_query [String] The text query for BM25 matching
  # @param vector [AST::Vector] The vector search node
  # @param text_weight [Float] Weight for text search score (0.0-1.0)
  # @param vector_weight [Float] Weight for vector search score (0.0-1.0)
  def initialize(text_query, vector, text_weight: 0.5, vector_weight: 0.5)
    super()
    @text_query, @vector = text_query, vector
    @text_weight, @vector_weight = text_weight, vector_weight
  end

  # @return [Boolean] true when both weights compare equal
  def balanced?
    text_weight == vector_weight
  end

  # @return [Boolean] true when the text weight is the larger one
  def text_dominant?
    text_weight > vector_weight
  end

  # @return [Boolean] true when the vector weight is the larger one
  def vector_dominant?
    vector_weight > text_weight
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
# Image search node for Typesense visual search
|
|
6
|
+
# Supports searching by image URL or base64 encoded image data
|
|
7
|
+
class ImageQuery < Node
  # Matches an http(s) scheme prefix. URI schemes are case-insensitive, so
  # "HTTPS://..." must be recognised as a URL too.
  URL_PREFIX = %r{\Ahttps?://}i

  attr_reader :field, :image_data, :k

  # @param field [Symbol, String] The image embedding field name
  # @param image_data [String] Image URL or base64 encoded image data
  # @param k [Integer] Number of nearest neighbors (default: 10)
  def initialize(field, image_data, k: 10)
    super()
    @field = field
    @image_data = image_data
    @k = k
  end

  # @return [Boolean] true when the image data starts with an http:// or
  #   https:// scheme, in any letter case
  def url?
    @image_data.match?(URL_PREFIX)
  end

  # @return [Boolean] true for anything that is not recognised as a URL
  def base64?
    !url?
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
# Join node for cross-collection queries (Typesense feature)
|
|
6
|
+
# Allows including related documents from other collections
|
|
7
|
+
class Join < Node
  attr_reader :collection, :on, :include_fields, :strategy

  # @param collection [String, Symbol] The collection to join
  # @param on [Hash] Join conditions (e.g., { foreign_key: :local_key })
  # @param include_fields [Array<String, Symbol>] Fields to include from joined collection
  # @param strategy [Symbol] Join strategy :left or :inner (default: :left)
  def initialize(collection, on:, include_fields: [], strategy: :left)
    super()
    @collection = collection.to_s
    @include_fields = Array(include_fields).map(&:to_s)
    @on, @strategy = on, strategy
  end

  # @return [Boolean] true when the strategy is :left
  def left_join?
    strategy == :left
  end

  # @return [Boolean] true when the strategy is :inner
  def inner_join?
    strategy == :inner
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class MultiMatch
  attr_reader :query, :fields, :options

  # @param query [Object] the query text, stored as given
  # @param fields [Object] one field or a list of fields; always stored as an Array
  # @param options [Hash] extra keywords merged into the generated clause
  def initialize(query, fields, **options)
    @query, @options = query, options
    @fields = Array(fields)
  end

  # Renders the node as a +multi_match+ clause.
  #
  # Options are applied after +query+ and +fields+, so an option with either
  # of those names replaces the stored value.
  #
  # @return [Hash] +{ multi_match: { query:, fields:, ...options } }+
  def to_hash
    clause = { query: query, fields: fields, **options }
    { multi_match: clause }
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Noiseless
|
|
4
|
+
module AST
|
|
5
|
+
class Range < Node
  attr_reader :field, :gte, :lte, :gt, :lt

  # @param field [Object] field the bounds apply to, stored as given
  # @param gte [Object, nil] inclusive lower bound
  # @param lte [Object, nil] inclusive upper bound
  # @param gt [Object, nil] exclusive lower bound
  # @param lt [Object, nil] exclusive upper bound
  def initialize(field, gte: nil, lte: nil, gt: nil, lt: nil)
    super()
    @field = field
    @gte, @lte = gte, lte
    @gt, @lt = gt, lt
  end
end
|
|
17
|
+
end
|
|
18
|
+
end
|