ragdoll 0.1.3 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +201 -0
- data/README.md +160 -31
- data/Rakefile +0 -3
- data/app/models/ragdoll/embedding.rb +74 -0
- data/app/models/ragdoll/search.rb +165 -0
- data/app/models/ragdoll/search_result.rb +121 -0
- data/app/services/ragdoll/configuration_service.rb +3 -3
- data/app/services/ragdoll/document_processor.rb +124 -1
- data/app/services/ragdoll/embedding_service.rb +10 -0
- data/app/services/ragdoll/search_engine.rb +64 -6
- data/db/migrate/007_create_ragdoll_searches.rb +73 -0
- data/db/migrate/008_create_ragdoll_search_results.rb +49 -0
- data/lib/ragdoll/core/client.rb +75 -8
- data/lib/ragdoll/core/model.rb +13 -0
- data/lib/ragdoll/core/version.rb +1 -1
- data/lib/ragdoll/core.rb +2 -0
- data/lib/ragdoll.rb +17 -0
- data/lib/tasks/db.rake +13 -13
- metadata +371 -2
@@ -0,0 +1,49 @@
|
|
1
|
+
class CreateRagdollSearchResults < ActiveRecord::Migration[7.0]
|
2
|
+
def change
|
3
|
+
# Junction table for tracking which embeddings were returned for each search
|
4
|
+
create_table :ragdoll_search_results,
|
5
|
+
comment: "Junction table linking searches to their returned embeddings" do |t|
|
6
|
+
|
7
|
+
t.references :search, null: false, foreign_key: { to_table: :ragdoll_searches },
|
8
|
+
comment: "Reference to the search query"
|
9
|
+
|
10
|
+
t.references :embedding, null: false, foreign_key: { to_table: :ragdoll_embeddings },
|
11
|
+
comment: "Reference to the returned embedding"
|
12
|
+
|
13
|
+
t.float :similarity_score, null: false,
|
14
|
+
comment: "Similarity score for this result"
|
15
|
+
|
16
|
+
t.integer :result_rank, null: false,
|
17
|
+
comment: "Ranking position of this result (1-based)"
|
18
|
+
|
19
|
+
t.boolean :clicked, default: false,
|
20
|
+
comment: "Whether user interacted with this result"
|
21
|
+
|
22
|
+
t.datetime :clicked_at,
|
23
|
+
comment: "Timestamp when result was clicked/selected"
|
24
|
+
|
25
|
+
t.timestamps null: false,
|
26
|
+
comment: "Standard creation and update timestamps"
|
27
|
+
|
28
|
+
###########
|
29
|
+
# Indexes #
|
30
|
+
###########
|
31
|
+
|
32
|
+
t.index [:search_id, :result_rank],
|
33
|
+
name: "idx_search_results_search_rank",
|
34
|
+
comment: "Index for retrieving results in ranked order"
|
35
|
+
|
36
|
+
t.index [:embedding_id, :similarity_score],
|
37
|
+
name: "idx_search_results_embedding_score",
|
38
|
+
comment: "Index for analyzing embedding performance"
|
39
|
+
|
40
|
+
t.index :similarity_score,
|
41
|
+
name: "idx_search_results_similarity",
|
42
|
+
comment: "Index for similarity score analysis"
|
43
|
+
|
44
|
+
t.index [:clicked, :clicked_at],
|
45
|
+
name: "idx_search_results_clicks",
|
46
|
+
comment: "Index for click-through analysis"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
data/lib/ragdoll/core/client.rb
CHANGED
@@ -49,7 +49,15 @@ module Ragdoll
|
|
49
49
|
|
50
50
|
# Get relevant context without prompt enhancement
|
51
51
|
def get_context(query:, limit: 10, **options)
|
52
|
-
|
52
|
+
search_response = search_similar_content(query: query, limit: limit, **options)
|
53
|
+
|
54
|
+
# Handle both old format (array) and new format (hash with results/statistics)
|
55
|
+
if search_response.is_a?(Hash) && search_response.key?(:results)
|
56
|
+
results = search_response[:results]
|
57
|
+
else
|
58
|
+
# Fallback for old format
|
59
|
+
results = search_response || []
|
60
|
+
end
|
53
61
|
|
54
62
|
context_chunks = results.map do |result|
|
55
63
|
{
|
@@ -76,13 +84,31 @@ module Ragdoll
|
|
76
84
|
|
77
85
|
# Semantic search++ should incorporate hybrid search
|
78
86
|
def search(query:, **options)
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
87
|
+
# Pass through tracking options to the search engine
|
88
|
+
search_response = search_similar_content(query: query, **options)
|
89
|
+
|
90
|
+
# Handle both old format (array) and new format (hash with results/statistics)
|
91
|
+
if search_response.is_a?(Hash) && search_response.key?(:results)
|
92
|
+
results = search_response[:results]
|
93
|
+
statistics = search_response[:statistics]
|
94
|
+
execution_time_ms = search_response[:execution_time_ms]
|
95
|
+
|
96
|
+
{
|
97
|
+
query: query,
|
98
|
+
results: results,
|
99
|
+
total_results: results.length,
|
100
|
+
statistics: statistics,
|
101
|
+
execution_time_ms: execution_time_ms
|
102
|
+
}
|
103
|
+
else
|
104
|
+
# Fallback for old format
|
105
|
+
results = search_response || []
|
106
|
+
{
|
107
|
+
query: query,
|
108
|
+
results: results,
|
109
|
+
total_results: results.length
|
110
|
+
}
|
111
|
+
end
|
86
112
|
end
|
87
113
|
|
88
114
|
# Search similar content (core functionality)
|
@@ -92,11 +118,52 @@ module Ragdoll
|
|
92
118
|
|
93
119
|
# Hybrid search combining semantic and full-text search
|
94
120
|
def hybrid_search(query:, **options)
|
121
|
+
start_time = Time.current
|
122
|
+
|
123
|
+
# Extract tracking options
|
124
|
+
session_id = options[:session_id]
|
125
|
+
user_id = options[:user_id]
|
126
|
+
track_search = options.fetch(:track_search, true)
|
127
|
+
|
95
128
|
# Generate embedding for the query
|
96
129
|
query_embedding = @embedding_service.generate_embedding(query)
|
97
130
|
|
98
131
|
# Perform hybrid search
|
99
132
|
results = Ragdoll::Document.hybrid_search(query, query_embedding: query_embedding, **options)
|
133
|
+
|
134
|
+
execution_time = ((Time.current - start_time) * 1000).round
|
135
|
+
|
136
|
+
# Record search if tracking enabled
|
137
|
+
if track_search && query && !query.empty?
|
138
|
+
begin
|
139
|
+
# Format results for search recording - hybrid search returns different format
|
140
|
+
search_results = results.map do |result|
|
141
|
+
{
|
142
|
+
embedding_id: result[:embedding_id] || result[:id],
|
143
|
+
similarity: result[:similarity] || result[:score] || 0.0
|
144
|
+
}
|
145
|
+
end
|
146
|
+
|
147
|
+
# Extract filters from options
|
148
|
+
filters = options.slice(:document_type, :status).compact
|
149
|
+
search_options = options.slice(:limit, :semantic_weight, :text_weight).compact
|
150
|
+
|
151
|
+
Ragdoll::Search.record_search(
|
152
|
+
query: query,
|
153
|
+
query_embedding: query_embedding,
|
154
|
+
results: search_results,
|
155
|
+
search_type: "hybrid",
|
156
|
+
filters: filters,
|
157
|
+
options: search_options,
|
158
|
+
execution_time_ms: execution_time,
|
159
|
+
session_id: session_id,
|
160
|
+
user_id: user_id
|
161
|
+
)
|
162
|
+
rescue => e
|
163
|
+
# Log error but don't fail the search
|
164
|
+
puts "Warning: Hybrid search tracking failed: #{e.message}" if ENV["RAGDOLL_DEBUG"]
|
165
|
+
end
|
166
|
+
end
|
100
167
|
|
101
168
|
{
|
102
169
|
query: query,
|
data/lib/ragdoll/core/model.rb
CHANGED
@@ -8,6 +8,14 @@ module Ragdoll
|
|
8
8
|
# The provider is optional.
|
9
9
|
# Can be initialized with nil or empty string.
|
10
10
|
Model = Data.define(:name) do
|
11
|
+
def initialize(name:)
|
12
|
+
# Handle case where a Model object is passed instead of a string
|
13
|
+
if name.is_a?(Model)
|
14
|
+
super(name: name.name)
|
15
|
+
else
|
16
|
+
super(name: name)
|
17
|
+
end
|
18
|
+
end
|
11
19
|
# @return [Symbol, nil] the provider part of the name, or nil if not present.
|
12
20
|
def provider
|
13
21
|
return nil if name.nil? || name.empty?
|
@@ -31,6 +39,11 @@ module Ragdoll
|
|
31
39
|
name.nil? ? "" : name
|
32
40
|
end
|
33
41
|
|
42
|
+
# @return [Boolean] true if the name is nil or empty.
|
43
|
+
def empty?
|
44
|
+
name.nil? || name.empty?
|
45
|
+
end
|
46
|
+
|
34
47
|
# @return [Hash] a hash representation of the model.
|
35
48
|
def to_h
|
36
49
|
{ provider: provider, model: model }
|
data/lib/ragdoll/core/version.rb
CHANGED
data/lib/ragdoll/core.rb
CHANGED
@@ -30,6 +30,8 @@ require "ragdoll/content"
|
|
30
30
|
require "ragdoll/text_content"
|
31
31
|
require "ragdoll/audio_content"
|
32
32
|
require "ragdoll/image_content"
|
33
|
+
require "ragdoll/search"
|
34
|
+
require "ragdoll/search_result"
|
33
35
|
require "ragdoll/document_processor"
|
34
36
|
require "ragdoll/document_management"
|
35
37
|
require "ragdoll/text_chunker"
|
data/lib/ragdoll.rb
CHANGED
@@ -196,6 +196,23 @@ module Ragdoll
|
|
196
196
|
Ragdoll::Core.search_similar_content(*args, **kwargs)
|
197
197
|
end
|
198
198
|
|
199
|
+
# Perform hybrid search combining semantic and full-text search.
|
200
|
+
# @param query [String] the search query string.
|
201
|
+
# @param semantic_weight [Float] weight for semantic search results (0.0 - 1.0).
|
202
|
+
# @param text_weight [Float] weight for full-text search results (0.0 - 1.0).
|
203
|
+
# @param options [Hash] additional search options, such as filters and limits.
|
204
|
+
# @example
|
205
|
+
# results = Ragdoll.hybrid_search(
|
206
|
+
# query: "machine learning",
|
207
|
+
# semantic_weight: 0.7,
|
208
|
+
# text_weight: 0.3
|
209
|
+
# )
|
210
|
+
# results.each { |result| puts result[:document_title] }
|
211
|
+
# @return [Array<Hash>] an array of search results combining semantic and text search.
|
212
|
+
def hybrid_search(*args, **kwargs)
|
213
|
+
Ragdoll::Core.hybrid_search(*args, **kwargs)
|
214
|
+
end
|
215
|
+
|
199
216
|
|
200
217
|
###############
|
201
218
|
# Misc. Stuff #
|
data/lib/tasks/db.rake
CHANGED
@@ -8,9 +8,9 @@ namespace :db do
|
|
8
8
|
require_relative "../ragdoll-core"
|
9
9
|
|
10
10
|
config = Ragdoll::Core.configuration
|
11
|
-
puts "Creating database with config: #{config.
|
11
|
+
puts "Creating database with config: #{config.database.inspect}"
|
12
12
|
|
13
|
-
case config.
|
13
|
+
case config.database[:adapter]
|
14
14
|
when "postgresql"
|
15
15
|
puts "PostgreSQL database setup - running as superuser to create database and role..."
|
16
16
|
|
@@ -20,8 +20,8 @@ namespace :db do
|
|
20
20
|
database: 'postgres', # Connect to postgres database initially
|
21
21
|
username: ENV.fetch('POSTGRES_SUPERUSER', 'postgres'),
|
22
22
|
password: ENV['POSTGRES_SUPERUSER_PASSWORD'],
|
23
|
-
host: config.
|
24
|
-
port: config.
|
23
|
+
host: config.database[:host] || 'localhost',
|
24
|
+
port: config.database[:port] || 5432
|
25
25
|
)
|
26
26
|
|
27
27
|
# Run individual SQL commands to avoid transaction block issues
|
@@ -62,8 +62,8 @@ namespace :db do
|
|
62
62
|
database: 'ragdoll_development',
|
63
63
|
username: ENV.fetch('POSTGRES_SUPERUSER', 'postgres'),
|
64
64
|
password: ENV['POSTGRES_SUPERUSER_PASSWORD'],
|
65
|
-
host: config.
|
66
|
-
port: config.
|
65
|
+
host: config.database[:host] || 'localhost',
|
66
|
+
port: config.database[:port] || 5432
|
67
67
|
)
|
68
68
|
|
69
69
|
ActiveRecord::Base.connection.execute <<-SQL
|
@@ -94,11 +94,11 @@ namespace :db do
|
|
94
94
|
require_relative "../ragdoll-core"
|
95
95
|
|
96
96
|
config = Ragdoll::Core.configuration
|
97
|
-
puts "Dropping database with config: #{config.
|
97
|
+
puts "Dropping database with config: #{config.database.inspect}"
|
98
98
|
|
99
|
-
case config.
|
99
|
+
case config.database[:adapter]
|
100
100
|
when "postgresql", "mysql2"
|
101
|
-
puts "For #{config.
|
101
|
+
puts "For #{config.database[:adapter]}, please drop the database manually on your server"
|
102
102
|
end
|
103
103
|
|
104
104
|
puts "Database drop completed"
|
@@ -216,9 +216,9 @@ namespace :db do
|
|
216
216
|
|
217
217
|
config = Ragdoll::Core.configuration
|
218
218
|
|
219
|
-
case config.
|
219
|
+
case config.database[:adapter]
|
220
220
|
when "postgresql"
|
221
|
-
db_config = config.
|
221
|
+
db_config = config.database
|
222
222
|
psql_cmd = "psql"
|
223
223
|
psql_cmd += " -h #{db_config[:host]}" if db_config[:host]
|
224
224
|
psql_cmd += " -p #{db_config[:port]}" if db_config[:port]
|
@@ -227,7 +227,7 @@ namespace :db do
|
|
227
227
|
puts "Opening PostgreSQL console..."
|
228
228
|
system(psql_cmd)
|
229
229
|
when "mysql2"
|
230
|
-
db_config = config.
|
230
|
+
db_config = config.database
|
231
231
|
mysql_cmd = "mysql"
|
232
232
|
mysql_cmd += " -h #{db_config[:host]}" if db_config[:host]
|
233
233
|
mysql_cmd += " -P #{db_config[:port]}" if db_config[:port]
|
@@ -237,7 +237,7 @@ namespace :db do
|
|
237
237
|
puts "Opening MySQL console..."
|
238
238
|
system(mysql_cmd)
|
239
239
|
else
|
240
|
-
puts "Console not supported for adapter: #{config.
|
240
|
+
puts "Console not supported for adapter: #{config.database[:adapter]}"
|
241
241
|
end
|
242
242
|
end
|
243
243
|
|