ragdoll 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +243 -0
- data/README.md +209 -31
- data/Rakefile +4 -5
- data/app/models/ragdoll/document.rb +115 -12
- data/app/models/ragdoll/embedding.rb +108 -2
- data/app/models/ragdoll/search.rb +165 -0
- data/app/models/ragdoll/search_result.rb +121 -0
- data/app/services/ragdoll/configuration_service.rb +3 -3
- data/app/services/ragdoll/document_processor.rb +124 -1
- data/app/services/ragdoll/embedding_service.rb +10 -0
- data/app/services/ragdoll/search_engine.rb +75 -6
- data/db/migrate/{001_enable_postgresql_extensions.rb → 20250815234901_enable_postgresql_extensions.rb} +7 -8
- data/db/migrate/20250815234902_create_ragdoll_documents.rb +117 -0
- data/db/migrate/{005_create_ragdoll_embeddings.rb → 20250815234903_create_ragdoll_embeddings.rb} +13 -10
- data/db/migrate/{006_create_ragdoll_contents.rb → 20250815234904_create_ragdoll_contents.rb} +14 -11
- data/db/migrate/20250815234905_create_ragdoll_searches.rb +77 -0
- data/db/migrate/20250815234906_create_ragdoll_search_results.rb +49 -0
- data/lib/ragdoll/core/client.rb +75 -8
- data/lib/ragdoll/core/database.rb +8 -3
- data/lib/ragdoll/core/model.rb +13 -0
- data/lib/ragdoll/core/version.rb +1 -1
- data/lib/ragdoll/core.rb +2 -0
- data/lib/ragdoll.rb +17 -0
- data/lib/tasks/db.rake +75 -27
- metadata +375 -6
- data/db/migrate/004_create_ragdoll_documents.rb +0 -70
data/lib/ragdoll/core/client.rb
CHANGED
@@ -49,7 +49,15 @@ module Ragdoll
|
|
49
49
|
|
50
50
|
# Get relevant context without prompt enhancement
|
51
51
|
def get_context(query:, limit: 10, **options)
|
52
|
-
|
52
|
+
search_response = search_similar_content(query: query, limit: limit, **options)
|
53
|
+
|
54
|
+
# Handle both old format (array) and new format (hash with results/statistics)
|
55
|
+
if search_response.is_a?(Hash) && search_response.key?(:results)
|
56
|
+
results = search_response[:results]
|
57
|
+
else
|
58
|
+
# Fallback for old format
|
59
|
+
results = search_response || []
|
60
|
+
end
|
53
61
|
|
54
62
|
context_chunks = results.map do |result|
|
55
63
|
{
|
@@ -76,13 +84,31 @@ module Ragdoll
|
|
76
84
|
|
77
85
|
# Semantic search++ should incorporate hybrid search
|
78
86
|
def search(query:, **options)
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
87
|
+
# Pass through tracking options to the search engine
|
88
|
+
search_response = search_similar_content(query: query, **options)
|
89
|
+
|
90
|
+
# Handle both old format (array) and new format (hash with results/statistics)
|
91
|
+
if search_response.is_a?(Hash) && search_response.key?(:results)
|
92
|
+
results = search_response[:results]
|
93
|
+
statistics = search_response[:statistics]
|
94
|
+
execution_time_ms = search_response[:execution_time_ms]
|
95
|
+
|
96
|
+
{
|
97
|
+
query: query,
|
98
|
+
results: results,
|
99
|
+
total_results: results.length,
|
100
|
+
statistics: statistics,
|
101
|
+
execution_time_ms: execution_time_ms
|
102
|
+
}
|
103
|
+
else
|
104
|
+
# Fallback for old format
|
105
|
+
results = search_response || []
|
106
|
+
{
|
107
|
+
query: query,
|
108
|
+
results: results,
|
109
|
+
total_results: results.length
|
110
|
+
}
|
111
|
+
end
|
86
112
|
end
|
87
113
|
|
88
114
|
# Search similar content (core functionality)
|
@@ -92,11 +118,52 @@ module Ragdoll
|
|
92
118
|
|
93
119
|
# Hybrid search combining semantic and full-text search
|
94
120
|
def hybrid_search(query:, **options)
|
121
|
+
start_time = Time.current
|
122
|
+
|
123
|
+
# Extract tracking options
|
124
|
+
session_id = options[:session_id]
|
125
|
+
user_id = options[:user_id]
|
126
|
+
track_search = options.fetch(:track_search, true)
|
127
|
+
|
95
128
|
# Generate embedding for the query
|
96
129
|
query_embedding = @embedding_service.generate_embedding(query)
|
97
130
|
|
98
131
|
# Perform hybrid search
|
99
132
|
results = Ragdoll::Document.hybrid_search(query, query_embedding: query_embedding, **options)
|
133
|
+
|
134
|
+
execution_time = ((Time.current - start_time) * 1000).round
|
135
|
+
|
136
|
+
# Record search if tracking enabled
|
137
|
+
if track_search && query && !query.empty?
|
138
|
+
begin
|
139
|
+
# Format results for search recording - hybrid search returns different format
|
140
|
+
search_results = results.map do |result|
|
141
|
+
{
|
142
|
+
embedding_id: result[:embedding_id] || result[:id],
|
143
|
+
similarity: result[:similarity] || result[:score] || 0.0
|
144
|
+
}
|
145
|
+
end
|
146
|
+
|
147
|
+
# Extract filters from options
|
148
|
+
filters = options.slice(:document_type, :status).compact
|
149
|
+
search_options = options.slice(:limit, :semantic_weight, :text_weight).compact
|
150
|
+
|
151
|
+
Ragdoll::Search.record_search(
|
152
|
+
query: query,
|
153
|
+
query_embedding: query_embedding,
|
154
|
+
results: search_results,
|
155
|
+
search_type: "hybrid",
|
156
|
+
filters: filters,
|
157
|
+
options: search_options,
|
158
|
+
execution_time_ms: execution_time,
|
159
|
+
session_id: session_id,
|
160
|
+
user_id: user_id
|
161
|
+
)
|
162
|
+
rescue => e
|
163
|
+
# Log error but don't fail the search
|
164
|
+
puts "Warning: Hybrid search tracking failed: #{e.message}" if ENV["RAGDOLL_DEBUG"]
|
165
|
+
end
|
166
|
+
end
|
100
167
|
|
101
168
|
{
|
102
169
|
query: query,
|
data/lib/ragdoll/core/database.rb
CHANGED
@@ -90,10 +90,10 @@ module Ragdoll
|
|
90
90
|
# Drop all tables in correct order (respecting foreign key constraints)
|
91
91
|
# Order: dependent tables first, then parent tables
|
92
92
|
tables_to_drop = %w[
|
93
|
+
ragdoll_search_results
|
94
|
+
ragdoll_searches
|
93
95
|
ragdoll_embeddings
|
94
|
-
|
95
|
-
ragdoll_image_contents
|
96
|
-
ragdoll_audio_contents
|
96
|
+
ragdoll_contents
|
97
97
|
ragdoll_documents
|
98
98
|
schema_migrations
|
99
99
|
]
|
@@ -109,6 +109,11 @@ module Ragdoll
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
+
# Also drop any functions/triggers that might exist
|
113
|
+
if ActiveRecord::Base.connection.adapter_name.downcase.include?("postgresql")
|
114
|
+
ActiveRecord::Base.connection.execute("DROP FUNCTION IF EXISTS ragdoll_documents_vector_update() CASCADE")
|
115
|
+
end
|
116
|
+
|
112
117
|
migrate!
|
113
118
|
end
|
114
119
|
|
data/lib/ragdoll/core/model.rb
CHANGED
@@ -8,6 +8,14 @@ module Ragdoll
|
|
8
8
|
# The provider is optional.
|
9
9
|
# Can be initialized with nil or empty string.
|
10
10
|
Model = Data.define(:name) do
|
11
|
+
def initialize(name:)
|
12
|
+
# Handle case where a Model object is passed instead of a string
|
13
|
+
if name.is_a?(Model)
|
14
|
+
super(name: name.name)
|
15
|
+
else
|
16
|
+
super(name: name)
|
17
|
+
end
|
18
|
+
end
|
11
19
|
# @return [Symbol, nil] the provider part of the name, or nil if not present.
|
12
20
|
def provider
|
13
21
|
return nil if name.nil? || name.empty?
|
@@ -31,6 +39,11 @@ module Ragdoll
|
|
31
39
|
name.nil? ? "" : name
|
32
40
|
end
|
33
41
|
|
42
|
+
# @return [Boolean] true if the name is nil or empty.
|
43
|
+
def empty?
|
44
|
+
name.nil? || name.empty?
|
45
|
+
end
|
46
|
+
|
34
47
|
# @return [Hash] a hash representation of the model.
|
35
48
|
def to_h
|
36
49
|
{ provider: provider, model: model }
|
data/lib/ragdoll/core/version.rb
CHANGED
data/lib/ragdoll/core.rb
CHANGED
@@ -30,6 +30,8 @@ require "ragdoll/content"
|
|
30
30
|
require "ragdoll/text_content"
|
31
31
|
require "ragdoll/audio_content"
|
32
32
|
require "ragdoll/image_content"
|
33
|
+
require "ragdoll/search"
|
34
|
+
require "ragdoll/search_result"
|
33
35
|
require "ragdoll/document_processor"
|
34
36
|
require "ragdoll/document_management"
|
35
37
|
require "ragdoll/text_chunker"
|
data/lib/ragdoll.rb
CHANGED
@@ -196,6 +196,23 @@ module Ragdoll
|
|
196
196
|
Ragdoll::Core.search_similar_content(*args, **kwargs)
|
197
197
|
end
|
198
198
|
|
199
|
+
# Perform hybrid search combining semantic and full-text search.
|
200
|
+
# @param query [String] the search query string.
|
201
|
+
# @param semantic_weight [Float] weight for semantic search results (0.0 - 1.0).
|
202
|
+
# @param text_weight [Float] weight for full-text search results (0.0 - 1.0).
|
203
|
+
# @param options [Hash] additional search options, such as filters and limits.
|
204
|
+
# @example
|
205
|
+
# results = Ragdoll.hybrid_search(
|
206
|
+
# query: "machine learning",
|
207
|
+
# semantic_weight: 0.7,
|
208
|
+
# text_weight: 0.3
|
209
|
+
# )
|
210
|
+
# results.each { |result| puts result[:document_title] }
|
211
|
+
# @return [Array<Hash>] an array of search results combining semantic and text search.
|
212
|
+
def hybrid_search(*args, **kwargs)
|
213
|
+
Ragdoll::Core.hybrid_search(*args, **kwargs)
|
214
|
+
end
|
215
|
+
|
199
216
|
|
200
217
|
###############
|
201
218
|
# Misc. Stuff #
|
data/lib/tasks/db.rake
CHANGED
@@ -8,9 +8,9 @@ namespace :db do
|
|
8
8
|
require_relative "../ragdoll-core"
|
9
9
|
|
10
10
|
config = Ragdoll::Core.configuration
|
11
|
-
puts "Creating database with config: #{config.
|
11
|
+
puts "Creating database with config: #{config.database.inspect}"
|
12
12
|
|
13
|
-
case config.
|
13
|
+
case config.database[:adapter]
|
14
14
|
when "postgresql"
|
15
15
|
puts "PostgreSQL database setup - running as superuser to create database and role..."
|
16
16
|
|
@@ -20,27 +20,22 @@ namespace :db do
|
|
20
20
|
database: 'postgres', # Connect to postgres database initially
|
21
21
|
username: ENV.fetch('POSTGRES_SUPERUSER', 'postgres'),
|
22
22
|
password: ENV['POSTGRES_SUPERUSER_PASSWORD'],
|
23
|
-
host: config.
|
24
|
-
port: config.
|
23
|
+
host: config.database[:host] || 'localhost',
|
24
|
+
port: config.database[:port] || 5432
|
25
25
|
)
|
26
26
|
|
27
27
|
# Run individual SQL commands to avoid transaction block issues
|
28
|
-
|
29
|
-
ActiveRecord::Base.connection.execute("DROP DATABASE IF EXISTS ragdoll_development")
|
30
|
-
rescue => e
|
31
|
-
puts "Note: #{e.message}" if e.message.include?("does not exist")
|
32
|
-
end
|
33
|
-
|
34
|
-
begin
|
35
|
-
ActiveRecord::Base.connection.execute("DROP ROLE IF EXISTS ragdoll")
|
36
|
-
rescue => e
|
37
|
-
puts "Note: #{e.message}" if e.message.include?("does not exist")
|
38
|
-
end
|
28
|
+
# Note: Removed the DROP DATABASE/ROLE here since that should be done via db:drop task
|
39
29
|
|
40
30
|
begin
|
41
31
|
ActiveRecord::Base.connection.execute("CREATE ROLE ragdoll WITH LOGIN CREATEDB")
|
32
|
+
puts "Role 'ragdoll' created successfully"
|
42
33
|
rescue => e
|
43
|
-
|
34
|
+
if e.message.include?("already exists")
|
35
|
+
puts "Note: Role 'ragdoll' already exists, continuing..."
|
36
|
+
else
|
37
|
+
raise e
|
38
|
+
end
|
44
39
|
end
|
45
40
|
|
46
41
|
begin
|
@@ -50,8 +45,16 @@ namespace :db do
|
|
50
45
|
ENCODING = 'UTF8'
|
51
46
|
CONNECTION LIMIT = -1
|
52
47
|
SQL
|
48
|
+
puts "Database 'ragdoll_development' created successfully"
|
53
49
|
rescue => e
|
54
|
-
|
50
|
+
if e.message.include?("already exists")
|
51
|
+
puts "ERROR: Database 'ragdoll_development' already exists!"
|
52
|
+
puts "Please run 'rake db:drop' first to remove the existing database, then run 'rake db:create' again."
|
53
|
+
puts "Or use 'rake db:reset' to drop, create, and migrate in one step."
|
54
|
+
exit 1
|
55
|
+
else
|
56
|
+
raise e
|
57
|
+
end
|
55
58
|
end
|
56
59
|
|
57
60
|
ActiveRecord::Base.connection.execute("GRANT ALL PRIVILEGES ON DATABASE ragdoll_development TO ragdoll")
|
@@ -62,8 +65,8 @@ namespace :db do
|
|
62
65
|
database: 'ragdoll_development',
|
63
66
|
username: ENV.fetch('POSTGRES_SUPERUSER', 'postgres'),
|
64
67
|
password: ENV['POSTGRES_SUPERUSER_PASSWORD'],
|
65
|
-
host: config.
|
66
|
-
port: config.
|
68
|
+
host: config.database[:host] || 'localhost',
|
69
|
+
port: config.database[:port] || 5432
|
67
70
|
)
|
68
71
|
|
69
72
|
ActiveRecord::Base.connection.execute <<-SQL
|
@@ -94,11 +97,56 @@ namespace :db do
|
|
94
97
|
require_relative "../ragdoll-core"
|
95
98
|
|
96
99
|
config = Ragdoll::Core.configuration
|
97
|
-
puts "Dropping database with config: #{config.
|
100
|
+
puts "Dropping database with config: #{config.database.inspect}"
|
98
101
|
|
99
|
-
case config.
|
100
|
-
when "postgresql"
|
101
|
-
puts "
|
102
|
+
case config.database[:adapter]
|
103
|
+
when "postgresql"
|
104
|
+
puts "PostgreSQL database drop - running as superuser to drop database and role..."
|
105
|
+
|
106
|
+
# Connect as superuser to drop database and role
|
107
|
+
ActiveRecord::Base.establish_connection(
|
108
|
+
adapter: 'postgresql',
|
109
|
+
database: 'postgres', # Connect to postgres database initially
|
110
|
+
username: ENV.fetch('POSTGRES_SUPERUSER', 'postgres'),
|
111
|
+
password: ENV['POSTGRES_SUPERUSER_PASSWORD'],
|
112
|
+
host: config.database[:host] || 'localhost',
|
113
|
+
port: config.database[:port] || 5432
|
114
|
+
)
|
115
|
+
|
116
|
+
# Drop the database if it exists
|
117
|
+
begin
|
118
|
+
ActiveRecord::Base.connection.execute("DROP DATABASE IF EXISTS ragdoll_development")
|
119
|
+
puts "Database 'ragdoll_development' dropped successfully"
|
120
|
+
rescue => e
|
121
|
+
puts "Error dropping database: #{e.message}"
|
122
|
+
end
|
123
|
+
|
124
|
+
# Optionally drop the role (commented out by default to preserve user)
|
125
|
+
# begin
|
126
|
+
# ActiveRecord::Base.connection.execute("DROP ROLE IF EXISTS ragdoll")
|
127
|
+
# puts "Role 'ragdoll' dropped successfully"
|
128
|
+
# rescue => e
|
129
|
+
# puts "Error dropping role: #{e.message}"
|
130
|
+
# end
|
131
|
+
|
132
|
+
when "mysql2"
|
133
|
+
puts "MySQL database drop - connecting to drop database..."
|
134
|
+
|
135
|
+
# Connect without specifying database
|
136
|
+
ActiveRecord::Base.establish_connection(
|
137
|
+
adapter: 'mysql2',
|
138
|
+
username: config.database[:username],
|
139
|
+
password: config.database[:password],
|
140
|
+
host: config.database[:host] || 'localhost',
|
141
|
+
port: config.database[:port] || 3306
|
142
|
+
)
|
143
|
+
|
144
|
+
begin
|
145
|
+
ActiveRecord::Base.connection.execute("DROP DATABASE IF EXISTS #{config.database[:database]}")
|
146
|
+
puts "Database '#{config.database[:database]}' dropped successfully"
|
147
|
+
rescue => e
|
148
|
+
puts "Error dropping database: #{e.message}"
|
149
|
+
end
|
102
150
|
end
|
103
151
|
|
104
152
|
puts "Database drop completed"
|
@@ -216,9 +264,9 @@ namespace :db do
|
|
216
264
|
|
217
265
|
config = Ragdoll::Core.configuration
|
218
266
|
|
219
|
-
case config.
|
267
|
+
case config.database[:adapter]
|
220
268
|
when "postgresql"
|
221
|
-
db_config = config.
|
269
|
+
db_config = config.database
|
222
270
|
psql_cmd = "psql"
|
223
271
|
psql_cmd += " -h #{db_config[:host]}" if db_config[:host]
|
224
272
|
psql_cmd += " -p #{db_config[:port]}" if db_config[:port]
|
@@ -227,7 +275,7 @@ namespace :db do
|
|
227
275
|
puts "Opening PostgreSQL console..."
|
228
276
|
system(psql_cmd)
|
229
277
|
when "mysql2"
|
230
|
-
db_config = config.
|
278
|
+
db_config = config.database
|
231
279
|
mysql_cmd = "mysql"
|
232
280
|
mysql_cmd += " -h #{db_config[:host]}" if db_config[:host]
|
233
281
|
mysql_cmd += " -P #{db_config[:port]}" if db_config[:port]
|
@@ -237,7 +285,7 @@ namespace :db do
|
|
237
285
|
puts "Opening MySQL console..."
|
238
286
|
system(mysql_cmd)
|
239
287
|
else
|
240
|
-
puts "Console not supported for adapter: #{config.
|
288
|
+
puts "Console not supported for adapter: #{config.database[:adapter]}"
|
241
289
|
end
|
242
290
|
end
|
243
291
|
|