ragdoll 0.1.1 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +52 -1
- data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
- data/app/jobs/ragdoll/extract_text_job.rb +38 -0
- data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
- data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
- data/app/lib/ragdoll/metadata_schemas.rb +332 -0
- data/app/models/ragdoll/audio_content.rb +142 -0
- data/app/models/ragdoll/content.rb +95 -0
- data/app/models/ragdoll/document.rb +611 -0
- data/app/models/ragdoll/embedding.rb +176 -0
- data/app/models/ragdoll/image_content.rb +194 -0
- data/app/models/ragdoll/text_content.rb +137 -0
- data/app/services/ragdoll/configuration_service.rb +113 -0
- data/app/services/ragdoll/document_management.rb +108 -0
- data/app/services/ragdoll/document_processor.rb +342 -0
- data/app/services/ragdoll/embedding_service.rb +202 -0
- data/app/services/ragdoll/image_description_service.rb +230 -0
- data/app/services/ragdoll/metadata_generator.rb +329 -0
- data/app/services/ragdoll/model_resolver.rb +72 -0
- data/app/services/ragdoll/search_engine.rb +51 -0
- data/app/services/ragdoll/text_chunker.rb +208 -0
- data/app/services/ragdoll/text_generation_service.rb +355 -0
- data/lib/ragdoll/core/client.rb +32 -41
- data/lib/ragdoll/core/configuration.rb +140 -156
- data/lib/ragdoll/core/database.rb +1 -1
- data/lib/ragdoll/core/model.rb +45 -0
- data/lib/ragdoll/core/version.rb +1 -1
- data/lib/ragdoll/core.rb +35 -17
- data/lib/ragdoll.rb +1 -1
- data/lib/tasks/annotate.rake +1 -1
- data/lib/tasks/db.rake +2 -2
- metadata +24 -20
- data/lib/ragdoll/core/document_management.rb +0 -110
- data/lib/ragdoll/core/document_processor.rb +0 -344
- data/lib/ragdoll/core/embedding_service.rb +0 -183
- data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
- data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
- data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
- data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
- data/lib/ragdoll/core/metadata_schemas.rb +0 -334
- data/lib/ragdoll/core/models/audio_content.rb +0 -175
- data/lib/ragdoll/core/models/content.rb +0 -126
- data/lib/ragdoll/core/models/document.rb +0 -678
- data/lib/ragdoll/core/models/embedding.rb +0 -204
- data/lib/ragdoll/core/models/image_content.rb +0 -227
- data/lib/ragdoll/core/models/text_content.rb +0 -169
- data/lib/ragdoll/core/search_engine.rb +0 -50
- data/lib/ragdoll/core/services/image_description_service.rb +0 -230
- data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
- data/lib/ragdoll/core/text_chunker.rb +0 -210
- data/lib/ragdoll/core/text_generation_service.rb +0 -360
data/lib/ragdoll/core/client.rb
CHANGED
@@ -6,14 +6,22 @@ module Ragdoll
|
|
6
6
|
module Core
|
7
7
|
class Client
|
8
8
|
def initialize
|
9
|
+
# Setup configuration services
|
10
|
+
@config_service = Ragdoll::ConfigurationService.new
|
11
|
+
@model_resolver = Ragdoll::ModelResolver.new(@config_service)
|
12
|
+
|
9
13
|
# Setup logging
|
10
14
|
setup_logging
|
11
15
|
|
12
16
|
# Setup database connection
|
13
|
-
Database.setup(
|
14
|
-
|
15
|
-
@embedding_service = EmbeddingService.new
|
16
|
-
|
17
|
+
Database.setup(@config_service.config.database)
|
18
|
+
|
19
|
+
@embedding_service = Ragdoll::EmbeddingService.new(
|
20
|
+
client: nil,
|
21
|
+
config_service: @config_service,
|
22
|
+
model_resolver: @model_resolver
|
23
|
+
)
|
24
|
+
@search_engine = Ragdoll::SearchEngine.new(@embedding_service, config_service: @config_service)
|
17
25
|
end
|
18
26
|
|
19
27
|
# Primary method for RAG applications
|
@@ -88,7 +96,7 @@ module Ragdoll
|
|
88
96
|
query_embedding = @embedding_service.generate_embedding(query)
|
89
97
|
|
90
98
|
# Perform hybrid search
|
91
|
-
results =
|
99
|
+
results = Ragdoll::Document.hybrid_search(query, query_embedding: query_embedding, **options)
|
92
100
|
|
93
101
|
{
|
94
102
|
query: query,
|
@@ -111,30 +119,28 @@ module Ragdoll
|
|
111
119
|
# Document management
|
112
120
|
def add_document(path:)
|
113
121
|
# Parse the document
|
114
|
-
parsed = DocumentProcessor.parse(path)
|
122
|
+
parsed = Ragdoll::DocumentProcessor.parse(path)
|
115
123
|
|
116
124
|
# Extract title from metadata or use filename
|
117
125
|
title = parsed[:metadata][:title] ||
|
118
126
|
File.basename(path, File.extname(path))
|
119
127
|
|
120
128
|
# Add document to database
|
121
|
-
doc_id = DocumentManagement.add_document(path, parsed[:content], {
|
129
|
+
doc_id = Ragdoll::DocumentManagement.add_document(path, parsed[:content], {
|
122
130
|
title: title,
|
123
131
|
document_type: parsed[:document_type],
|
124
132
|
**parsed[:metadata]
|
125
133
|
})
|
126
134
|
|
127
|
-
|
128
135
|
# Queue background jobs for processing if content is available
|
129
136
|
embeddings_queued = false
|
130
137
|
if parsed[:content].present?
|
131
|
-
Ragdoll::
|
132
|
-
Ragdoll::
|
133
|
-
Ragdoll::
|
138
|
+
Ragdoll::GenerateEmbeddingsJob.perform_later(doc_id)
|
139
|
+
Ragdoll::GenerateSummaryJob.perform_later(doc_id)
|
140
|
+
Ragdoll::ExtractKeywordsJob.perform_later(doc_id)
|
134
141
|
embeddings_queued = true
|
135
142
|
end
|
136
143
|
|
137
|
-
|
138
144
|
# Return success information
|
139
145
|
{
|
140
146
|
success: true,
|
@@ -155,16 +161,16 @@ module Ragdoll
|
|
155
161
|
|
156
162
|
def add_text(content:, title:, **options)
|
157
163
|
# Add document to database
|
158
|
-
doc_id = DocumentManagement.add_document(title, content, {
|
164
|
+
doc_id = Ragdoll::DocumentManagement.add_document(title, content, {
|
159
165
|
title: title,
|
160
166
|
document_type: "text",
|
161
167
|
**options
|
162
168
|
})
|
163
169
|
|
164
170
|
# Queue background job for embeddings
|
165
|
-
Ragdoll::
|
166
|
-
|
167
|
-
|
171
|
+
Ragdoll::GenerateEmbeddingsJob.perform_later(doc_id,
|
172
|
+
chunk_size: options[:chunk_size],
|
173
|
+
chunk_overlap: options[:chunk_overlap])
|
168
174
|
|
169
175
|
doc_id
|
170
176
|
end
|
@@ -188,7 +194,7 @@ module Ragdoll
|
|
188
194
|
end
|
189
195
|
|
190
196
|
def get_document(id:)
|
191
|
-
document_hash = DocumentManagement.get_document(id)
|
197
|
+
document_hash = Ragdoll::DocumentManagement.get_document(id)
|
192
198
|
return nil unless document_hash
|
193
199
|
|
194
200
|
# DocumentManagement.get_document already returns a hash with all needed info
|
@@ -196,7 +202,7 @@ module Ragdoll
|
|
196
202
|
end
|
197
203
|
|
198
204
|
def document_status(id:)
|
199
|
-
document =
|
205
|
+
document = Ragdoll::Document.find(id)
|
200
206
|
embeddings_count = document.all_embeddings.count
|
201
207
|
|
202
208
|
{
|
@@ -228,25 +234,25 @@ module Ragdoll
|
|
228
234
|
end
|
229
235
|
|
230
236
|
def update_document(id:, **updates)
|
231
|
-
DocumentManagement.update_document(id, **updates)
|
237
|
+
Ragdoll::DocumentManagement.update_document(id, **updates)
|
232
238
|
end
|
233
239
|
|
234
240
|
def delete_document(id:)
|
235
|
-
DocumentManagement.delete_document(id)
|
241
|
+
Ragdoll::DocumentManagement.delete_document(id)
|
236
242
|
end
|
237
243
|
|
238
244
|
def list_documents(**options)
|
239
|
-
DocumentManagement.list_documents(options)
|
245
|
+
Ragdoll::DocumentManagement.list_documents(options)
|
240
246
|
end
|
241
247
|
|
242
248
|
# Analytics and stats
|
243
249
|
def stats
|
244
|
-
DocumentManagement.get_document_stats
|
250
|
+
Ragdoll::DocumentManagement.get_document_stats
|
245
251
|
end
|
246
252
|
|
247
253
|
def search_analytics(days: 30)
|
248
254
|
# This could be implemented with additional database queries
|
249
|
-
|
255
|
+
Ragdoll::Embedding.where("returned_at > ?", days.days.ago)
|
250
256
|
.group("DATE(returned_at)")
|
251
257
|
.count
|
252
258
|
end
|
@@ -265,14 +271,13 @@ module Ragdoll
|
|
265
271
|
require "active_job"
|
266
272
|
|
267
273
|
# Create log directory if it doesn't exist
|
268
|
-
|
269
|
-
log_file = Ragdoll.config.logging_config[:filepath] || File.join(Dir.home, ".ragdoll", "ragdoll.log")
|
274
|
+
log_file = @config_service.config.logging[:filepath]
|
270
275
|
log_dir = File.dirname(log_file)
|
271
276
|
FileUtils.mkdir_p(log_dir) unless Dir.exist?(log_dir)
|
272
277
|
|
273
278
|
# Set up logger with appropriate level
|
274
279
|
logger = Logger.new(log_file)
|
275
|
-
logger.level = case
|
280
|
+
logger.level = case @config_service.config.logging[:level]
|
276
281
|
when :debug then Logger::DEBUG
|
277
282
|
when :info then Logger::INFO
|
278
283
|
when :warn then Logger::WARN
|
@@ -290,26 +295,12 @@ module Ragdoll
|
|
290
295
|
end
|
291
296
|
|
292
297
|
def build_enhanced_prompt(original_prompt, context)
|
293
|
-
|
294
|
-
template = default_prompt_template
|
298
|
+
template = @config_service.config.prompt_template(:rag_enhancement)
|
295
299
|
|
296
300
|
template
|
297
301
|
.gsub("{{context}}", context)
|
298
302
|
.gsub("{{prompt}}", original_prompt)
|
299
303
|
end
|
300
|
-
|
301
|
-
def default_prompt_template
|
302
|
-
<<~TEMPLATE
|
303
|
-
You are an AI assistant. Use the following context to help answer the user's question. If the context doesn't contain relevant information, say so.
|
304
|
-
|
305
|
-
Context:
|
306
|
-
{{context}}
|
307
|
-
|
308
|
-
Question: {{prompt}}
|
309
|
-
|
310
|
-
Answer:
|
311
|
-
TEMPLATE
|
312
|
-
end
|
313
304
|
end
|
314
305
|
end
|
315
306
|
end
|
data/lib/ragdoll/core/configuration.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "yaml"
|
4
4
|
require "fileutils"
|
5
5
|
require "ostruct"
|
6
|
+
require_relative "model"
|
6
7
|
|
7
8
|
module Ragdoll
|
8
9
|
module Core
|
@@ -12,134 +13,162 @@ module Ragdoll
|
|
12
13
|
class ConfigurationLoadUnknownError < StandardError; end
|
13
14
|
|
14
15
|
DEFAULT = {
|
15
|
-
directory
|
16
|
-
|
16
|
+
# Base directory for all Ragdoll files - single source of truth
|
17
|
+
base_directory: File.join(Dir.home, ".config", "ragdoll"),
|
18
|
+
|
19
|
+
# Configuration file path derived from base directory
|
20
|
+
config_filepath: File.join(Dir.home, ".config", "ragdoll", "config.yml"),
|
21
|
+
|
22
|
+
# Model configurations organized by purpose with inheritance support
|
17
23
|
models: {
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
text: "text-embedding-3-small",
|
23
|
-
image: "image-embedding-3-small", # FIXME
|
24
|
-
audio: "audio-embedding-3-small", # FIXME
|
24
|
+
text_generation: {
|
25
|
+
default: -> { Model.new(ENV.fetch("RAGDOLL_DEFAULT_TEXT_MODEL", "openai/gpt-4o")) },
|
26
|
+
summary: -> { Model.new(ENV.fetch("RAGDOLL_SUMMARY_MODEL", "openai/gpt-4o")) },
|
27
|
+
keywords: -> { Model.new(ENV.fetch("RAGDOLL_KEYWORDS_MODEL", "openai/gpt-4o")) }
|
25
28
|
},
|
29
|
+
embedding: {
|
30
|
+
provider: :openai,
|
31
|
+
text: -> { Model.new(ENV.fetch("RAGDOLL_TEXT_EMBEDDING_MODEL", "openai/text-embedding-3-small")) },
|
32
|
+
image: -> { Model.new(ENV.fetch("RAGDOLL_IMAGE_EMBEDDING_MODEL", "openai/clip-vit-base-patch32")) },
|
33
|
+
audio: -> { Model.new(ENV.fetch("RAGDOLL_AUDIO_EMBEDDING_MODEL", "openai/whisper-1")) },
|
34
|
+
max_dimensions: 3072,
|
35
|
+
cache_embeddings: true
|
36
|
+
}
|
26
37
|
},
|
27
|
-
|
38
|
+
|
39
|
+
# Processing configuration by content type
|
40
|
+
processing: {
|
28
41
|
text: {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
max_tokens: 4096,
|
34
|
-
overlap: 128,
|
35
|
-
},
|
36
|
-
audio: {
|
37
|
-
max_tokens: 4096,
|
38
|
-
overlap: 128,
|
42
|
+
chunking: {
|
43
|
+
max_tokens: 1000,
|
44
|
+
overlap: 200
|
45
|
+
}
|
39
46
|
},
|
40
47
|
default: {
|
41
|
-
|
42
|
-
|
48
|
+
chunking: {
|
49
|
+
max_tokens: 4096,
|
50
|
+
overlap: 128
|
51
|
+
}
|
43
52
|
},
|
53
|
+
search: {
|
54
|
+
similarity_threshold: 0.7,
|
55
|
+
max_results: 10,
|
56
|
+
analytics: {
|
57
|
+
enable: true,
|
58
|
+
usage_tracking_enabled: true,
|
59
|
+
ranking_enabled: true,
|
60
|
+
recency_weight: 0.3,
|
61
|
+
frequency_weight: 0.7,
|
62
|
+
similarity_weight: 1.0
|
63
|
+
}
|
64
|
+
}
|
44
65
|
},
|
45
|
-
|
66
|
+
|
67
|
+
# LLM provider configurations (renamed from ruby_llm_config)
|
68
|
+
llm_providers: {
|
69
|
+
default_provider: :openai,
|
46
70
|
openai: {
|
47
|
-
api_key: -> { ENV
|
48
|
-
organization: -> { ENV
|
49
|
-
project: -> { ENV
|
71
|
+
api_key: -> { ENV.fetch("OPENAI_API_KEY", nil) },
|
72
|
+
organization: -> { ENV.fetch("OPENAI_ORGANIZATION", nil) },
|
73
|
+
project: -> { ENV.fetch("OPENAI_PROJECT", nil) }
|
50
74
|
},
|
51
75
|
anthropic: {
|
52
|
-
api_key: -> { ENV
|
76
|
+
api_key: -> { ENV.fetch("ANTHROPIC_API_KEY", nil) }
|
53
77
|
},
|
54
78
|
google: {
|
55
|
-
api_key: -> { ENV
|
56
|
-
project_id: -> { ENV
|
79
|
+
api_key: -> { ENV.fetch("GOOGLE_API_KEY", nil) },
|
80
|
+
project_id: -> { ENV.fetch("GOOGLE_PROJECT_ID", nil) }
|
57
81
|
},
|
58
82
|
azure: {
|
59
|
-
api_key: -> { ENV
|
60
|
-
endpoint: -> { ENV
|
61
|
-
api_version: -> { ENV
|
83
|
+
api_key: -> { ENV.fetch("AZURE_OPENAI_API_KEY", nil) },
|
84
|
+
endpoint: -> { ENV.fetch("AZURE_OPENAI_ENDPOINT", nil) },
|
85
|
+
api_version: -> { ENV.fetch("AZURE_OPENAI_API_VERSION", "2024-02-01") }
|
62
86
|
},
|
63
87
|
ollama: {
|
64
|
-
endpoint: -> { ENV
|
88
|
+
endpoint: -> { ENV.fetch("OLLAMA_ENDPOINT", "http://localhost:11434") }
|
65
89
|
},
|
66
90
|
huggingface: {
|
67
|
-
api_key: -> { ENV
|
91
|
+
api_key: -> { ENV.fetch("HUGGINGFACE_API_KEY", nil) }
|
68
92
|
},
|
69
93
|
openrouter: {
|
70
|
-
api_key: -> { ENV
|
71
|
-
}
|
72
|
-
},
|
73
|
-
embedding_config: {
|
74
|
-
provider: :openai,
|
75
|
-
cache_embeddings: true,
|
76
|
-
max_embedding_dimensions: 3072, # Support up to text-embedding-3-large
|
94
|
+
api_key: -> { ENV.fetch("OPENROUTER_API_KEY", nil) }
|
95
|
+
}
|
77
96
|
},
|
78
|
-
|
97
|
+
|
98
|
+
# Summarization configuration
|
99
|
+
summarization: {
|
79
100
|
enable: true,
|
80
101
|
max_length: 300,
|
81
|
-
min_content_length: 300
|
102
|
+
min_content_length: 300
|
82
103
|
},
|
83
|
-
|
104
|
+
|
105
|
+
# Database configuration with standardized ENV variable name
|
106
|
+
database: {
|
84
107
|
adapter: "postgresql",
|
85
108
|
database: "ragdoll_development",
|
86
109
|
username: "ragdoll",
|
87
|
-
password: -> { ENV
|
110
|
+
password: -> { ENV.fetch("RAGDOLL_DATABASE_PASSWORD", nil) },
|
88
111
|
host: "localhost",
|
89
112
|
port: 5432,
|
90
113
|
auto_migrate: true,
|
91
|
-
logger: nil
|
114
|
+
logger: nil
|
92
115
|
},
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
similarity_threshold: 0.7,
|
100
|
-
max_results: 10,
|
101
|
-
enable_analytics: true,
|
102
|
-
enable_usage_tracking: true,
|
103
|
-
usage_ranking_enabled: true,
|
104
|
-
usage_recency_weight: 0.3,
|
105
|
-
usage_frequency_weight: 0.7,
|
106
|
-
usage_similarity_weight: 1.0,
|
116
|
+
|
117
|
+
# Logging configuration with corrected key names and path derivation
|
118
|
+
logging: {
|
119
|
+
level: :warn, # Fixed: was log_level, now matches usage
|
120
|
+
directory: File.join(Dir.home, ".config", "ragdoll", "logs"),
|
121
|
+
filepath: File.join(Dir.home, ".config", "ragdoll", "logs", "ragdoll.log")
|
107
122
|
},
|
108
|
-
|
123
|
+
|
124
|
+
# Prompt templates for customizable text generation
|
125
|
+
prompt_templates: {
|
126
|
+
rag_enhancement: <<~TEMPLATE.strip
|
127
|
+
You are an AI assistant. Use the following context to help answer the user's question.
|
128
|
+
If the context doesn't contain relevant information, say so.
|
129
|
+
|
130
|
+
Context:
|
131
|
+
{{context}}
|
132
|
+
|
133
|
+
Question: {{prompt}}
|
134
|
+
|
135
|
+
Answer:
|
136
|
+
TEMPLATE
|
137
|
+
}
|
138
|
+
|
139
|
+
}.freeze
|
109
140
|
|
110
141
|
def initialize(config = {})
|
111
142
|
merged_config = deep_merge(self.class::DEFAULT, config)
|
112
|
-
resolved_config = resolve_procs(merged_config)
|
143
|
+
resolved_config = resolve_procs(merged_config, [])
|
113
144
|
@config = OpenStruct.new(resolved_config)
|
114
145
|
end
|
115
146
|
|
116
147
|
def self.load(path: nil)
|
117
|
-
path ||= DEFAULT[:
|
148
|
+
path ||= DEFAULT[:config_filepath]
|
118
149
|
|
119
|
-
unless File.exist?(path)
|
120
|
-
raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}"
|
121
|
-
end
|
150
|
+
raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}" unless File.exist?(path)
|
122
151
|
|
123
152
|
new(YAML.safe_load_file(path) || {})
|
124
153
|
rescue Errno::ENOENT
|
125
154
|
raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}"
|
126
|
-
rescue => e
|
155
|
+
rescue StandardError => e
|
127
156
|
raise ConfigurationLoadUnknownError, "Failed to load configuration from #{path}: #{e.message}"
|
128
157
|
end
|
129
158
|
|
130
159
|
def save(path: nil)
|
131
160
|
if path.nil?
|
132
|
-
path = @config.
|
161
|
+
path = @config.config_filepath
|
133
162
|
else
|
134
|
-
save_filepath = @config.
|
135
|
-
@config.
|
163
|
+
save_filepath = @config.config_filepath
|
164
|
+
@config.config_filepath = path
|
136
165
|
end
|
137
166
|
|
138
167
|
FileUtils.mkdir_p(File.dirname(path))
|
139
168
|
|
140
169
|
File.write(path, @config.to_yaml)
|
141
|
-
rescue => e
|
142
|
-
@config.
|
170
|
+
rescue StandardError => e
|
171
|
+
@config.config_filepath = save_filepath unless save_filepath.nil?
|
143
172
|
raise ConfigurationSaveError, "Failed to save configuration to #{path}: #{e.message}"
|
144
173
|
end
|
145
174
|
|
@@ -160,6 +189,35 @@ module Ragdoll
|
|
160
189
|
end
|
161
190
|
end
|
162
191
|
|
192
|
+
# Resolve model with inheritance support
|
193
|
+
# Returns the model string for a given task, with inheritance from default
|
194
|
+
def resolve_model(task_type)
|
195
|
+
case task_type
|
196
|
+
when :embedding
|
197
|
+
@config.models[:embedding]
|
198
|
+
when :text, :summary, :keywords, :default
|
199
|
+
@config.models[:text_generation][task_type] || @config.models[:text_generation][:default]
|
200
|
+
else
|
201
|
+
@config.models[:text_generation][:default]
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
# Get provider credentials for a given provider
|
206
|
+
def provider_credentials(provider = nil)
|
207
|
+
provider ||= @config.llm_providers[:default_provider]
|
208
|
+
@config.llm_providers[provider] || {}
|
209
|
+
end
|
210
|
+
|
211
|
+
# Resolve embedding model for content type
|
212
|
+
def embedding_model(content_type = :text)
|
213
|
+
@config.models[:embedding][content_type] || @config.models[:embedding][:text]
|
214
|
+
end
|
215
|
+
|
216
|
+
# Get prompt template
|
217
|
+
def prompt_template(template_name = :rag_enhancement)
|
218
|
+
@config.prompt_templates[template_name]
|
219
|
+
end
|
220
|
+
|
163
221
|
# Enable method delegation to the internal OpenStruct
|
164
222
|
def method_missing(method_name, *args, &block)
|
165
223
|
@config.send(method_name, *args, &block)
|
@@ -171,103 +229,29 @@ module Ragdoll
|
|
171
229
|
|
172
230
|
private
|
173
231
|
|
174
|
-
def resolve_procs(obj)
|
232
|
+
def resolve_procs(obj, path = [])
|
175
233
|
case obj
|
176
234
|
when Hash
|
177
|
-
obj.
|
235
|
+
obj.each_with_object({}) { |(k, v), result| result[k] = resolve_procs(v, path + [k]) }
|
178
236
|
when Proc
|
179
237
|
obj.call
|
238
|
+
when String
|
239
|
+
# Convert strings to Model instances in the models configuration section
|
240
|
+
if path.length >= 2 && path[0] == :models
|
241
|
+
Model.new(obj)
|
242
|
+
else
|
243
|
+
obj
|
244
|
+
end
|
180
245
|
else
|
181
246
|
obj
|
182
247
|
end
|
183
248
|
end
|
184
249
|
|
185
250
|
def deep_merge(hash1, hash2)
|
186
|
-
hash1.merge(hash2) do |
|
251
|
+
hash1.merge(hash2) do |_key, oldval, newval|
|
187
252
|
oldval.is_a?(Hash) && newval.is_a?(Hash) ? deep_merge(oldval, newval) : newval
|
188
253
|
end
|
189
254
|
end
|
190
255
|
end
|
191
256
|
end
|
192
257
|
end
|
193
|
-
|
194
|
-
__END__
|
195
|
-
|
196
|
-
{
|
197
|
-
directory: "/Users/dewayne/.ragdoll",
|
198
|
-
filepath: "/Users/dewayne/.ragdoll/config.yml",
|
199
|
-
embedding_config:
|
200
|
-
{default:
|
201
|
-
{model: "openai/gpt-4o-mini", summary_model: "openai/gpt-4o-mini", keywords_model: "openai/gpt-4o-mini", max_dimensions: 3072},
|
202
|
-
text: {model: "openai/text-embedding-3-small", max_tokens: 1000, overlap: 200},
|
203
|
-
image: {model: "laion/CLIP-ViT-H-14", max_tokens: 4096, overlap: 128},
|
204
|
-
audio: {model: "openl3", transcription_model: "openai/whisper-large-v2", max_tokens: 4096, overlap: 128}},
|
205
|
-
chunking: {text: {max_tokens: 1000, overlap: 200}, default: {max_tokens: 4096, overlap: 128}},
|
206
|
-
ruby_llm_config:
|
207
|
-
{openai: {api_key: "***", organization: nil, project: nil},
|
208
|
-
anthropic:
|
209
|
-
{api_key: "***"},
|
210
|
-
google: {api_key: "***", project_id: nil},
|
211
|
-
azure: {api_key: nil, endpoint: nil, api_version: "2024-02-01"},
|
212
|
-
ollama: {endpoint: "http://localhost:11434/v1"},
|
213
|
-
huggingface: {api_key: nil},
|
214
|
-
openrouter: {api_key: nil}},
|
215
|
-
summarization_config: {enable: true, model: nil, max_length: 300, min_content_length: 300},
|
216
|
-
database_config:
|
217
|
-
{adapter: "postgresql",
|
218
|
-
database: "ragdoll_development",
|
219
|
-
username: "ragdoll",
|
220
|
-
password: "ragdoll",
|
221
|
-
host: "localhost",
|
222
|
-
port: 5432,
|
223
|
-
pool: 20,
|
224
|
-
timeout: 5000,
|
225
|
-
auto_migrate: true,
|
226
|
-
logger: nil},
|
227
|
-
logging_config: {level: :warn, directory: "/Users/dewayne/.ragdoll", filepath: "/Users/dewayne/.ragdoll/ragdoll.log"},
|
228
|
-
search:
|
229
|
-
{similarity_threshold: 0.7,
|
230
|
-
max_results: 10,
|
231
|
-
enable_analytics: true,
|
232
|
-
enable_usage_tracking: true,
|
233
|
-
usage_ranking_enabled: true,
|
234
|
-
usage_recency_weight: 0.3,
|
235
|
-
usage_frequency_weight: 0.7,
|
236
|
-
usage_similarity_weight: 1.0},
|
237
|
-
llm_provider: :openai,
|
238
|
-
openai_api_key: "***",
|
239
|
-
llm_config:
|
240
|
-
{openai: {api_key: "***", organization: nil, project: nil},
|
241
|
-
anthropic:
|
242
|
-
{api_key: "***"},
|
243
|
-
google: {api_key: "***", project_id: nil},
|
244
|
-
azure: {api_key: nil, endpoint: nil, api_version: "2024-02-01"},
|
245
|
-
ollama: {endpoint: "http://localhost:11434"},
|
246
|
-
huggingface: {api_key: nil},
|
247
|
-
openrouter: {api_key: nil}},
|
248
|
-
embedding_provider: :openai,
|
249
|
-
embedding_model: "text-embedding-3-small",
|
250
|
-
max_embedding_dimensions: 3072,
|
251
|
-
cache_embeddings: true,
|
252
|
-
default_model: "gpt-4o-mini",
|
253
|
-
summary_provider_model: "openai/gpt-4o-mini",
|
254
|
-
keywords_provider_model: "openai/gpt-4o-mini",
|
255
|
-
embeddings_provider_model: "openai/text-embedding-3-small",
|
256
|
-
summary_model: nil,
|
257
|
-
chunk_size: 1000,
|
258
|
-
chunk_overlap: 200,
|
259
|
-
enable_document_summarization: true,
|
260
|
-
summary_max_length: 300,
|
261
|
-
summary_min_content_length: 300,
|
262
|
-
prompt_template: nil,
|
263
|
-
search_similarity_threshold: 0.7,
|
264
|
-
max_search_results: 10,
|
265
|
-
enable_search_analytics: true,
|
266
|
-
enable_usage_tracking: true,
|
267
|
-
usage_ranking_enabled: true,
|
268
|
-
usage_recency_weight: 0.3,
|
269
|
-
usage_frequency_weight: 0.7,
|
270
|
-
usage_similarity_weight: 1.0,
|
271
|
-
log_level: :warn,
|
272
|
-
log_file: "/Users/dewayne/.ragdoll/ragdoll.log"
|
273
|
-
}
|
data/lib/ragdoll/core/database.rb
CHANGED
@@ -125,7 +125,7 @@ module Ragdoll
|
|
125
125
|
adapter: "postgresql",
|
126
126
|
database: "ragdoll_development",
|
127
127
|
username: "ragdoll",
|
128
|
-
password: ENV
|
128
|
+
password: ENV.fetch("RAGDOLL_DATABASE_PASSWORD", nil),
|
129
129
|
host: "localhost",
|
130
130
|
port: 5432,
|
131
131
|
auto_migrate: true,
|
data/lib/ragdoll/core/model.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# lib/ragdoll/core/model.rb
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Ragdoll
|
5
|
+
module Core
|
6
|
+
# Model represents a provider and model name.
|
7
|
+
# It is initialized with a string in the format "provider/model".
|
8
|
+
# The provider is optional.
|
9
|
+
# Can be initialized with nil or empty string.
|
10
|
+
Model = Data.define(:name) do
|
11
|
+
# @return [Symbol, nil] the provider part of the name, or nil if not present.
|
12
|
+
def provider
|
13
|
+
return nil if name.nil? || name.empty?
|
14
|
+
|
15
|
+
parts = name.split("/", 2)
|
16
|
+
return nil if parts.length < 2 || parts.first.empty?
|
17
|
+
|
18
|
+
parts.first.to_sym
|
19
|
+
end
|
20
|
+
|
21
|
+
# @return [String, nil] the model part of the name, or nil if name is nil/empty.
|
22
|
+
def model
|
23
|
+
return nil if name.nil? || name.empty?
|
24
|
+
|
25
|
+
parts = name.split("/", 2)
|
26
|
+
parts.length < 2 ? name : parts.last
|
27
|
+
end
|
28
|
+
|
29
|
+
# @return [String] the original name string, or empty string if name is nil.
|
30
|
+
def to_s
|
31
|
+
name.nil? ? "" : name
|
32
|
+
end
|
33
|
+
|
34
|
+
# @return [Hash] a hash representation of the model.
|
35
|
+
def to_h
|
36
|
+
{ provider: provider, model: model }
|
37
|
+
end
|
38
|
+
|
39
|
+
# YAML serialization - save as string name
|
40
|
+
def encode_with(coder)
|
41
|
+
coder.scalar = name
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|