ragdoll 0.1.1 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +52 -1
  4. data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
  5. data/app/jobs/ragdoll/extract_text_job.rb +38 -0
  6. data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
  7. data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
  8. data/app/lib/ragdoll/metadata_schemas.rb +332 -0
  9. data/app/models/ragdoll/audio_content.rb +142 -0
  10. data/app/models/ragdoll/content.rb +95 -0
  11. data/app/models/ragdoll/document.rb +611 -0
  12. data/app/models/ragdoll/embedding.rb +176 -0
  13. data/app/models/ragdoll/image_content.rb +194 -0
  14. data/app/models/ragdoll/text_content.rb +137 -0
  15. data/app/services/ragdoll/configuration_service.rb +113 -0
  16. data/app/services/ragdoll/document_management.rb +108 -0
  17. data/app/services/ragdoll/document_processor.rb +342 -0
  18. data/app/services/ragdoll/embedding_service.rb +202 -0
  19. data/app/services/ragdoll/image_description_service.rb +230 -0
  20. data/app/services/ragdoll/metadata_generator.rb +329 -0
  21. data/app/services/ragdoll/model_resolver.rb +72 -0
  22. data/app/services/ragdoll/search_engine.rb +51 -0
  23. data/app/services/ragdoll/text_chunker.rb +208 -0
  24. data/app/services/ragdoll/text_generation_service.rb +355 -0
  25. data/lib/ragdoll/core/client.rb +32 -41
  26. data/lib/ragdoll/core/configuration.rb +140 -156
  27. data/lib/ragdoll/core/database.rb +1 -1
  28. data/lib/ragdoll/core/model.rb +45 -0
  29. data/lib/ragdoll/core/version.rb +1 -1
  30. data/lib/ragdoll/core.rb +35 -17
  31. data/lib/ragdoll.rb +1 -1
  32. data/lib/tasks/annotate.rake +1 -1
  33. data/lib/tasks/db.rake +2 -2
  34. metadata +24 -20
  35. data/lib/ragdoll/core/document_management.rb +0 -110
  36. data/lib/ragdoll/core/document_processor.rb +0 -344
  37. data/lib/ragdoll/core/embedding_service.rb +0 -183
  38. data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
  39. data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
  40. data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
  41. data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
  42. data/lib/ragdoll/core/metadata_schemas.rb +0 -334
  43. data/lib/ragdoll/core/models/audio_content.rb +0 -175
  44. data/lib/ragdoll/core/models/content.rb +0 -126
  45. data/lib/ragdoll/core/models/document.rb +0 -678
  46. data/lib/ragdoll/core/models/embedding.rb +0 -204
  47. data/lib/ragdoll/core/models/image_content.rb +0 -227
  48. data/lib/ragdoll/core/models/text_content.rb +0 -169
  49. data/lib/ragdoll/core/search_engine.rb +0 -50
  50. data/lib/ragdoll/core/services/image_description_service.rb +0 -230
  51. data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
  52. data/lib/ragdoll/core/text_chunker.rb +0 -210
  53. data/lib/ragdoll/core/text_generation_service.rb +0 -360
@@ -6,14 +6,22 @@ module Ragdoll
6
6
  module Core
7
7
  class Client
8
8
  def initialize
9
+ # Setup configuration services
10
+ @config_service = Ragdoll::ConfigurationService.new
11
+ @model_resolver = Ragdoll::ModelResolver.new(@config_service)
12
+
9
13
  # Setup logging
10
14
  setup_logging
11
15
 
12
16
  # Setup database connection
13
- Database.setup(Ragdoll.config.database_config)
14
-
15
- @embedding_service = EmbeddingService.new
16
- @search_engine = SearchEngine.new(@embedding_service)
17
+ Database.setup(@config_service.config.database)
18
+
19
+ @embedding_service = Ragdoll::EmbeddingService.new(
20
+ client: nil,
21
+ config_service: @config_service,
22
+ model_resolver: @model_resolver
23
+ )
24
+ @search_engine = Ragdoll::SearchEngine.new(@embedding_service, config_service: @config_service)
17
25
  end
18
26
 
19
27
  # Primary method for RAG applications
@@ -88,7 +96,7 @@ module Ragdoll
88
96
  query_embedding = @embedding_service.generate_embedding(query)
89
97
 
90
98
  # Perform hybrid search
91
- results = Models::Document.hybrid_search(query, query_embedding: query_embedding, **options)
99
+ results = Ragdoll::Document.hybrid_search(query, query_embedding: query_embedding, **options)
92
100
 
93
101
  {
94
102
  query: query,
@@ -111,30 +119,28 @@ module Ragdoll
111
119
  # Document management
112
120
  def add_document(path:)
113
121
  # Parse the document
114
- parsed = DocumentProcessor.parse(path)
122
+ parsed = Ragdoll::DocumentProcessor.parse(path)
115
123
 
116
124
  # Extract title from metadata or use filename
117
125
  title = parsed[:metadata][:title] ||
118
126
  File.basename(path, File.extname(path))
119
127
 
120
128
  # Add document to database
121
- doc_id = DocumentManagement.add_document(path, parsed[:content], {
129
+ doc_id = Ragdoll::DocumentManagement.add_document(path, parsed[:content], {
122
130
  title: title,
123
131
  document_type: parsed[:document_type],
124
132
  **parsed[:metadata]
125
133
  })
126
134
 
127
-
128
135
  # Queue background jobs for processing if content is available
129
136
  embeddings_queued = false
130
137
  if parsed[:content].present?
131
- Ragdoll::Core::Jobs::GenerateEmbeddings.perform_later(doc_id)
132
- Ragdoll::Core::Jobs::GenerateSummary.perform_later(doc_id)
133
- Ragdoll::Core::Jobs::ExtractKeywords.perform_later(doc_id)
138
+ Ragdoll::GenerateEmbeddingsJob.perform_later(doc_id)
139
+ Ragdoll::GenerateSummaryJob.perform_later(doc_id)
140
+ Ragdoll::ExtractKeywordsJob.perform_later(doc_id)
134
141
  embeddings_queued = true
135
142
  end
136
143
 
137
-
138
144
  # Return success information
139
145
  {
140
146
  success: true,
@@ -155,16 +161,16 @@ module Ragdoll
155
161
 
156
162
  def add_text(content:, title:, **options)
157
163
  # Add document to database
158
- doc_id = DocumentManagement.add_document(title, content, {
164
+ doc_id = Ragdoll::DocumentManagement.add_document(title, content, {
159
165
  title: title,
160
166
  document_type: "text",
161
167
  **options
162
168
  })
163
169
 
164
170
  # Queue background job for embeddings
165
- Ragdoll::Core::Jobs::GenerateEmbeddings.perform_later(doc_id,
166
- chunk_size: options[:chunk_size],
167
- chunk_overlap: options[:chunk_overlap])
171
+ Ragdoll::GenerateEmbeddingsJob.perform_later(doc_id,
172
+ chunk_size: options[:chunk_size],
173
+ chunk_overlap: options[:chunk_overlap])
168
174
 
169
175
  doc_id
170
176
  end
@@ -188,7 +194,7 @@ module Ragdoll
188
194
  end
189
195
 
190
196
  def get_document(id:)
191
- document_hash = DocumentManagement.get_document(id)
197
+ document_hash = Ragdoll::DocumentManagement.get_document(id)
192
198
  return nil unless document_hash
193
199
 
194
200
  # DocumentManagement.get_document already returns a hash with all needed info
@@ -196,7 +202,7 @@ module Ragdoll
196
202
  end
197
203
 
198
204
  def document_status(id:)
199
- document = Models::Document.find(id)
205
+ document = Ragdoll::Document.find(id)
200
206
  embeddings_count = document.all_embeddings.count
201
207
 
202
208
  {
@@ -228,25 +234,25 @@ module Ragdoll
228
234
  end
229
235
 
230
236
  def update_document(id:, **updates)
231
- DocumentManagement.update_document(id, **updates)
237
+ Ragdoll::DocumentManagement.update_document(id, **updates)
232
238
  end
233
239
 
234
240
  def delete_document(id:)
235
- DocumentManagement.delete_document(id)
241
+ Ragdoll::DocumentManagement.delete_document(id)
236
242
  end
237
243
 
238
244
  def list_documents(**options)
239
- DocumentManagement.list_documents(options)
245
+ Ragdoll::DocumentManagement.list_documents(options)
240
246
  end
241
247
 
242
248
  # Analytics and stats
243
249
  def stats
244
- DocumentManagement.get_document_stats
250
+ Ragdoll::DocumentManagement.get_document_stats
245
251
  end
246
252
 
247
253
  def search_analytics(days: 30)
248
254
  # This could be implemented with additional database queries
249
- Models::Embedding.where("returned_at > ?", days.days.ago)
255
+ Ragdoll::Embedding.where("returned_at > ?", days.days.ago)
250
256
  .group("DATE(returned_at)")
251
257
  .count
252
258
  end
@@ -265,14 +271,13 @@ module Ragdoll
265
271
  require "active_job"
266
272
 
267
273
  # Create log directory if it doesn't exist
268
- # FIXME: log_file is not in current config structure
269
- log_file = Ragdoll.config.logging_config[:filepath] || File.join(Dir.home, ".ragdoll", "ragdoll.log")
274
+ log_file = @config_service.config.logging[:filepath]
270
275
  log_dir = File.dirname(log_file)
271
276
  FileUtils.mkdir_p(log_dir) unless Dir.exist?(log_dir)
272
277
 
273
278
  # Set up logger with appropriate level
274
279
  logger = Logger.new(log_file)
275
- logger.level = case Ragdoll.config.logging_config[:level]
280
+ logger.level = case @config_service.config.logging[:level]
276
281
  when :debug then Logger::DEBUG
277
282
  when :info then Logger::INFO
278
283
  when :warn then Logger::WARN
@@ -290,26 +295,12 @@ module Ragdoll
290
295
  end
291
296
 
292
297
  def build_enhanced_prompt(original_prompt, context)
293
- # FIXME: prompt_template is not in current config structure
294
- template = default_prompt_template
298
+ template = @config_service.config.prompt_template(:rag_enhancement)
295
299
 
296
300
  template
297
301
  .gsub("{{context}}", context)
298
302
  .gsub("{{prompt}}", original_prompt)
299
303
  end
300
-
301
- def default_prompt_template
302
- <<~TEMPLATE
303
- You are an AI assistant. Use the following context to help answer the user's question. If the context doesn't contain relevant information, say so.
304
-
305
- Context:
306
- {{context}}
307
-
308
- Question: {{prompt}}
309
-
310
- Answer:
311
- TEMPLATE
312
- end
313
304
  end
314
305
  end
315
306
  end
@@ -3,6 +3,7 @@
3
3
  require "yaml"
4
4
  require "fileutils"
5
5
  require "ostruct"
6
+ require_relative "model"
6
7
 
7
8
  module Ragdoll
8
9
  module Core
@@ -12,134 +13,162 @@ module Ragdoll
12
13
  class ConfigurationLoadUnknownError < StandardError; end
13
14
 
14
15
  DEFAULT = {
15
- directory: File.join(Dir.home, ".ragdoll"),
16
- filepath: File.join(Dir.home, ".ragdoll", "config.yml"),
16
+ # Base directory for all Ragdoll files - single source of truth
17
+ base_directory: File.join(Dir.home, ".config", "ragdoll"),
18
+
19
+ # Configuration file path under the base directory (written out literally here; keep in sync with base_directory)
20
+ config_filepath: File.join(Dir.home, ".config", "ragdoll", "config.yml"),
21
+
22
+ # Model configurations organized by purpose with inheritance support
17
23
  models: {
18
- default: "openai/gpt-4o",
19
- summary: "openai/gpt-4o",
20
- keywords: "openai/gpt-4o",
21
- embedding: {
22
- text: "text-embedding-3-small",
23
- image: "image-embedding-3-small", # FIXME
24
- audio: "audio-embedding-3-small", # FIXME
24
+ text_generation: {
25
+ default: -> { Model.new(ENV.fetch("RAGDOLL_DEFAULT_TEXT_MODEL", "openai/gpt-4o")) },
26
+ summary: -> { Model.new(ENV.fetch("RAGDOLL_SUMMARY_MODEL", "openai/gpt-4o")) },
27
+ keywords: -> { Model.new(ENV.fetch("RAGDOLL_KEYWORDS_MODEL", "openai/gpt-4o")) }
25
28
  },
29
+ embedding: {
30
+ provider: :openai,
31
+ text: -> { Model.new(ENV.fetch("RAGDOLL_TEXT_EMBEDDING_MODEL", "openai/text-embedding-3-small")) },
32
+ image: -> { Model.new(ENV.fetch("RAGDOLL_IMAGE_EMBEDDING_MODEL", "openai/clip-vit-base-patch32")) },
33
+ audio: -> { Model.new(ENV.fetch("RAGDOLL_AUDIO_EMBEDDING_MODEL", "openai/whisper-1")) },
34
+ max_dimensions: 3072,
35
+ cache_embeddings: true
36
+ }
26
37
  },
27
- chunking: {
38
+
39
+ # Processing configuration by content type
40
+ processing: {
28
41
  text: {
29
- max_tokens: 1000,
30
- overlap: 200,
31
- },
32
- image: {
33
- max_tokens: 4096,
34
- overlap: 128,
35
- },
36
- audio: {
37
- max_tokens: 4096,
38
- overlap: 128,
42
+ chunking: {
43
+ max_tokens: 1000,
44
+ overlap: 200
45
+ }
39
46
  },
40
47
  default: {
41
- max_tokens: 4096,
42
- overlap: 128,
48
+ chunking: {
49
+ max_tokens: 4096,
50
+ overlap: 128
51
+ }
43
52
  },
53
+ search: {
54
+ similarity_threshold: 0.7,
55
+ max_results: 10,
56
+ analytics: {
57
+ enable: true,
58
+ usage_tracking_enabled: true,
59
+ ranking_enabled: true,
60
+ recency_weight: 0.3,
61
+ frequency_weight: 0.7,
62
+ similarity_weight: 1.0
63
+ }
64
+ }
44
65
  },
45
- ruby_llm_config: {
66
+
67
+ # LLM provider configurations (renamed from ruby_llm_config)
68
+ llm_providers: {
69
+ default_provider: :openai,
46
70
  openai: {
47
- api_key: -> { ENV["OPENAI_API_KEY"] },
48
- organization: -> { ENV["OPENAI_ORGANIZATION"] },
49
- project: -> { ENV["OPENAI_PROJECT"] },
71
+ api_key: -> { ENV.fetch("OPENAI_API_KEY", nil) },
72
+ organization: -> { ENV.fetch("OPENAI_ORGANIZATION", nil) },
73
+ project: -> { ENV.fetch("OPENAI_PROJECT", nil) }
50
74
  },
51
75
  anthropic: {
52
- api_key: -> { ENV["ANTHROPIC_API_KEY"] },
76
+ api_key: -> { ENV.fetch("ANTHROPIC_API_KEY", nil) }
53
77
  },
54
78
  google: {
55
- api_key: -> { ENV["GOOGLE_API_KEY"] },
56
- project_id: -> { ENV["GOOGLE_PROJECT_ID"] },
79
+ api_key: -> { ENV.fetch("GOOGLE_API_KEY", nil) },
80
+ project_id: -> { ENV.fetch("GOOGLE_PROJECT_ID", nil) }
57
81
  },
58
82
  azure: {
59
- api_key: -> { ENV["AZURE_OPENAI_API_KEY"] },
60
- endpoint: -> { ENV["AZURE_OPENAI_ENDPOINT"] },
61
- api_version: -> { ENV["AZURE_OPENAI_API_VERSION"] || "2024-02-01" },
83
+ api_key: -> { ENV.fetch("AZURE_OPENAI_API_KEY", nil) },
84
+ endpoint: -> { ENV.fetch("AZURE_OPENAI_ENDPOINT", nil) },
85
+ api_version: -> { ENV.fetch("AZURE_OPENAI_API_VERSION", "2024-02-01") }
62
86
  },
63
87
  ollama: {
64
- endpoint: -> { ENV["OLLAMA_ENDPOINT"] || "http://localhost:11434/v1" },
88
+ endpoint: -> { ENV.fetch("OLLAMA_ENDPOINT", "http://localhost:11434") }
65
89
  },
66
90
  huggingface: {
67
- api_key: -> { ENV["HUGGINGFACE_API_KEY"] },
91
+ api_key: -> { ENV.fetch("HUGGINGFACE_API_KEY", nil) }
68
92
  },
69
93
  openrouter: {
70
- api_key: -> { ENV["OPENROUTER_API_KEY"] },
71
- },
72
- },
73
- embedding_config: {
74
- provider: :openai,
75
- cache_embeddings: true,
76
- max_embedding_dimensions: 3072, # Support up to text-embedding-3-large
94
+ api_key: -> { ENV.fetch("OPENROUTER_API_KEY", nil) }
95
+ }
77
96
  },
78
- summarization_config: {
97
+
98
+ # Summarization configuration
99
+ summarization: {
79
100
  enable: true,
80
101
  max_length: 300,
81
- min_content_length: 300,
102
+ min_content_length: 300
82
103
  },
83
- database_config: {
104
+
105
+ # Database configuration with standardized ENV variable name
106
+ database: {
84
107
  adapter: "postgresql",
85
108
  database: "ragdoll_development",
86
109
  username: "ragdoll",
87
- password: -> { ENV["DATABASE_PASSWORD"] },
110
+ password: -> { ENV.fetch("RAGDOLL_DATABASE_PASSWORD", nil) },
88
111
  host: "localhost",
89
112
  port: 5432,
90
113
  auto_migrate: true,
91
- logger: nil, # Set to Logger.new(STDOUT) for debugging
114
+ logger: nil
92
115
  },
93
- logging_config: {
94
- log_level: :warn,
95
- log_directory: File.join(Dir.home, ".ragdoll"),
96
- log_filepath: File.join(Dir.home, ".ragdoll", "ragdoll.log"),
97
- },
98
- search: {
99
- similarity_threshold: 0.7,
100
- max_results: 10,
101
- enable_analytics: true,
102
- enable_usage_tracking: true,
103
- usage_ranking_enabled: true,
104
- usage_recency_weight: 0.3,
105
- usage_frequency_weight: 0.7,
106
- usage_similarity_weight: 1.0,
116
+
117
+ # Logging configuration with corrected key names; log paths are written out literally under the base directory's logs/ folder
118
+ logging: {
119
+ level: :warn, # Fixed: was log_level, now matches usage
120
+ directory: File.join(Dir.home, ".config", "ragdoll", "logs"),
121
+ filepath: File.join(Dir.home, ".config", "ragdoll", "logs", "ragdoll.log")
107
122
  },
108
- }
123
+
124
+ # Prompt templates for customizable text generation
125
+ prompt_templates: {
126
+ rag_enhancement: <<~TEMPLATE.strip
127
+ You are an AI assistant. Use the following context to help answer the user's question.
128
+ If the context doesn't contain relevant information, say so.
129
+
130
+ Context:
131
+ {{context}}
132
+
133
+ Question: {{prompt}}
134
+
135
+ Answer:
136
+ TEMPLATE
137
+ }
138
+
139
+ }.freeze
109
140
 
110
141
  def initialize(config = {})
111
142
  merged_config = deep_merge(self.class::DEFAULT, config)
112
- resolved_config = resolve_procs(merged_config)
143
+ resolved_config = resolve_procs(merged_config, [])
113
144
  @config = OpenStruct.new(resolved_config)
114
145
  end
115
146
 
116
147
  def self.load(path: nil)
117
- path ||= DEFAULT[:filepath]
148
+ path ||= DEFAULT[:config_filepath]
118
149
 
119
- unless File.exist?(path)
120
- raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}"
121
- end
150
+ raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}" unless File.exist?(path)
122
151
 
123
152
  new(YAML.safe_load_file(path) || {})
124
153
  rescue Errno::ENOENT
125
154
  raise ConfigurationFileNotFoundError, "Configuration file not found: #{path}"
126
- rescue => e
155
+ rescue StandardError => e
127
156
  raise ConfigurationLoadUnknownError, "Failed to load configuration from #{path}: #{e.message}"
128
157
  end
129
158
 
130
159
  def save(path: nil)
131
160
  if path.nil?
132
- path = @config.filepath
161
+ path = @config.config_filepath
133
162
  else
134
- save_filepath = @config.filepath
135
- @config.filepath = path
163
+ save_filepath = @config.config_filepath
164
+ @config.config_filepath = path
136
165
  end
137
166
 
138
167
  FileUtils.mkdir_p(File.dirname(path))
139
168
 
140
169
  File.write(path, @config.to_yaml)
141
- rescue => e
142
- @config.filepath = save_filepath unless save_filepath.nil?
170
+ rescue StandardError => e
171
+ @config.config_filepath = save_filepath unless save_filepath.nil?
143
172
  raise ConfigurationSaveError, "Failed to save configuration to #{path}: #{e.message}"
144
173
  end
145
174
 
@@ -160,6 +189,35 @@ module Ragdoll
160
189
  end
161
190
  end
162
191
 
192
+ # Resolve model with inheritance support
193
+ # Returns the configured model for a text-generation task, falling back to the default entry; :embedding returns the whole embedding settings hash
194
+ def resolve_model(task_type)
195
+ case task_type
196
+ when :embedding
197
+ @config.models[:embedding]
198
+ when :text, :summary, :keywords, :default
199
+ @config.models[:text_generation][task_type] || @config.models[:text_generation][:default]
200
+ else
201
+ @config.models[:text_generation][:default]
202
+ end
203
+ end
204
+
205
+ # Get provider credentials for a given provider
206
+ def provider_credentials(provider = nil)
207
+ provider ||= @config.llm_providers[:default_provider]
208
+ @config.llm_providers[provider] || {}
209
+ end
210
+
211
+ # Resolve embedding model for content type
212
+ def embedding_model(content_type = :text)
213
+ @config.models[:embedding][content_type] || @config.models[:embedding][:text]
214
+ end
215
+
216
+ # Get prompt template
217
+ def prompt_template(template_name = :rag_enhancement)
218
+ @config.prompt_templates[template_name]
219
+ end
220
+
163
221
  # Enable method delegation to the internal OpenStruct
164
222
  def method_missing(method_name, *args, &block)
165
223
  @config.send(method_name, *args, &block)
@@ -171,103 +229,29 @@ module Ragdoll
171
229
 
172
230
  private
173
231
 
174
- def resolve_procs(obj)
232
+ def resolve_procs(obj, path = [])
175
233
  case obj
176
234
  when Hash
177
- obj.transform_values { |v| resolve_procs(v) }
235
+ obj.each_with_object({}) { |(k, v), result| result[k] = resolve_procs(v, path + [k]) }
178
236
  when Proc
179
237
  obj.call
238
+ when String
239
+ # Convert strings to Model instances in the models configuration section
240
+ if path.length >= 2 && path[0] == :models
241
+ Model.new(obj)
242
+ else
243
+ obj
244
+ end
180
245
  else
181
246
  obj
182
247
  end
183
248
  end
184
249
 
185
250
  def deep_merge(hash1, hash2)
186
- hash1.merge(hash2) do |key, oldval, newval|
251
+ hash1.merge(hash2) do |_key, oldval, newval|
187
252
  oldval.is_a?(Hash) && newval.is_a?(Hash) ? deep_merge(oldval, newval) : newval
188
253
  end
189
254
  end
190
255
  end
191
256
  end
192
257
  end
193
-
194
- __END__
195
-
196
- {
197
- directory: "/Users/dewayne/.ragdoll",
198
- filepath: "/Users/dewayne/.ragdoll/config.yml",
199
- embedding_config:
200
- {default:
201
- {model: "openai/gpt-4o-mini", summary_model: "openai/gpt-4o-mini", keywords_model: "openai/gpt-4o-mini", max_dimensions: 3072},
202
- text: {model: "openai/text-embedding-3-small", max_tokens: 1000, overlap: 200},
203
- image: {model: "laion/CLIP-ViT-H-14", max_tokens: 4096, overlap: 128},
204
- audio: {model: "openl3", transcription_model: "openai/whisper-large-v2", max_tokens: 4096, overlap: 128}},
205
- chunking: {text: {max_tokens: 1000, overlap: 200}, default: {max_tokens: 4096, overlap: 128}},
206
- ruby_llm_config:
207
- {openai: {api_key: "***", organization: nil, project: nil},
208
- anthropic:
209
- {api_key: "***"},
210
- google: {api_key: "***", project_id: nil},
211
- azure: {api_key: nil, endpoint: nil, api_version: "2024-02-01"},
212
- ollama: {endpoint: "http://localhost:11434/v1"},
213
- huggingface: {api_key: nil},
214
- openrouter: {api_key: nil}},
215
- summarization_config: {enable: true, model: nil, max_length: 300, min_content_length: 300},
216
- database_config:
217
- {adapter: "postgresql",
218
- database: "ragdoll_development",
219
- username: "ragdoll",
220
- password: "ragdoll",
221
- host: "localhost",
222
- port: 5432,
223
- pool: 20,
224
- timeout: 5000,
225
- auto_migrate: true,
226
- logger: nil},
227
- logging_config: {level: :warn, directory: "/Users/dewayne/.ragdoll", filepath: "/Users/dewayne/.ragdoll/ragdoll.log"},
228
- search:
229
- {similarity_threshold: 0.7,
230
- max_results: 10,
231
- enable_analytics: true,
232
- enable_usage_tracking: true,
233
- usage_ranking_enabled: true,
234
- usage_recency_weight: 0.3,
235
- usage_frequency_weight: 0.7,
236
- usage_similarity_weight: 1.0},
237
- llm_provider: :openai,
238
- openai_api_key: "***",
239
- llm_config:
240
- {openai: {api_key: "***", organization: nil, project: nil},
241
- anthropic:
242
- {api_key: "***"},
243
- google: {api_key: "***", project_id: nil},
244
- azure: {api_key: nil, endpoint: nil, api_version: "2024-02-01"},
245
- ollama: {endpoint: "http://localhost:11434"},
246
- huggingface: {api_key: nil},
247
- openrouter: {api_key: nil}},
248
- embedding_provider: :openai,
249
- embedding_model: "text-embedding-3-small",
250
- max_embedding_dimensions: 3072,
251
- cache_embeddings: true,
252
- default_model: "gpt-4o-mini",
253
- summary_provider_model: "openai/gpt-4o-mini",
254
- keywords_provider_model: "openai/gpt-4o-mini",
255
- embeddings_provider_model: "openai/text-embedding-3-small",
256
- summary_model: nil,
257
- chunk_size: 1000,
258
- chunk_overlap: 200,
259
- enable_document_summarization: true,
260
- summary_max_length: 300,
261
- summary_min_content_length: 300,
262
- prompt_template: nil,
263
- search_similarity_threshold: 0.7,
264
- max_search_results: 10,
265
- enable_search_analytics: true,
266
- enable_usage_tracking: true,
267
- usage_ranking_enabled: true,
268
- usage_recency_weight: 0.3,
269
- usage_frequency_weight: 0.7,
270
- usage_similarity_weight: 1.0,
271
- log_level: :warn,
272
- log_file: "/Users/dewayne/.ragdoll/ragdoll.log"
273
- }
@@ -125,7 +125,7 @@ module Ragdoll
125
125
  adapter: "postgresql",
126
126
  database: "ragdoll_development",
127
127
  username: "ragdoll",
128
- password: ENV["RAGDOLL_DATABASE_PASSWORD"],
128
+ password: ENV.fetch("RAGDOLL_DATABASE_PASSWORD", nil),
129
129
  host: "localhost",
130
130
  port: 5432,
131
131
  auto_migrate: true,
@@ -0,0 +1,45 @@
1
+ # lib/ragdoll/core/model.rb
2
+ # frozen_string_literal: true
3
+
4
+ module Ragdoll
5
+ module Core
6
+ # Model represents a provider and model name.
7
+ # It is initialized with a string in the format "provider/model".
8
+ # The provider is optional.
9
+ # Can be initialized with nil or empty string.
10
+ Model = Data.define(:name) do
11
+ # @return [Symbol, nil] the provider part of the name, or nil if not present.
12
+ def provider
13
+ return nil if name.nil? || name.empty?
14
+
15
+ parts = name.split("/", 2)
16
+ return nil if parts.length < 2 || parts.first.empty?
17
+
18
+ parts.first.to_sym
19
+ end
20
+
21
+ # @return [String, nil] the model part of the name, or nil if name is nil/empty.
22
+ def model
23
+ return nil if name.nil? || name.empty?
24
+
25
+ parts = name.split("/", 2)
26
+ parts.length < 2 ? name : parts.last
27
+ end
28
+
29
+ # @return [String] the original name string, or empty string if name is nil.
30
+ def to_s
31
+ name.nil? ? "" : name
32
+ end
33
+
34
+ # @return [Hash] a hash representation of the model.
35
+ def to_h
36
+ { provider: provider, model: model }
37
+ end
38
+
39
+ # YAML serialization - save as string name
40
+ def encode_with(coder)
41
+ coder.scalar = name
42
+ end
43
+ end
44
+ end
45
+ end
@@ -3,6 +3,6 @@
3
3
 
4
4
  module Ragdoll
5
5
  module Core
6
- VERSION = "0.1.1"
6
+ VERSION = "0.1.8"
7
7
  end
8
8
  end