ragdoll 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +353 -0
- data/Rakefile +21 -0
- data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
- data/db/migrate/004_create_ragdoll_documents.rb +70 -0
- data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
- data/db/migrate/006_create_ragdoll_contents.rb +47 -0
- data/lib/ragdoll/core/client.rb +315 -0
- data/lib/ragdoll/core/configuration.rb +273 -0
- data/lib/ragdoll/core/database.rb +141 -0
- data/lib/ragdoll/core/document_management.rb +110 -0
- data/lib/ragdoll/core/document_processor.rb +344 -0
- data/lib/ragdoll/core/embedding_service.rb +183 -0
- data/lib/ragdoll/core/errors.rb +11 -0
- data/lib/ragdoll/core/jobs/extract_keywords.rb +32 -0
- data/lib/ragdoll/core/jobs/extract_text.rb +42 -0
- data/lib/ragdoll/core/jobs/generate_embeddings.rb +32 -0
- data/lib/ragdoll/core/jobs/generate_summary.rb +29 -0
- data/lib/ragdoll/core/metadata_schemas.rb +334 -0
- data/lib/ragdoll/core/models/audio_content.rb +175 -0
- data/lib/ragdoll/core/models/content.rb +126 -0
- data/lib/ragdoll/core/models/document.rb +678 -0
- data/lib/ragdoll/core/models/embedding.rb +204 -0
- data/lib/ragdoll/core/models/image_content.rb +227 -0
- data/lib/ragdoll/core/models/text_content.rb +169 -0
- data/lib/ragdoll/core/search_engine.rb +50 -0
- data/lib/ragdoll/core/services/image_description_service.rb +230 -0
- data/lib/ragdoll/core/services/metadata_generator.rb +335 -0
- data/lib/ragdoll/core/shrine_config.rb +71 -0
- data/lib/ragdoll/core/text_chunker.rb +210 -0
- data/lib/ragdoll/core/text_generation_service.rb +360 -0
- data/lib/ragdoll/core/version.rb +8 -0
- data/lib/ragdoll/core.rb +73 -0
- data/lib/ragdoll-core.rb +3 -0
- data/lib/ragdoll.rb +249 -0
- data/lib/tasks/annotate.rake +126 -0
- data/lib/tasks/db.rake +338 -0
- metadata +80 -0
data/lib/ragdoll.rb
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "debug_me"
|
4
|
+
include DebugMe
|
5
|
+
$DEBUG_ME = true
|
6
|
+
|
7
|
+
require "delegate"
|
8
|
+
require_relative "ragdoll/core"
|
9
|
+
|
10
|
+
# Top-level convenience facade for the gem.
#
# Every method below (except +documents+ and +version+) forwards its arguments
# unchanged to the method of the same name on Ragdoll::Core, so callers can
# write +Ragdoll.search(...)+ instead of +Ragdoll::Core.search(...)+.
module Ragdoll
  class << self
    #################
    # Configuration #
    #################

    # Retrieve the current configuration.
    # Delegates to Ragdoll::Core.config.
    # @example
    #   config = Ragdoll.config
    #   puts config.database_config[:adapter]
    # @return [Ragdoll::Core::Configuration] the current configuration instance.
    def config
      Ragdoll::Core.config
    end

    # Configure the Ragdoll module.
    # Delegates to Ragdoll::Core.configure, forwarding the block.
    # @yieldparam config [Ragdoll::Core::Configuration] the configuration instance to modify.
    # @example
    #   Ragdoll.configure do |config|
    #     config.database_config[:adapter] = "postgres"
    #   end
    def configure(*args, **kwargs, &block)
      Ragdoll::Core.configure(*args, **kwargs, &block)
    end

    # Access the current configuration.
    # Delegates to Ragdoll::Core.configuration.
    # @param args [Array] additional arguments for configuration.
    # @param kwargs [Hash] keyword arguments for configuration.
    # @example
    #   current_config = Ragdoll.configuration
    #   puts current_config.models[:default]
    # @return [Ragdoll::Core::Configuration] the current configuration instance.
    def configuration(*args, **kwargs)
      Ragdoll::Core.configuration(*args, **kwargs)
    end

    # Reset the configuration.
    # Delegates to Ragdoll::Core.reset_configuration!.
    # @example
    #   Ragdoll.reset_configuration!
    #   puts Ragdoll.config.models[:default]
    def reset_configuration!(*args, **kwargs)
      Ragdoll::Core.reset_configuration!(*args, **kwargs)
    end

    #######################
    # Document Management #
    #######################

    # Add a directory of documents to the system.
    # Delegates to Ragdoll::Core.add_directory.
    # @param path [String] the path to the directory containing documents.
    # @param recursive [Boolean] whether to add documents from subdirectories.
    # @example
    #   Ragdoll.add_directory(path: "/path/to/documents", recursive: true)
    def add_directory(*args, **kwargs)
      Ragdoll::Core.add_directory(*args, **kwargs)
    end

    # Add a single document to the system.
    # Delegates to Ragdoll::Core.add_document. Also available as +Ragdoll.add+.
    # @param path [String] the file path of the document to add.
    # @example
    #   Ragdoll.add_document(path: "/path/to/document.txt")
    def add_document(*args, **kwargs)
      Ragdoll::Core.add_document(*args, **kwargs)
    end
    alias_method :add, :add_document

    # Retrieve a document by its identifier.
    # Delegates to Ragdoll::Core.get_document. Also available as +Ragdoll.get+.
    # @param id [String] the identifier of the document to retrieve.
    # @example
    #   document = Ragdoll.get_document(id: "123")
    #   puts document[:title] if document
    # @return [Hash, nil] the document data or nil if not found.
    def get_document(*args, **kwargs)
      Ragdoll::Core.get_document(*args, **kwargs)
    end
    alias_method :get, :get_document

    # List all documents in the system.
    # Delegates to Ragdoll::Core.list_documents. Also available as +Ragdoll.list+.
    # @param options [Hash] options for listing documents, such as limit and offset.
    # @example
    #   documents = Ragdoll.list_documents(limit: 10)
    #   documents.each { |doc| puts doc[:title] }
    # @return [Array<Hash>] an array of document data.
    def list_documents(*args, **kwargs)
      Ragdoll::Core.list_documents(*args, **kwargs)
    end
    alias_method :list, :list_documents

    # Delete a document by its identifier.
    # Delegates to Ragdoll::Core.delete_document. Also available as +Ragdoll.delete+.
    # @param id [String] the identifier of the document to delete.
    # @example
    #   success = Ragdoll.delete_document(id: "123")
    #   puts "Deleted" if success
    # @return [Boolean] true if the document was successfully deleted.
    def delete_document(*args, **kwargs)
      Ragdoll::Core.delete_document(*args, **kwargs)
    end
    alias_method :delete, :delete_document

    # Get the status of a document.
    # Delegates to Ragdoll::Core.document_status. Also available as +Ragdoll.status+.
    # @param id [String] the identifier of the document to check status.
    # @example
    #   status = Ragdoll.document_status(id: "123")
    #   puts status[:status]
    # @return [Hash] the status information of the document.
    def document_status(*args, **kwargs)
      Ragdoll::Core.document_status(*args, **kwargs)
    end
    alias_method :status, :document_status

    # Update a document's information.
    # Delegates to Ragdoll::Core.update_document. Also available as +Ragdoll.update+.
    # @param id [String] the identifier of the document to update.
    # @param updates [Hash] the fields to update in the document.
    # @example
    #   updated_doc = Ragdoll.update_document(id: "123", title: "New Title")
    #   puts updated_doc[:title]
    # @return [Hash] the updated document data.
    def update_document(*args, **kwargs)
      Ragdoll::Core.update_document(*args, **kwargs)
    end
    alias_method :update, :update_document

    # Retrieve all documents.
    # Unlike the other helpers this does not go through Ragdoll::Core; it calls
    # +.all+ directly on the Document model. Also available as +Ragdoll.docs+.
    # @example
    #   all_docs = Ragdoll.documents
    #   all_docs.each { |doc| puts doc.title }
    # @return [ActiveRecord::Relation] a relation of all documents.
    def documents
      Ragdoll::Core::Models::Document.all
    end
    alias_method :docs, :documents

    #############
    # Retrieval #
    #############

    # FIXME: This high-level API method should be able to take a query that is
    #        a string or a file. If it's a file, then the downstream process will
    #        be responsible for reading the file and passing the contents to the
    #        search method based upon whether the content is text, image or audio.

    # Perform a search for documents based on a query.
    # Delegates to Ragdoll::Core.search.
    # @param query [String] the search query string.
    # @param options [Hash] additional search options, such as filters and limits.
    # @example
    #   response = Ragdoll.search(query: "example search")
    #   response[:results].each { |result| puts result[:document_title] }
    # @return [Hash] the search results.
    def search(*args, **kwargs)
      Ragdoll::Core.search(*args, **kwargs)
    end

    # Enhance a prompt with additional context.
    # Delegates to Ragdoll::Core.enhance_prompt.
    # @param prompt [String] the original prompt to enhance.
    # @param context_limit [Integer] the number of context chunks to include.
    # @param options [Hash] additional options for enhancing the prompt.
    # @example
    #   enhanced = Ragdoll.enhance_prompt(prompt: "What is AI?", context_limit: 3)
    #   puts enhanced[:enhanced_prompt]
    # @return [Hash] the enhanced prompt data.
    def enhance_prompt(*args, **kwargs)
      Ragdoll::Core.enhance_prompt(*args, **kwargs)
    end

    # Retrieve context for a given query.
    # Delegates to Ragdoll::Core.get_context.
    # @param query [String] the query to retrieve context for.
    # @param limit [Integer] the number of context chunks to retrieve.
    # @param options [Hash] additional options for context retrieval.
    # @example
    #   context = Ragdoll.get_context(query: "AI", limit: 5)
    #   puts context[:combined_context]
    # @return [Hash] the context data.
    def get_context(*args, **kwargs)
      Ragdoll::Core.get_context(*args, **kwargs)
    end

    # Search for content similar to a given query.
    # Delegates to Ragdoll::Core.search_similar_content.
    # @param query [String] the query to find similar content for.
    # @param options [Hash] additional options for the search, such as filters and limits.
    # @example
    #   similar_content = Ragdoll.search_similar_content(query: "AI")
    #   similar_content.each { |content| puts content[:document_title] }
    # @return [Array<Hash>] an array of similar content data.
    def search_similar_content(*args, **kwargs)
      Ragdoll::Core.search_similar_content(*args, **kwargs)
    end

    ###############
    # Misc. Stuff #
    ###############

    # Retrieve statistics about the system.
    # Delegates to Ragdoll::Core.stats.
    # @example
    #   stats = Ragdoll.stats
    #   puts stats[:total_documents]
    # @return [Hash] the system statistics.
    def stats(*args, **kwargs)
      Ragdoll::Core.stats(*args, **kwargs)
    end

    # Check if the system is healthy.
    # Delegates to Ragdoll::Core.healthy?.
    # @example
    #   puts "System is healthy" if Ragdoll.healthy?
    # @return [Boolean] true if the system is healthy.
    def healthy?(*args, **kwargs)
      Ragdoll::Core.healthy?(*args, **kwargs)
    end

    # Retrieve the client instance.
    # Delegates to Ragdoll::Core.client.
    # @example
    #   client = Ragdoll.client
    #   puts client.inspect
    # @return [Ragdoll::Core::Client] the client instance.
    def client(*args, **kwargs)
      Ragdoll::Core.client(*args, **kwargs)
    end

    # Retrieve the version information of the Ragdoll modules.
    #
    # Scans every loaded Module whose name is a direct child of Ragdoll
    # (+Ragdoll::Something+) and reports those that define their own String
    # +VERSION+ constant. Entries are sorted so the result is stable between
    # calls (ObjectSpace iteration order is not).
    # @example
    #   versions = Ragdoll.version
    #   versions.each { |version| puts version }
    # @return [Array<String>] an array of "Module::Name: x.y.z" strings, sorted.
    def version
      versions = []

      ObjectSpace.each_object(Module) do |mod|
        mod_name = mod.name
        # Anonymous modules have a nil name; skip anything that is not a
        # direct Ragdoll::<Name> child. \A/\z anchor the whole string.
        next unless mod_name.is_a?(String) && mod_name.match?(/\ARagdoll::\w+\z/)
        # inherit=false: only a VERSION defined on the module itself counts,
        # not one picked up from an included module or a superclass.
        next unless mod.const_defined?(:VERSION, false)

        declared = mod.const_get(:VERSION, false)
        versions << "#{mod_name}: #{declared}" if declared.is_a?(String)
      end

      versions.sort
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "annotate"
|
4
|
+
require "annotate/annotate_models"
|
5
|
+
|
6
|
+
# Define environment task for gem context
|
7
|
+
task :environment do
  # Load the gem's environment
  require_relative "../ragdoll-core"

  # Default development connection; the password is the only value read from
  # the environment (RAGDOLL_DATABASE_PASSWORD first, then DATABASE_PASSWORD).
  connection_settings = {
    adapter: "postgresql",
    database: "ragdoll_development",
    username: "ragdoll",
    password: ENV["RAGDOLL_DATABASE_PASSWORD"] || ENV["DATABASE_PASSWORD"],
    host: "localhost",
    port: 5432,
    auto_migrate: false # Don't auto-migrate during annotation
  }

  begin
    # Load all models to ensure they're available
    model_files = Dir.glob(File.join(__dir__, "../ragdoll/core/models/*.rb"))
    model_files.each { |model_file| require model_file }

    # Set up database connection with default config
    Ragdoll::Core::Database.setup(connection_settings)

    puts "✅ Connected to database: ragdoll_development"
  rescue StandardError => error
    # Best effort: report the failure and let dependent tasks continue.
    puts "❌ Database connection failed: #{error.message}"
    puts " Annotations will be based on model definitions only"
  end
end
|
33
|
+
|
34
|
+
namespace :annotate do
  # annotate:models — hands a fixed option set to AnnotateModels.do_annotations
  # for the models under lib/ragdoll/core/models.
  task :models do
    annotation_options = {
      "models" => "true",
      "position_in_class" => "before",
      "show_foreign_keys" => "true",
      "show_indexes" => "true",
      "model_dir" => "lib/ragdoll/core/models",
      "exclude_tests" => "true",
      "exclude_fixtures" => "true",
      "exclude_factories" => "true",
      "exclude_serializers" => "true",
      "exclude_scaffolds" => "true",
      "exclude_controllers" => "true",
      "exclude_helpers" => "true"
    }

    AnnotateModels.do_annotations(annotation_options)
  end
end
|
52
|
+
|
53
|
+
task :set_annotation_options do
  # You can override any of these by setting an environment variable of the
  # same name.
  annotate_defaults = {
    "active_admin" => "false",
    "additional_file_patterns" => [],
    "routes" => "false",
    "models" => "true",
    "position_in_routes" => "before",
    "position_in_class" => "before",
    "position_in_test" => "before",
    "position_in_fixture" => "before",
    "position_in_factory" => "before",
    "position_in_serializer" => "before",
    "show_foreign_keys" => "true",
    "show_complete_foreign_keys" => "false",
    "show_indexes" => "true",
    "simple_indexes" => "false",
    "model_dir" => "lib/ragdoll/core/models",
    "root_dir" => "",
    "include_version" => "false",
    "require" => "",
    "exclude_tests" => "false",
    "exclude_fixtures" => "false",
    "exclude_factories" => "false",
    "exclude_serializers" => "false",
    "exclude_scaffolds" => "false",
    "exclude_controllers" => "true",
    "exclude_helpers" => "true",
    "exclude_sti_subclasses" => "false",
    "ignore_model_sub_dir" => "false",
    "ignore_columns" => nil,
    "ignore_routes" => nil,
    "ignore_unknown_options" => "false",
    "hide_limit_column_types" => "integer,bigint,boolean",
    "hide_default_column_types" => "json,jsonb,hstore",
    "skip_on_db_migrate" => "false",
    "format_bare" => "true",
    "format_rdoc" => "false",
    "format_yard" => "false",
    "format_markdown" => "false",
    "sort" => "false",
    "force" => "false",
    "frozen" => "false",
    "classified_sort" => "true",
    "trace" => "false",
    "wrapper_open" => nil,
    "wrapper_close" => nil,
    "with_comment" => "true"
  }

  # NOTE(review): assumes Annotate.set_defaults takes the options as a single
  # positional Hash — confirm against the installed annotate version.
  Annotate.set_defaults(annotate_defaults)
end
|
104
|
+
|
105
|
+
# Load only essential model annotation tasks
|
106
|
+
desc "Add schema information (as comments) to model files"
task annotate_models: :environment do
  puts "Running annotate for ragdoll-core models..."

  # Use the CLI approach since the programmatic API doesn't respect model_dir properly.
  # The command is passed as an env hash plus an argument list rather than a
  # single shell string: no shell is spawned, and the MODEL_DIR override does
  # not rely on POSIX-shell "VAR=value cmd" syntax.
  annotate_env = { "MODEL_DIR" => "lib/ragdoll/core/models" }
  annotate_cmd = %w[
    bundle exec annotate
    --models --position-in-class=before --show-foreign-keys --show-indexes
  ]

  # system returns true on exit status 0, false on non-zero, nil if the
  # command could not be run; both failure cases take the warning branch.
  success = system(annotate_env, *annotate_cmd)

  if success
    puts "✅ Model annotations updated successfully!"
  else
    puts "⚠️ Annotate completed with warnings (database connection issues)"
  end
end
|
119
|
+
|
120
|
+
desc "Remove schema information from model files"
task :remove_annotation do
  # Same model directory the annotation tasks write to.
  removal_options = {
    "models" => "true",
    "model_dir" => "lib/ragdoll/core/models"
  }

  AnnotateModels.remove_annotations(removal_options)
end
|