ragdoll 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/README.md +318 -40
  3. data/Rakefile +15 -4
  4. data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
  5. data/db/migrate/004_create_ragdoll_documents.rb +70 -0
  6. data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
  7. data/db/migrate/006_create_ragdoll_contents.rb +47 -0
  8. data/lib/ragdoll/core/client.rb +315 -0
  9. data/lib/ragdoll/core/configuration.rb +273 -0
  10. data/lib/ragdoll/core/database.rb +141 -0
  11. data/lib/ragdoll/core/document_management.rb +110 -0
  12. data/lib/ragdoll/core/document_processor.rb +344 -0
  13. data/lib/ragdoll/core/embedding_service.rb +183 -0
  14. data/lib/ragdoll/core/errors.rb +11 -0
  15. data/lib/ragdoll/core/jobs/extract_keywords.rb +32 -0
  16. data/lib/ragdoll/core/jobs/extract_text.rb +42 -0
  17. data/lib/ragdoll/core/jobs/generate_embeddings.rb +32 -0
  18. data/lib/ragdoll/core/jobs/generate_summary.rb +29 -0
  19. data/lib/ragdoll/core/metadata_schemas.rb +334 -0
  20. data/lib/ragdoll/core/models/audio_content.rb +175 -0
  21. data/lib/ragdoll/core/models/content.rb +126 -0
  22. data/lib/ragdoll/core/models/document.rb +678 -0
  23. data/lib/ragdoll/core/models/embedding.rb +204 -0
  24. data/lib/ragdoll/core/models/image_content.rb +227 -0
  25. data/lib/ragdoll/core/models/text_content.rb +169 -0
  26. data/lib/ragdoll/core/search_engine.rb +50 -0
  27. data/lib/ragdoll/core/services/image_description_service.rb +230 -0
  28. data/lib/ragdoll/core/services/metadata_generator.rb +335 -0
  29. data/lib/ragdoll/core/shrine_config.rb +71 -0
  30. data/lib/ragdoll/core/text_chunker.rb +210 -0
  31. data/lib/ragdoll/core/text_generation_service.rb +360 -0
  32. data/lib/ragdoll/core/version.rb +8 -0
  33. data/lib/ragdoll/core.rb +73 -0
  34. data/lib/ragdoll-core.rb +3 -0
  35. data/lib/ragdoll.rb +243 -6
  36. data/lib/tasks/annotate.rake +126 -0
  37. data/lib/tasks/db.rake +338 -0
  38. metadata +40 -37
  39. data/app/models/ragdoll/document.rb +0 -9
  40. data/app/models/ragdoll/embedding.rb +0 -9
  41. data/config/initializers/ragdoll.rb +0 -6
  42. data/config/routes.rb +0 -5
  43. data/db/migrate/20250218123456_create_documents.rb +0 -20
  44. data/lib/config/database.yml +0 -28
  45. data/lib/config/ragdoll.yml +0 -31
  46. data/lib/ragdoll/engine.rb +0 -16
  47. data/lib/ragdoll/import_job.rb +0 -15
  48. data/lib/ragdoll/ingestion.rb +0 -30
  49. data/lib/ragdoll/search.rb +0 -18
  50. data/lib/ragdoll/version.rb +0 -7
  51. data/lib/tasks/import_task.thor +0 -32
  52. data/lib/tasks/jobs_task.thor +0 -40
  53. data/lib/tasks/ragdoll_tasks.thor +0 -7
  54. data/lib/tasks/search_task.thor +0 -55
data/lib/ragdoll.rb CHANGED
@@ -1,12 +1,249 @@
1
- # This file is the main entry point for the Ragdoll gem, requiring all necessary components.
2
-
3
1
  # frozen_string_literal: true
4
2
 
5
- # frozen_string_literal: true
3
+ require "debug_me"
4
+ include DebugMe
5
+ $DEBUG_ME = true
6
6
 
7
- require "ragdoll/version"
8
- require "ragdoll/engine"
7
+ require "delegate"
8
+ require_relative "ragdoll/core"
9
9
 
10
10
  module Ragdoll
11
- class Error < StandardError; end
11
+ class << self
12
+
13
+ #################
14
+ # Configuration #
15
+ #################
16
+
17
+ # Retrieve the current configuration.
18
+ # @example
19
+ # config = Ragdoll.config
20
+ # puts config.database_config[:adapter]
21
+ # @example
22
+ # current_config = Ragdoll.configuration
23
+ # puts current_config.models[:default]
24
+ # @return [Ragdoll::Core::Configuration] the current configuration instance.
25
+ def config
26
+ Core.config
27
+ end
28
+
29
+ # Configure the Ragdoll module.
30
+ # @yieldparam config [Ragdoll::Core::Configuration] the configuration instance to modify.
31
+ # @example
32
+ # Ragdoll.configure do |config|
33
+ # config.database_config[:adapter] = "postgres"
34
+ # end
35
+ # @yield [config] yields the configuration instance for modification.
36
+ def configure(*args, **kwargs, &block)
37
+ Ragdoll::Core.configure(*args, **kwargs, &block)
38
+ end
39
+
40
+ # Access the current configuration.
41
+ # @param args [Array] additional arguments for configuration.
42
+ # @param kwargs [Hash] keyword arguments for configuration.
43
+ # @return [Ragdoll::Core::Configuration] the current configuration instance.
44
+ def configuration(*args, **kwargs)
45
+ Ragdoll::Core.configuration(*args, **kwargs)
46
+ end
47
+
48
+ # @example
49
+ # Ragdoll.reset_configuration!
50
+ # puts Ragdoll.config.models[:default]
51
+ def reset_configuration!(*args, **kwargs)
52
+ Ragdoll::Core.reset_configuration!(*args, **kwargs)
53
+ end
54
+
55
+
56
+ #######################
57
+ # Document Management #
58
+ #######################
59
+
60
+ # Add a directory of documents to the system.
61
+ # @param path [String] the path to the directory containing documents.
62
+ # @example
63
+ # Ragdoll.add_directory(path: "/path/to/documents", recursive: true)
64
+ # @param recursive [Boolean] whether to add documents from subdirectories.
65
+ def add_directory(*args, **kwargs)
66
+ Ragdoll::Core.add_directory(*args, **kwargs)
67
+ end
68
+
69
+ # Add a single document to the system.
70
+ # @example
71
+ # Ragdoll.add_document(path: "/path/to/document.txt")
72
+ # @param path [String] the file path of the document to add.
73
+ def add_document(*args, **kwargs)
74
+ Ragdoll::Core.add_document(*args, **kwargs)
75
+ end
76
+ alias_method :add, :add_document
77
+
78
+ # Retrieve a document by its identifier.
79
+ # @param id [String] the identifier of the document to retrieve.
80
+ # @example
81
+ # document = Ragdoll.get_document(id: "123")
82
+ # puts document[:title] if document
83
+ # @return [Hash, nil] the document data or nil if not found.
84
+ def get_document(*args, **kwargs)
85
+ Ragdoll::Core.get_document(*args, **kwargs)
86
+ end
87
+ alias_method :get, :get_document
88
+
89
+ # List all documents in the system.
90
+ # @param options [Hash] options for listing documents, such as limit and offset.
91
+ # @example
92
+ # documents = Ragdoll.list_documents(limit: 10)
93
+ # documents.each { |doc| puts doc[:title] }
94
+ # @return [Array<Hash>] an array of document data.
95
+ def list_documents(*args, **kwargs)
96
+ Ragdoll::Core.list_documents(*args, **kwargs)
97
+ end
98
+ alias_method :list, :list_documents
99
+
100
+ # Delete a document by its identifier.
101
+ # @param id [String] the identifier of the document to delete.
102
+ # @example
103
+ # success = Ragdoll.delete_document(id: "123")
104
+ # puts "Deleted" if success
105
+ # @return [Boolean] true if the document was successfully deleted.
106
+ def delete_document(*args, **kwargs)
107
+ Ragdoll::Core.delete_document(*args, **kwargs)
108
+ end
109
+ alias_method :delete, :delete_document
110
+
111
+ # Get the status of a document.
112
+ # @param id [String] the identifier of the document to check status.
113
+ # @example
114
+ # status = Ragdoll.document_status(id: "123")
115
+ # puts status[:status]
116
+ # @return [Hash] the status information of the document.
117
+ def document_status(*args, **kwargs)
118
+ Ragdoll::Core.document_status(*args, **kwargs)
119
+ end
120
+ alias_method :status, :document_status
121
+
122
+ # Update a document's information.
123
+ # @param id [String] the identifier of the document to update.
124
+ # @param updates [Hash] the fields to update in the document.
125
+ # @example
126
+ # updated_doc = Ragdoll.update_document(id: "123", title: "New Title")
127
+ # puts updated_doc[:title]
128
+ # @return [Hash] the updated document data.
129
+ def update_document(*args, **kwargs)
130
+ Ragdoll::Core.update_document(*args, **kwargs)
131
+ end
132
+ alias_method :update, :update_document
133
+
134
+ # Retrieve all documents.
135
+ # @example
136
+ # all_docs = Ragdoll.documents
137
+ # all_docs.each { |doc| puts doc.title }
138
+ # @return [ActiveRecord::Relation] a relation of all documents.
139
+ def documents
140
+ Ragdoll::Core::Models::Document.all
141
+ end
142
+ alias_method :docs, :documents
143
+
144
+ #############
145
+ # Retrieval #
146
+ #############
147
+
148
+ # FIXME: This high-level API method should be able to take a query that is
149
+ # a string or a file. If it's a file, then the downstream process will
150
+ # be responsible for reading the file and passing the contents to the
151
+ # search method based upon whether the content is text, image or audio.
152
+
153
+ # Perform a search for documents based on a query.
154
+ # @param query [String] the search query string.
155
+ # @param options [Hash] additional search options, such as filters and limits.
156
+ # @example
157
+ # response = Ragdoll.search(query: "example search")
158
+ # response[:results].each { |result| puts result[:document_title] }
159
+ # @return [Hash] the search results.
160
+ def search(*args, **kwargs)
161
+ Ragdoll::Core.search(*args, **kwargs)
162
+ end
163
+
164
+ # Enhance a prompt with additional context.
165
+ # @param prompt [String] the original prompt to enhance.
166
+ # @param context_limit [Integer] the number of context chunks to include.
167
+ # @param options [Hash] additional options for enhancing the prompt.
168
+ # @example
169
+ # enhanced = Ragdoll.enhance_prompt(prompt: "What is AI?", context_limit: 3)
170
+ # puts enhanced[:enhanced_prompt]
171
+ # @return [Hash] the enhanced prompt data.
172
+ def enhance_prompt(*args, **kwargs)
173
+ Ragdoll::Core.enhance_prompt(*args, **kwargs)
174
+ end
175
+
176
+ # Retrieve context for a given query.
177
+ # @param query [String] the query to retrieve context for.
178
+ # @param limit [Integer] the number of context chunks to retrieve.
179
+ # @param options [Hash] additional options for context retrieval.
180
+ # @example
181
+ # context = Ragdoll.get_context(query: "AI", limit: 5)
182
+ # puts context[:combined_context]
183
+ # @return [Hash] the context data.
184
+ def get_context(*args, **kwargs)
185
+ Ragdoll::Core.get_context(*args, **kwargs)
186
+ end
187
+
188
+ # Search for content similar to a given query.
189
+ # @param query [String] the query to find similar content for.
190
+ # @param options [Hash] additional options for the search, such as filters and limits.
191
+ # @example
192
+ # similar_content = Ragdoll.search_similar_content(query: "AI")
193
+ # similar_content.each { |content| puts content[:document_title] }
194
+ # @return [Array<Hash>] an array of similar content data.
195
+ def search_similar_content(*args, **kwargs)
196
+ Ragdoll::Core.search_similar_content(*args, **kwargs)
197
+ end
198
+
199
+
200
+ ###############
201
+ # Misc. Stuff #
202
+ ###############
203
+
204
+ # Retrieve statistics about the system.
205
+ # @example
206
+ # stats = Ragdoll.stats
207
+ # puts stats[:total_documents]
208
+ # @return [Hash] the system statistics.
209
+ def stats(*args, **kwargs)
210
+ Ragdoll::Core.stats(*args, **kwargs)
211
+ end
212
+
213
+ # Check if the system is healthy.
214
+ # @example
215
+ # puts "System is healthy" if Ragdoll.healthy?
216
+ # @return [Boolean] true if the system is healthy.
217
+ def healthy?(*args, **kwargs)
218
+ Ragdoll::Core.healthy?(*args, **kwargs)
219
+ end
220
+
221
+ # Retrieve the client instance.
222
+ # @example
223
+ # client = Ragdoll.client
224
+ # puts client.inspect
225
+ # @return [Ragdoll::Core::Client] the client instance.
226
+ def client(*args, **kwargs)
227
+ Ragdoll::Core.client(*args, **kwargs)
228
+ end
229
+
230
+ # Retrieve the version information of the Ragdoll modules.
231
+ # @example
232
+ # versions = Ragdoll.version
233
+ # versions.each { |version| puts version }
234
+ # @return [Array<String>] one "<module name>: <VERSION>" string for each loaded Ragdoll::<Name> module that defines a String VERSION.
235
+ def version
236
+ versions = []
237
+
238
+ ObjectSpace.each_object(Module) do |mod|
239
+ if mod.name =~ /^Ragdoll::\w+$/
240
+ if defined?(mod::VERSION) && mod::VERSION.is_a?(String)
241
+ versions << "#{mod.name}: #{mod::VERSION}"
242
+ end
243
+ end
244
+ end
245
+
246
+ versions
247
+ end
248
+ end
12
249
  end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "annotate"
4
+ require "annotate/annotate_models"
5
+
6
+ # Define environment task for gem context
7
+ task :environment do
8
+ # Load the gem's environment
9
+ require_relative "../ragdoll-core"
10
+
11
+ # Set up database connection
12
+ begin
13
+ # Load all models to ensure they're available
14
+ Dir[File.join(__dir__, "../ragdoll/core/models/*.rb")].each { |file| require file }
15
+
16
+ # Set up database connection with default config
17
+ Ragdoll::Core::Database.setup({
18
+ adapter: "postgresql",
19
+ database: "ragdoll_development",
20
+ username: "ragdoll",
21
+ password: ENV["RAGDOLL_DATABASE_PASSWORD"] || ENV["DATABASE_PASSWORD"],
22
+ host: "localhost",
23
+ port: 5432,
24
+ auto_migrate: false # Don't auto-migrate during annotation
25
+ })
26
+
27
+ puts "✅ Connected to database: ragdoll_development"
28
+ rescue StandardError => e
29
+ puts "❌ Database connection failed: #{e.message}"
30
+ puts " Annotations will be based on model definitions only"
31
+ end
32
+ end
33
+
34
+ namespace :annotate do
35
+ task :models do
36
+ AnnotateModels.do_annotations({
37
+ "models" => "true",
38
+ "position_in_class" => "before",
39
+ "show_foreign_keys" => "true",
40
+ "show_indexes" => "true",
41
+ "model_dir" => "lib/ragdoll/core/models",
42
+ "exclude_tests" => "true",
43
+ "exclude_fixtures" => "true",
44
+ "exclude_factories" => "true",
45
+ "exclude_serializers" => "true",
46
+ "exclude_scaffolds" => "true",
47
+ "exclude_controllers" => "true",
48
+ "exclude_helpers" => "true"
49
+ })
50
+ end
51
+ end
52
+
53
+ task :set_annotation_options do
54
+ # You can override any of these by setting an environment variable of the
55
+ # same name.
56
+ Annotate.set_defaults(
57
+ "active_admin" => "false",
58
+ "additional_file_patterns" => [],
59
+ "routes" => "false",
60
+ "models" => "true",
61
+ "position_in_routes" => "before",
62
+ "position_in_class" => "before",
63
+ "position_in_test" => "before",
64
+ "position_in_fixture" => "before",
65
+ "position_in_factory" => "before",
66
+ "position_in_serializer" => "before",
67
+ "show_foreign_keys" => "true",
68
+ "show_complete_foreign_keys" => "false",
69
+ "show_indexes" => "true",
70
+ "simple_indexes" => "false",
71
+ "model_dir" => "lib/ragdoll/core/models",
72
+ "root_dir" => "",
73
+ "include_version" => "false",
74
+ "require" => "",
75
+ "exclude_tests" => "false",
76
+ "exclude_fixtures" => "false",
77
+ "exclude_factories" => "false",
78
+ "exclude_serializers" => "false",
79
+ "exclude_scaffolds" => "false",
80
+ "exclude_controllers" => "true",
81
+ "exclude_helpers" => "true",
82
+ "exclude_sti_subclasses" => "false",
83
+ "ignore_model_sub_dir" => "false",
84
+ "ignore_columns" => nil,
85
+ "ignore_routes" => nil,
86
+ "ignore_unknown_options" => "false",
87
+ "hide_limit_column_types" => "integer,bigint,boolean",
88
+ "hide_default_column_types" => "json,jsonb,hstore",
89
+ "skip_on_db_migrate" => "false",
90
+ "format_bare" => "true",
91
+ "format_rdoc" => "false",
92
+ "format_yard" => "false",
93
+ "format_markdown" => "false",
94
+ "sort" => "false",
95
+ "force" => "false",
96
+ "frozen" => "false",
97
+ "classified_sort" => "true",
98
+ "trace" => "false",
99
+ "wrapper_open" => nil,
100
+ "wrapper_close" => nil,
101
+ "with_comment" => "true"
102
+ )
103
+ end
104
+
105
+ # Load only essential model annotation tasks
106
+ desc "Add schema information (as comments) to model files"
107
+ task annotate_models: :environment do
108
+ puts "Running annotate for ragdoll-core models..."
109
+
110
+ # Use the CLI approach since the programmatic API doesn't respect model_dir properly
111
+ success = system("MODEL_DIR=lib/ragdoll/core/models bundle exec annotate --models --position-in-class=before --show-foreign-keys --show-indexes")
112
+
113
+ if success
114
+ puts "✅ Model annotations updated successfully!"
115
+ else
116
+ puts "⚠️ Annotate completed with warnings (database connection issues)"
117
+ end
118
+ end
119
+
120
+ desc "Remove schema information from model files"
121
+ task :remove_annotation do
122
+ AnnotateModels.remove_annotations({
123
+ "models" => "true",
124
+ "model_dir" => "lib/ragdoll/core/models"
125
+ })
126
+ end