ragdoll 0.0.2 → 0.1.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/README.md +40 -318
  3. data/Rakefile +4 -15
  4. data/app/models/ragdoll/document.rb +9 -0
  5. data/app/models/ragdoll/embedding.rb +9 -0
  6. data/config/initializers/ragdoll.rb +6 -0
  7. data/config/routes.rb +5 -0
  8. data/db/migrate/20250218123456_create_documents.rb +20 -0
  9. data/lib/config/database.yml +28 -0
  10. data/lib/config/ragdoll.yml +31 -0
  11. data/lib/ragdoll/engine.rb +16 -0
  12. data/lib/ragdoll/import_job.rb +15 -0
  13. data/lib/ragdoll/ingestion.rb +30 -0
  14. data/lib/ragdoll/search.rb +18 -0
  15. data/lib/ragdoll/version.rb +7 -0
  16. data/lib/ragdoll.rb +6 -243
  17. data/lib/tasks/import_task.thor +32 -0
  18. data/lib/tasks/jobs_task.thor +40 -0
  19. data/lib/tasks/ragdoll_tasks.thor +7 -0
  20. data/lib/tasks/search_task.thor +55 -0
  21. metadata +37 -40
  22. data/db/migrate/001_enable_postgresql_extensions.rb +0 -23
  23. data/db/migrate/004_create_ragdoll_documents.rb +0 -70
  24. data/db/migrate/005_create_ragdoll_embeddings.rb +0 -41
  25. data/db/migrate/006_create_ragdoll_contents.rb +0 -47
  26. data/lib/ragdoll/core/client.rb +0 -315
  27. data/lib/ragdoll/core/configuration.rb +0 -273
  28. data/lib/ragdoll/core/database.rb +0 -141
  29. data/lib/ragdoll/core/document_management.rb +0 -110
  30. data/lib/ragdoll/core/document_processor.rb +0 -344
  31. data/lib/ragdoll/core/embedding_service.rb +0 -183
  32. data/lib/ragdoll/core/errors.rb +0 -11
  33. data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
  34. data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
  35. data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
  36. data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
  37. data/lib/ragdoll/core/metadata_schemas.rb +0 -334
  38. data/lib/ragdoll/core/models/audio_content.rb +0 -175
  39. data/lib/ragdoll/core/models/content.rb +0 -126
  40. data/lib/ragdoll/core/models/document.rb +0 -678
  41. data/lib/ragdoll/core/models/embedding.rb +0 -204
  42. data/lib/ragdoll/core/models/image_content.rb +0 -227
  43. data/lib/ragdoll/core/models/text_content.rb +0 -169
  44. data/lib/ragdoll/core/search_engine.rb +0 -50
  45. data/lib/ragdoll/core/services/image_description_service.rb +0 -230
  46. data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
  47. data/lib/ragdoll/core/shrine_config.rb +0 -71
  48. data/lib/ragdoll/core/text_chunker.rb +0 -210
  49. data/lib/ragdoll/core/text_generation_service.rb +0 -360
  50. data/lib/ragdoll/core/version.rb +0 -8
  51. data/lib/ragdoll/core.rb +0 -73
  52. data/lib/ragdoll-core.rb +0 -3
  53. data/lib/tasks/annotate.rake +0 -126
  54. data/lib/tasks/db.rake +0 -338
data/lib/ragdoll.rb CHANGED
@@ -1,249 +1,12 @@
1
+ # This file is the main entry point for the Ragdoll gem, requiring all necessary components.
2
+
1
3
  # frozen_string_literal: true
2
4
 
3
- require "debug_me"
4
- include DebugMe
5
- $DEBUG_ME = true
5
+ # frozen_string_literal: true
6
6
 
7
- require "delegate"
8
- require_relative "ragdoll/core"
7
+ require "ragdoll/version"
8
+ require "ragdoll/engine"
9
9
 
10
10
  module Ragdoll
11
- class << self
12
-
13
- #################
14
- # Configuration #
15
- #################
16
-
17
- # Retrieve the current configuration.
18
- # @example
19
- # config = Ragdoll.config
20
- # puts config.database_config[:adapter]
21
- # @example
22
- # current_config = Ragdoll.configuration
23
- # puts current_config.models[:default]
24
- # @return [Ragdoll::Core::Configuration] the current configuration instance.
25
- def config
26
- Core.config
27
- end
28
-
29
- # Configure the Ragdoll module.
30
- # @yieldparam config [Ragdoll::Core::Configuration] the configuration instance to modify.
31
- # @example
32
- # Ragdoll.configure do |config|
33
- # config.database_config[:adapter] = "postgres"
34
- # end
35
- # @yield [Ragdoll::Core::Configuration] yields the configuration instance for modification.
36
- def configure(*args, **kwargs, &block)
37
- Ragdoll::Core.configure(*args, **kwargs, &block)
38
- end
39
-
40
- # Access the current configuration.
41
- # @param args [Array] additional arguments for configuration.
42
- # @param kwargs [Hash] keyword arguments for configuration.
43
- # @return [Ragdoll::Core::Configuration] the current configuration instance.
44
- def configuration(*args, **kwargs)
45
- Ragdoll::Core.configuration(*args, **kwargs)
46
- end
47
-
48
- # @example
49
- # Ragdoll.reset_configuration!
50
- # puts Ragdoll.config.models[:default]
51
- def reset_configuration!(*args, **kwargs)
52
- Ragdoll::Core.reset_configuration!(*args, **kwargs)
53
- end
54
-
55
-
56
- #######################
57
- # Document Management #
58
- #######################
59
-
60
- # Add a directory of documents to the system.
61
- # @param path [String] the path to the directory containing documents.
62
- # @example
63
- # Ragdoll.add_directory(path: "/path/to/documents", recursive: true)
64
- # @param recursive [Boolean] whether to add documents from subdirectories.
65
- def add_directory(*args, **kwargs)
66
- Ragdoll::Core.add_directory(*args, **kwargs)
67
- end
68
-
69
- # Add a single document to the system.
70
- # @example
71
- # Ragdoll.add_document(path: "/path/to/document.txt")
72
- # @param path [String] the file path of the document to add.
73
- def add_document(*args, **kwargs)
74
- Ragdoll::Core.add_document(*args, **kwargs)
75
- end
76
- alias_method :add, :add_document
77
-
78
- # Retrieve a document by its identifier.
79
- # @param id [String] the identifier of the document to retrieve.
80
- # @example
81
- # document = Ragdoll.get_document(id: "123")
82
- # puts document[:title] if document
83
- # @return [Hash, nil] the document data or nil if not found.
84
- def get_document(*args, **kwargs)
85
- Ragdoll::Core.get_document(*args, **kwargs)
86
- end
87
- alias_method :get, :get_document
88
-
89
- # List all documents in the system.
90
- # @param options [Hash] options for listing documents, such as limit and offset.
91
- # @example
92
- # documents = Ragdoll.list_documents(limit: 10)
93
- # documents.each { |doc| puts doc[:title] }
94
- # @return [Array<Hash>] an array of document data.
95
- def list_documents(*args, **kwargs)
96
- Ragdoll::Core.list_documents(*args, **kwargs)
97
- end
98
- alias_method :list, :list_documents
99
-
100
- # Delete a document by its identifier.
101
- # @param id [String] the identifier of the document to delete.
102
- # @example
103
- # success = Ragdoll.delete_document(id: "123")
104
- # puts "Deleted" if success
105
- # @return [Boolean] true if the document was successfully deleted.
106
- def delete_document(*args, **kwargs)
107
- Ragdoll::Core.delete_document(*args, **kwargs)
108
- end
109
- alias_method :delete, :delete_document
110
-
111
- # Get the status of a document.
112
- # @param id [String] the identifier of the document to check status.
113
- # @example
114
- # status = Ragdoll.document_status(id: "123")
115
- # puts status[:status]
116
- # @return [Hash] the status information of the document.
117
- def document_status(*args, **kwargs)
118
- Ragdoll::Core.document_status(*args, **kwargs)
119
- end
120
- alias_method :status, :document_status
121
-
122
- # Update a document's information.
123
- # @param id [String] the identifier of the document to update.
124
- # @param updates [Hash] the fields to update in the document.
125
- # @example
126
- # updated_doc = Ragdoll.update_document(id: "123", title: "New Title")
127
- # puts updated_doc[:title]
128
- # @return [Hash] the updated document data.
129
- def update_document(*args, **kwargs)
130
- Ragdoll::Core.update_document(*args, **kwargs)
131
- end
132
- alias_method :update, :update_document
133
-
134
- # Retrieve all documents.
135
- # @example
136
- # all_docs = Ragdoll.documents
137
- # all_docs.each { |doc| puts doc.title }
138
- # @return [ActiveRecord::Relation] a relation of all documents.
139
- def documents
140
- Ragdoll::Core::Models::Document.all
141
- end
142
- alias_method :docs, :documents
143
-
144
- #############
145
- # Retrieval #
146
- #############
147
-
148
- # FIXME: This high-level API method should be able to take a query that is
149
- # a string or a file. If its a file, then the downstream Process will
150
- # be responsible for reading the file and passing the contents to the
151
- # search method based upon whether the content is text, image or audio.
152
-
153
- # Perform a search for documents based on a query.
154
- # @param query [String] the search query string.
155
- # @param options [Hash] additional search options, such as filters and limits.
156
- # @example
157
- # response = Ragdoll.search(query: "example search")
158
- # response[:results].each { |result| puts result[:document_title] }
159
- # @return [Hash] the search results.
160
- def search(*args, **kwargs)
161
- Ragdoll::Core.search(*args, **kwargs)
162
- end
163
-
164
- # Enhance a prompt with additional context.
165
- # @param prompt [String] the original prompt to enhance.
166
- # @param context_limit [Integer] the number of context chunks to include.
167
- # @param options [Hash] additional options for enhancing the prompt.
168
- # @example
169
- # enhanced = Ragdoll.enhance_prompt(prompt: "What is AI?", context_limit: 3)
170
- # puts enhanced[:enhanced_prompt]
171
- # @return [Hash] the enhanced prompt data.
172
- def enhance_prompt(*args, **kwargs)
173
- Ragdoll::Core.enhance_prompt(*args, **kwargs)
174
- end
175
-
176
- # Retrieve context for a given query.
177
- # @param query [String] the query to retrieve context for.
178
- # @param limit [Integer] the number of context chunks to retrieve.
179
- # @param options [Hash] additional options for context retrieval.
180
- # @example
181
- # context = Ragdoll.get_context(query: "AI", limit: 5)
182
- # puts context[:combined_context]
183
- # @return [Hash] the context data.
184
- def get_context(*args, **kwargs)
185
- Ragdoll::Core.get_context(*args, **kwargs)
186
- end
187
-
188
- # Search for content similar to a given query.
189
- # @param query [String] the query to find similar content for.
190
- # @param options [Hash] additional options for the search, such as filters and limits.
191
- # @example
192
- # similar_content = Ragdoll.search_similar_content(query: "AI")
193
- # similar_content.each { |content| puts content[:document_title] }
194
- # @return [Array<Hash>] an array of similar content data.
195
- def search_similar_content(*args, **kwargs)
196
- Ragdoll::Core.search_similar_content(*args, **kwargs)
197
- end
198
-
199
-
200
- ###############
201
- # Misc. Stuff #
202
- ###############
203
-
204
- # Retrieve statistics about the system.
205
- # @example
206
- # stats = Ragdoll.stats
207
- # puts stats[:total_documents]
208
- # @return [Hash] the system statistics.
209
- def stats(*args, **kwargs)
210
- Ragdoll::Core.stats(*args, **kwargs)
211
- end
212
-
213
- # Check if the system is healthy.
214
- # @example
215
- # puts "System is healthy" if Ragdoll.healthy?
216
- # @return [Boolean] true if the system is healthy.
217
- def healthy?(*args, **kwargs)
218
- Ragdoll::Core.healthy?(*args, **kwargs)
219
- end
220
-
221
- # Retrieve the client instance.
222
- # @example
223
- # client = Ragdoll.client
224
- # puts client.inspect
225
- # @return [Ragdoll::Core::Client] the client instance.
226
- def client(*args, **kwargs)
227
- Ragdoll::Core.client(*args, **kwargs)
228
- end
229
-
230
- # Retrieve the version information of the Ragdoll modules.
231
- # @example
232
- # versions = Ragdoll.version
233
- # versions.each { |version| puts version }
234
- # @return [Array<String>] an array of version strings for each module.
235
- def version
236
- versions = []
237
-
238
- ObjectSpace.each_object(Module) do |mod|
239
- if mod.name =~ /^Ragdoll::\w+$/
240
- if defined?(mod::VERSION) && mod::VERSION.is_a?(String)
241
- versions << "#{mod.name}: #{mod::VERSION}"
242
- end
243
- end
244
- end
245
-
246
- versions
247
- end
248
- end
11
+ class Error < StandardError; end
249
12
  end
data/lib/tasks/import_task.thor ADDED
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require_relative '../ragdoll/import_job'
5
+
6
+ module Ragdoll
7
+ class ImportTask < Thor
8
+ desc "import PATH", "Import documents from a file, glob, or directory"
9
+ method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
10
+ method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
11
+ def import(path)
12
+ queue = SolidQueue.new(concurrency: options[:jobs])
13
+ files = if File.directory?(path)
14
+ if options[:recursive]
15
+ Dir.glob("#{path}/**/*")
16
+ else
17
+ Dir.glob("#{path}/*")
18
+ end
19
+ else
20
+ [path]
21
+ end
22
+
23
+ files.each do |file|
24
+ next unless File.file?(file)
25
+
26
+ queue.push(file) do |file|
27
+ Ragdoll::ImportJob.perform_async(file)
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
data/lib/tasks/jobs_task.thor ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+
5
+ module Ragdoll
6
+ class JobsTask < Thor
7
+ desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
8
+ method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
9
+ method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
10
+ method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
11
+ method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
12
+ method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
13
+ method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
14
+ def jobs(job_id = nil)
15
+ if job_id
16
+ if options[:stop]
17
+ puts "Stopping job ID: #{job_id}..."
18
+ elsif options[:pause]
19
+ puts "Pausing job ID: #{job_id}..."
20
+ elsif options[:resume]
21
+ puts "Resuming job ID: #{job_id}..."
22
+ else
23
+ puts "Fetching status for job ID: #{job_id}..."
24
+ end
25
+ else
26
+ if options[:stop_all]
27
+ puts "Stopping all jobs..."
28
+ elsif options[:pause_all]
29
+ puts "Pausing all running jobs..."
30
+ elsif options[:resume_all]
31
+ puts "Resuming all paused jobs..."
32
+ else
33
+ puts "Fetching status of all running and queued import jobs..."
34
+ puts "Job ID: 12345, Status: Running, File: document1.txt"
35
+ puts "Job ID: 12346, Status: Running, File: document2.txt"
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
data/lib/tasks/ragdoll_tasks.thor ADDED
@@ -0,0 +1,7 @@
1
+ require 'thor'
2
+
3
+ module Ragdoll
4
+ class Tasks < Thor
5
+ # Move your existing CLI tasks here
6
+ end
7
+ end
data/lib/tasks/search_task.thor ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require_relative '../ragdoll/search'
5
+
6
+ module Ragdoll
7
+ class SearchTask < Thor
8
+ desc "search PROMPT", "Search the database with a prompt"
9
+ method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
10
+ method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
11
+ method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
12
+ def search(prompt = nil)
13
+ if options[:prompt]
14
+ prompt = File.read(options[:prompt])
15
+ end
16
+
17
+ unless prompt
18
+ puts "Please provide a prompt as a string or with the -p option."
19
+ return
20
+ end
21
+
22
+ keywords = extract_keywords(prompt)
23
+ vectorized_prompt = vectorize_prompt(prompt)
24
+ search_instance = Ragdoll::Search.new(vectorized_prompt)
25
+ results = search_instance.search_database(options[:max_count])
26
+
27
+ if options[:rerank]
28
+ results = rerank_results(results, keywords)
29
+ end
30
+
31
+ results.each do |result|
32
+ puts "Source: #{result[:source]}"
33
+ puts "Metadata: #{result[:metadata]}"
34
+ puts "--------------------------------"
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def rerank_results(results, keywords)
41
+ results.sort_by do |result|
42
+ content = result[:source].downcase
43
+ keywords.count { |keyword| content.include?(keyword) }
44
+ end.reverse
45
+ end
46
+
47
+ def extract_keywords(prompt)
48
+ prompt.split.map(&:downcase).uniq
49
+ end
50
+
51
+ def vectorize_prompt(prompt)
52
+ prompt.split.map(&:downcase)
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,14 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ragdoll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dewayne VanHoozer
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 1980-01-02 00:00:00.000000000 Z
11
- dependencies: []
10
+ date: 2025-02-19 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rails
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '7.1'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '7.1'
12
26
  description: Under development. Contributors welcome.
13
27
  email:
14
28
  - dvanhoozer@gmail.com
@@ -18,47 +32,30 @@ extra_rdoc_files: []
18
32
  files:
19
33
  - README.md
20
34
  - Rakefile
21
- - db/migrate/001_enable_postgresql_extensions.rb
22
- - db/migrate/004_create_ragdoll_documents.rb
23
- - db/migrate/005_create_ragdoll_embeddings.rb
24
- - db/migrate/006_create_ragdoll_contents.rb
25
- - lib/ragdoll-core.rb
35
+ - app/models/ragdoll/document.rb
36
+ - app/models/ragdoll/embedding.rb
37
+ - config/initializers/ragdoll.rb
38
+ - config/routes.rb
39
+ - db/migrate/20250218123456_create_documents.rb
40
+ - lib/config/database.yml
41
+ - lib/config/ragdoll.yml
26
42
  - lib/ragdoll.rb
27
- - lib/ragdoll/core.rb
28
- - lib/ragdoll/core/client.rb
29
- - lib/ragdoll/core/configuration.rb
30
- - lib/ragdoll/core/database.rb
31
- - lib/ragdoll/core/document_management.rb
32
- - lib/ragdoll/core/document_processor.rb
33
- - lib/ragdoll/core/embedding_service.rb
34
- - lib/ragdoll/core/errors.rb
35
- - lib/ragdoll/core/jobs/extract_keywords.rb
36
- - lib/ragdoll/core/jobs/extract_text.rb
37
- - lib/ragdoll/core/jobs/generate_embeddings.rb
38
- - lib/ragdoll/core/jobs/generate_summary.rb
39
- - lib/ragdoll/core/metadata_schemas.rb
40
- - lib/ragdoll/core/models/audio_content.rb
41
- - lib/ragdoll/core/models/content.rb
42
- - lib/ragdoll/core/models/document.rb
43
- - lib/ragdoll/core/models/embedding.rb
44
- - lib/ragdoll/core/models/image_content.rb
45
- - lib/ragdoll/core/models/text_content.rb
46
- - lib/ragdoll/core/search_engine.rb
47
- - lib/ragdoll/core/services/image_description_service.rb
48
- - lib/ragdoll/core/services/metadata_generator.rb
49
- - lib/ragdoll/core/shrine_config.rb
50
- - lib/ragdoll/core/text_chunker.rb
51
- - lib/ragdoll/core/text_generation_service.rb
52
- - lib/ragdoll/core/version.rb
53
- - lib/tasks/annotate.rake
54
- - lib/tasks/db.rake
43
+ - lib/ragdoll/engine.rb
44
+ - lib/ragdoll/import_job.rb
45
+ - lib/ragdoll/ingestion.rb
46
+ - lib/ragdoll/search.rb
47
+ - lib/ragdoll/version.rb
48
+ - lib/tasks/import_task.thor
49
+ - lib/tasks/jobs_task.thor
50
+ - lib/tasks/ragdoll_tasks.thor
51
+ - lib/tasks/search_task.thor
55
52
  homepage: https://github.com/MadBomber/ragdoll
56
53
  licenses:
57
54
  - MIT
58
55
  metadata:
59
56
  allowed_push_host: https://rubygems.org
60
57
  homepage_uri: https://github.com/MadBomber/ragdoll
61
- source_code_uri: https://github.com/MadBomber/ragdoll/blob/main
58
+ source_code_uri: https://github.com/MadBomber/ragdoll
62
59
  changelog_uri: https://github.com/MadBomber/ragdoll/blob/main/CHANGELOG.md
63
60
  rdoc_options: []
64
61
  require_paths:
@@ -67,14 +64,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
67
64
  requirements:
68
65
  - - ">="
69
66
  - !ruby/object:Gem::Version
70
- version: 3.2.0
67
+ version: 3.1.0
71
68
  required_rubygems_version: !ruby/object:Gem::Requirement
72
69
  requirements:
73
70
  - - ">="
74
71
  - !ruby/object:Gem::Version
75
72
  version: '0'
76
73
  requirements: []
77
- rubygems_version: 3.7.1
74
+ rubygems_version: 3.6.3
78
75
  specification_version: 4
79
- summary: Multi-Modal Retrieval Augmented Generation
76
+ summary: Ruby on Rails Engine
80
77
  test_files: []
data/db/migrate/001_enable_postgresql_extensions.rb DELETED
@@ -1,23 +0,0 @@
1
- class EnablePostgresqlExtensions < ActiveRecord::Migration[7.0]
2
- def up
3
- # This migration is now handled by the db:create rake task
4
- # Just ensure required extensions are available
5
-
6
- # Vector similarity search (required for embeddings)
7
- execute "CREATE EXTENSION IF NOT EXISTS vector"
8
-
9
- # Useful optional extensions for text processing and search
10
- execute "CREATE EXTENSION IF NOT EXISTS unaccent" # Remove accents from text
11
- execute "CREATE EXTENSION IF NOT EXISTS pg_trgm" # Trigram matching for fuzzy search
12
-
13
- # UUID support (useful for generating unique identifiers)
14
- execute "CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\""
15
- end
16
-
17
- def down
18
- execute <<-SQL
19
- DROP DATABASE IF EXISTS ragdoll_development;
20
- DROP ROLE IF EXISTS ragdoll;
21
- SQL
22
- end
23
- end
data/db/migrate/004_create_ragdoll_documents.rb DELETED
@@ -1,70 +0,0 @@
1
- class CreateRagdollDocuments < ActiveRecord::Migration[7.0]
2
- def change
3
- create_table :ragdoll_documents,
4
- comment: "Core documents table with LLM-generated structured metadata" do |t|
5
-
6
- t.string :location, null: false,
7
- comment: "Source location of document (file path, URL, or identifier)"
8
-
9
- t.string :title, null: false,
10
- comment: "Human-readable document title for display and search"
11
-
12
- t.text :summary, null: false, default: "",
13
- comment: "LLM-generated summary of document content"
14
-
15
- t.text :keywords , null: false, default: "",
16
- comment: "LLM-generated comma-separated keywords of document"
17
-
18
- t.string :document_type, null: false, default: "text",
19
- comment: "Document format type"
20
-
21
- t.string :status, null: false, default: "pending",
22
- comment: "Document processing status"
23
-
24
- t.json :metadata, default: {},
25
- comment: "LLM-generated structured metadata about the file"
26
-
27
- t.timestamp :file_modified_at, null: false, default: -> { "CURRENT_TIMESTAMP" },
28
- comment: "Timestamp when the source file was last modified"
29
-
30
- t.timestamps null: false,
31
- comment: "Standard creation and update timestamps"
32
-
33
- ###########
34
- # Indexes #
35
- ###########
36
-
37
- t.index :location, unique: true,
38
- comment: "Unique index for document source lookup"
39
-
40
- t.index :title,
41
- comment: "Index for title-based search"
42
-
43
- t.index :document_type,
44
- comment: "Index for filtering by document type"
45
-
46
- t.index :status,
47
- comment: "Index for filtering by processing status"
48
-
49
- t.index :created_at,
50
- comment: "Index for chronological sorting"
51
-
52
- t.index %i[document_type status],
53
- comment: "Composite index for type+status filtering"
54
-
55
- t.index "to_tsvector('english', COALESCE(title, '') ||
56
- ' ' ||
57
- COALESCE(metadata->>'summary', '') ||
58
- ' ' || COALESCE(metadata->>'keywords', '') ||
59
- ' ' || COALESCE(metadata->>'description', ''))",
60
- using: :gin, name: "index_ragdoll_documents_on_fulltext_search",
61
- comment: "Full-text search across title and metadata fields"
62
-
63
- t.index "(metadata->>'document_type')", name: "index_ragdoll_documents_on_metadata_type",
64
- comment: "Index for filtering by document type"
65
-
66
- t.index "(metadata->>'classification')", name: "index_ragdoll_documents_on_metadata_classification",
67
- comment: "Index for filtering by document classification"
68
- end
69
- end
70
- end
data/db/migrate/005_create_ragdoll_embeddings.rb DELETED
@@ -1,41 +0,0 @@
1
- class CreateRagdollEmbeddings < ActiveRecord::Migration[7.0]
2
- def change
3
- create_table :ragdoll_embeddings,
4
- comment: "Polymorphic vector embeddings storage for semantic similarity search" do |t|
5
-
6
- t.references :embeddable, polymorphic: true, null: false,
7
- comment: "Polymorphic reference to embeddable content"
8
-
9
- t.text :content, null: false, default: "",
10
- comment: "Original text content that was embedded"
11
-
12
- t.vector :embedding_vector, limit: 1536, null: false,
13
- comment: "Vector embedding using pgvector"
14
-
15
- t.integer :chunk_index, null: false,
16
- comment: "Chunk index for ordering embeddings"
17
-
18
- t.integer :usage_count, default: 0,
19
- comment: "Number of times used in similarity searches"
20
-
21
- t.datetime :returned_at,
22
- comment: "Timestamp of most recent usage"
23
-
24
- t.json :metadata, default: {},
25
- comment: "Embedding-specific metadata (positions, processing info)"
26
-
27
- t.timestamps null: false,
28
- comment: "Standard creation and update timestamps"
29
-
30
- ###########
31
- # Indexes #
32
- ###########
33
-
34
- t.index %i[embeddable_type embeddable_id],
35
- comment: "Index for finding embeddings by embeddable content"
36
-
37
- t.index :embedding_vector, using: :ivfflat, opclass: :vector_cosine_ops, name: "index_ragdoll_embeddings_on_embedding_vector_cosine",
38
- comment: "IVFFlat index for fast cosine similarity search"
39
- end
40
- end
41
- end