ragdoll 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -318
- data/Rakefile +4 -15
- data/app/models/ragdoll/document.rb +9 -0
- data/app/models/ragdoll/embedding.rb +9 -0
- data/config/initializers/ragdoll.rb +6 -0
- data/config/routes.rb +5 -0
- data/db/migrate/20250218123456_create_documents.rb +20 -0
- data/lib/config/database.yml +28 -0
- data/lib/config/ragdoll.yml +31 -0
- data/lib/ragdoll/engine.rb +16 -0
- data/lib/ragdoll/import_job.rb +15 -0
- data/lib/ragdoll/ingestion.rb +30 -0
- data/lib/ragdoll/search.rb +18 -0
- data/lib/ragdoll/version.rb +7 -0
- data/lib/ragdoll.rb +6 -243
- data/lib/tasks/import_task.thor +32 -0
- data/lib/tasks/jobs_task.thor +40 -0
- data/lib/tasks/ragdoll_tasks.thor +7 -0
- data/lib/tasks/search_task.thor +55 -0
- metadata +37 -40
- data/db/migrate/001_enable_postgresql_extensions.rb +0 -23
- data/db/migrate/004_create_ragdoll_documents.rb +0 -70
- data/db/migrate/005_create_ragdoll_embeddings.rb +0 -41
- data/db/migrate/006_create_ragdoll_contents.rb +0 -47
- data/lib/ragdoll/core/client.rb +0 -315
- data/lib/ragdoll/core/configuration.rb +0 -273
- data/lib/ragdoll/core/database.rb +0 -141
- data/lib/ragdoll/core/document_management.rb +0 -110
- data/lib/ragdoll/core/document_processor.rb +0 -344
- data/lib/ragdoll/core/embedding_service.rb +0 -183
- data/lib/ragdoll/core/errors.rb +0 -11
- data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
- data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
- data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
- data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
- data/lib/ragdoll/core/metadata_schemas.rb +0 -334
- data/lib/ragdoll/core/models/audio_content.rb +0 -175
- data/lib/ragdoll/core/models/content.rb +0 -126
- data/lib/ragdoll/core/models/document.rb +0 -678
- data/lib/ragdoll/core/models/embedding.rb +0 -204
- data/lib/ragdoll/core/models/image_content.rb +0 -227
- data/lib/ragdoll/core/models/text_content.rb +0 -169
- data/lib/ragdoll/core/search_engine.rb +0 -50
- data/lib/ragdoll/core/services/image_description_service.rb +0 -230
- data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
- data/lib/ragdoll/core/shrine_config.rb +0 -71
- data/lib/ragdoll/core/text_chunker.rb +0 -210
- data/lib/ragdoll/core/text_generation_service.rb +0 -360
- data/lib/ragdoll/core/version.rb +0 -8
- data/lib/ragdoll/core.rb +0 -73
- data/lib/ragdoll-core.rb +0 -3
- data/lib/tasks/annotate.rake +0 -126
- data/lib/tasks/db.rake +0 -338
data/lib/ragdoll.rb
CHANGED
@@ -1,249 +1,12 @@
|
|
1
|
+
# This file is the main entry point for the Ragdoll gem, requiring all necessary components.
|
2
|
+
|
1
3
|
# frozen_string_literal: true
|
2
4
|
|
3
|
-
|
4
|
-
include DebugMe
|
5
|
-
$DEBUG_ME = true
|
5
|
+
# frozen_string_literal: true
|
6
6
|
|
7
|
-
require "
|
8
|
-
|
7
|
+
require "ragdoll/version"
|
8
|
+
require "ragdoll/engine"
|
9
9
|
|
10
10
|
module Ragdoll
|
11
|
-
class
|
12
|
-
|
13
|
-
#################
|
14
|
-
# Configuration #
|
15
|
-
#################
|
16
|
-
|
17
|
-
# Retrieve the current configuration.
|
18
|
-
# @example
|
19
|
-
# config = Ragdoll.config
|
20
|
-
# puts config.database_config[:adapter]
|
21
|
-
# @example
|
22
|
-
# current_config = Ragdoll.configuration
|
23
|
-
# puts current_config.models[:default]
|
24
|
-
# @return [Ragdoll::Core::Configuration] the current configuration instance.
|
25
|
-
def config
|
26
|
-
Core.config
|
27
|
-
end
|
28
|
-
|
29
|
-
# Configure the Ragdoll module.
|
30
|
-
# @yieldparam config [Ragdoll::Core::Configuration] the configuration instance to modify.
|
31
|
-
# @example
|
32
|
-
# Ragdoll.configure do |config|
|
33
|
-
# config.database_config[:adapter] = "postgres"
|
34
|
-
# end
|
35
|
-
# @yield [Ragdoll::Core::Configuration] yields the configuration instance for modification.
|
36
|
-
def configure(*args, **kwargs, &block)
|
37
|
-
Ragdoll::Core.configure(*args, **kwargs, &block)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Access the current configuration.
|
41
|
-
# @param args [Array] additional arguments for configuration.
|
42
|
-
# @param kwargs [Hash] keyword arguments for configuration.
|
43
|
-
# @return [Ragdoll::Core::Configuration] the current configuration instance.
|
44
|
-
def configuration(*args, **kwargs)
|
45
|
-
Ragdoll::Core.configuration(*args, **kwargs)
|
46
|
-
end
|
47
|
-
|
48
|
-
# @example
|
49
|
-
# Ragdoll.reset_configuration!
|
50
|
-
# puts Ragdoll.config.models[:default]
|
51
|
-
def reset_configuration!(*args, **kwargs)
|
52
|
-
Ragdoll::Core.reset_configuration!(*args, **kwargs)
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
#######################
|
57
|
-
# Document Management #
|
58
|
-
#######################
|
59
|
-
|
60
|
-
# Add a directory of documents to the system.
|
61
|
-
# @param path [String] the path to the directory containing documents.
|
62
|
-
# @example
|
63
|
-
# Ragdoll.add_directory(path: "/path/to/documents", recursive: true)
|
64
|
-
# @param recursive [Boolean] whether to add documents from subdirectories.
|
65
|
-
def add_directory(*args, **kwargs)
|
66
|
-
Ragdoll::Core.add_directory(*args, **kwargs)
|
67
|
-
end
|
68
|
-
|
69
|
-
# Add a single document to the system.
|
70
|
-
# @example
|
71
|
-
# Ragdoll.add_document(path: "/path/to/document.txt")
|
72
|
-
# @param path [String] the file path of the document to add.
|
73
|
-
def add_document(*args, **kwargs)
|
74
|
-
Ragdoll::Core.add_document(*args, **kwargs)
|
75
|
-
end
|
76
|
-
alias_method :add, :add_document
|
77
|
-
|
78
|
-
# Retrieve a document by its identifier.
|
79
|
-
# @param id [String] the identifier of the document to retrieve.
|
80
|
-
# @example
|
81
|
-
# document = Ragdoll.get_document(id: "123")
|
82
|
-
# puts document[:title] if document
|
83
|
-
# @return [Hash, nil] the document data or nil if not found.
|
84
|
-
def get_document(*args, **kwargs)
|
85
|
-
Ragdoll::Core.get_document(*args, **kwargs)
|
86
|
-
end
|
87
|
-
alias_method :get, :get_document
|
88
|
-
|
89
|
-
# List all documents in the system.
|
90
|
-
# @param options [Hash] options for listing documents, such as limit and offset.
|
91
|
-
# @example
|
92
|
-
# documents = Ragdoll.list_documents(limit: 10)
|
93
|
-
# documents.each { |doc| puts doc[:title] }
|
94
|
-
# @return [Array<Hash>] an array of document data.
|
95
|
-
def list_documents(*args, **kwargs)
|
96
|
-
Ragdoll::Core.list_documents(*args, **kwargs)
|
97
|
-
end
|
98
|
-
alias_method :list, :list_documents
|
99
|
-
|
100
|
-
# Delete a document by its identifier.
|
101
|
-
# @param id [String] the identifier of the document to delete.
|
102
|
-
# @example
|
103
|
-
# success = Ragdoll.delete_document(id: "123")
|
104
|
-
# puts "Deleted" if success
|
105
|
-
# @return [Boolean] true if the document was successfully deleted.
|
106
|
-
def delete_document(*args, **kwargs)
|
107
|
-
Ragdoll::Core.delete_document(*args, **kwargs)
|
108
|
-
end
|
109
|
-
alias_method :delete, :delete_document
|
110
|
-
|
111
|
-
# Get the status of a document.
|
112
|
-
# @param id [String] the identifier of the document to check status.
|
113
|
-
# @example
|
114
|
-
# status = Ragdoll.document_status(id: "123")
|
115
|
-
# puts status[:status]
|
116
|
-
# @return [Hash] the status information of the document.
|
117
|
-
def document_status(*args, **kwargs)
|
118
|
-
Ragdoll::Core.document_status(*args, **kwargs)
|
119
|
-
end
|
120
|
-
alias_method :status, :document_status
|
121
|
-
|
122
|
-
# Update a document's information.
|
123
|
-
# @param id [String] the identifier of the document to update.
|
124
|
-
# @param updates [Hash] the fields to update in the document.
|
125
|
-
# @example
|
126
|
-
# updated_doc = Ragdoll.update_document(id: "123", title: "New Title")
|
127
|
-
# puts updated_doc[:title]
|
128
|
-
# @return [Hash] the updated document data.
|
129
|
-
def update_document(*args, **kwargs)
|
130
|
-
Ragdoll::Core.update_document(*args, **kwargs)
|
131
|
-
end
|
132
|
-
alias_method :update, :update_document
|
133
|
-
|
134
|
-
# Retrieve all documents.
|
135
|
-
# @example
|
136
|
-
# all_docs = Ragdoll.documents
|
137
|
-
# all_docs.each { |doc| puts doc.title }
|
138
|
-
# @return [ActiveRecord::Relation] a relation of all documents.
|
139
|
-
def documents
|
140
|
-
Ragdoll::Core::Models::Document.all
|
141
|
-
end
|
142
|
-
alias_method :docs, :documents
|
143
|
-
|
144
|
-
#############
|
145
|
-
# Retrieval #
|
146
|
-
#############
|
147
|
-
|
148
|
-
# FIXME: This high-level API method should be able to take a query that is
|
149
|
-
# a string or a file. If its a file, then the downstream Process will
|
150
|
-
# be responsible for reading the file and passing the contents to the
|
151
|
-
# search method based upon whether the content is text, image or audio.
|
152
|
-
|
153
|
-
# Perform a search for documents based on a query.
|
154
|
-
# @param query [String] the search query string.
|
155
|
-
# @param options [Hash] additional search options, such as filters and limits.
|
156
|
-
# @example
|
157
|
-
# response = Ragdoll.search(query: "example search")
|
158
|
-
# response[:results].each { |result| puts result[:document_title] }
|
159
|
-
# @return [Hash] the search results.
|
160
|
-
def search(*args, **kwargs)
|
161
|
-
Ragdoll::Core.search(*args, **kwargs)
|
162
|
-
end
|
163
|
-
|
164
|
-
# Enhance a prompt with additional context.
|
165
|
-
# @param prompt [String] the original prompt to enhance.
|
166
|
-
# @param context_limit [Integer] the number of context chunks to include.
|
167
|
-
# @param options [Hash] additional options for enhancing the prompt.
|
168
|
-
# @example
|
169
|
-
# enhanced = Ragdoll.enhance_prompt(prompt: "What is AI?", context_limit: 3)
|
170
|
-
# puts enhanced[:enhanced_prompt]
|
171
|
-
# @return [Hash] the enhanced prompt data.
|
172
|
-
def enhance_prompt(*args, **kwargs)
|
173
|
-
Ragdoll::Core.enhance_prompt(*args, **kwargs)
|
174
|
-
end
|
175
|
-
|
176
|
-
# Retrieve context for a given query.
|
177
|
-
# @param query [String] the query to retrieve context for.
|
178
|
-
# @param limit [Integer] the number of context chunks to retrieve.
|
179
|
-
# @param options [Hash] additional options for context retrieval.
|
180
|
-
# @example
|
181
|
-
# context = Ragdoll.get_context(query: "AI", limit: 5)
|
182
|
-
# puts context[:combined_context]
|
183
|
-
# @return [Hash] the context data.
|
184
|
-
def get_context(*args, **kwargs)
|
185
|
-
Ragdoll::Core.get_context(*args, **kwargs)
|
186
|
-
end
|
187
|
-
|
188
|
-
# Search for content similar to a given query.
|
189
|
-
# @param query [String] the query to find similar content for.
|
190
|
-
# @param options [Hash] additional options for the search, such as filters and limits.
|
191
|
-
# @example
|
192
|
-
# similar_content = Ragdoll.search_similar_content(query: "AI")
|
193
|
-
# similar_content.each { |content| puts content[:document_title] }
|
194
|
-
# @return [Array<Hash>] an array of similar content data.
|
195
|
-
def search_similar_content(*args, **kwargs)
|
196
|
-
Ragdoll::Core.search_similar_content(*args, **kwargs)
|
197
|
-
end
|
198
|
-
|
199
|
-
|
200
|
-
###############
|
201
|
-
# Misc. Stuff #
|
202
|
-
###############
|
203
|
-
|
204
|
-
# Retrieve statistics about the system.
|
205
|
-
# @example
|
206
|
-
# stats = Ragdoll.stats
|
207
|
-
# puts stats[:total_documents]
|
208
|
-
# @return [Hash] the system statistics.
|
209
|
-
def stats(*args, **kwargs)
|
210
|
-
Ragdoll::Core.stats(*args, **kwargs)
|
211
|
-
end
|
212
|
-
|
213
|
-
# Check if the system is healthy.
|
214
|
-
# @example
|
215
|
-
# puts "System is healthy" if Ragdoll.healthy?
|
216
|
-
# @return [Boolean] true if the system is healthy.
|
217
|
-
def healthy?(*args, **kwargs)
|
218
|
-
Ragdoll::Core.healthy?(*args, **kwargs)
|
219
|
-
end
|
220
|
-
|
221
|
-
# Retrieve the client instance.
|
222
|
-
# @example
|
223
|
-
# client = Ragdoll.client
|
224
|
-
# puts client.inspect
|
225
|
-
# @return [Ragdoll::Core::Client] the client instance.
|
226
|
-
def client(*args, **kwargs)
|
227
|
-
Ragdoll::Core.client(*args, **kwargs)
|
228
|
-
end
|
229
|
-
|
230
|
-
# Retrieve the version information of the Ragdoll modules.
|
231
|
-
# @example
|
232
|
-
# versions = Ragdoll.version
|
233
|
-
# versions.each { |version| puts version }
|
234
|
-
# @return [Array<String>] an array of version strings for each module.
|
235
|
-
def version
|
236
|
-
versions = []
|
237
|
-
|
238
|
-
ObjectSpace.each_object(Module) do |mod|
|
239
|
-
if mod.name =~ /^Ragdoll::\w+$/
|
240
|
-
if defined?(mod::VERSION) && mod::VERSION.is_a?(String)
|
241
|
-
versions << "#{mod.name}: #{mod::VERSION}"
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|
245
|
-
|
246
|
-
versions
|
247
|
-
end
|
248
|
-
end
|
11
|
+
class Error < StandardError; end
|
249
12
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'thor'
|
4
|
+
require_relative '../ragdoll/import_job'
|
5
|
+
|
6
|
+
module Ragdoll
|
7
|
+
class ImportTask < Thor
|
8
|
+
desc "import PATH", "Import documents from a file, glob, or directory"
|
9
|
+
method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
|
10
|
+
method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
|
11
|
+
def import(path)
|
12
|
+
queue = SolidQueue.new(concurrency: options[:jobs])
|
13
|
+
files = if File.directory?(path)
|
14
|
+
if options[:recursive]
|
15
|
+
Dir.glob("#{path}/**/*")
|
16
|
+
else
|
17
|
+
Dir.glob("#{path}/*")
|
18
|
+
end
|
19
|
+
else
|
20
|
+
[path]
|
21
|
+
end
|
22
|
+
|
23
|
+
files.each do |file|
|
24
|
+
next unless File.file?(file)
|
25
|
+
|
26
|
+
queue.push(file) do |file|
|
27
|
+
Ragdoll::ImportJob.perform_async(file)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'thor'
|
4
|
+
|
5
|
+
module Ragdoll
|
6
|
+
class JobsTask < Thor
|
7
|
+
desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
|
8
|
+
method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
|
9
|
+
method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
|
10
|
+
method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
|
11
|
+
method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
|
12
|
+
method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
|
13
|
+
method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
|
14
|
+
def jobs(job_id = nil)
|
15
|
+
if job_id
|
16
|
+
if options[:stop]
|
17
|
+
puts "Stopping job ID: #{job_id}..."
|
18
|
+
elsif options[:pause]
|
19
|
+
puts "Pausing job ID: #{job_id}..."
|
20
|
+
elsif options[:resume]
|
21
|
+
puts "Resuming job ID: #{job_id}..."
|
22
|
+
else
|
23
|
+
puts "Fetching status for job ID: #{job_id}..."
|
24
|
+
end
|
25
|
+
else
|
26
|
+
if options[:stop_all]
|
27
|
+
puts "Stopping all jobs..."
|
28
|
+
elsif options[:pause_all]
|
29
|
+
puts "Pausing all running jobs..."
|
30
|
+
elsif options[:resume_all]
|
31
|
+
puts "Resuming all paused jobs..."
|
32
|
+
else
|
33
|
+
puts "Fetching status of all running and queued import jobs..."
|
34
|
+
puts "Job ID: 12345, Status: Running, File: document1.txt"
|
35
|
+
puts "Job ID: 12346, Status: Running, File: document2.txt"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'thor'
|
4
|
+
require_relative '../ragdoll/search'
|
5
|
+
|
6
|
+
module Ragdoll
|
7
|
+
class SearchTask < Thor
|
8
|
+
desc "search PROMPT", "Search the database with a prompt"
|
9
|
+
method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
|
10
|
+
method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
|
11
|
+
method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
|
12
|
+
def search(prompt = nil)
|
13
|
+
if options[:prompt]
|
14
|
+
prompt = File.read(options[:prompt])
|
15
|
+
end
|
16
|
+
|
17
|
+
unless prompt
|
18
|
+
puts "Please provide a prompt as a string or with the -p option."
|
19
|
+
return
|
20
|
+
end
|
21
|
+
|
22
|
+
keywords = extract_keywords(prompt)
|
23
|
+
vectorized_prompt = vectorize_prompt(prompt)
|
24
|
+
search_instance = Ragdoll::Search.new(vectorized_prompt)
|
25
|
+
results = search_instance.search_database(options[:max_count])
|
26
|
+
|
27
|
+
if options[:rerank]
|
28
|
+
results = rerank_results(results, keywords)
|
29
|
+
end
|
30
|
+
|
31
|
+
results.each do |result|
|
32
|
+
puts "Source: #{result[:source]}"
|
33
|
+
puts "Metadata: #{result[:metadata]}"
|
34
|
+
puts "--------------------------------"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def rerank_results(results, keywords)
|
41
|
+
results.sort_by do |result|
|
42
|
+
content = result[:source].downcase
|
43
|
+
keywords.count { |keyword| content.include?(keyword) }
|
44
|
+
end.reverse
|
45
|
+
end
|
46
|
+
|
47
|
+
def extract_keywords(prompt)
|
48
|
+
prompt.split.map(&:downcase).uniq
|
49
|
+
end
|
50
|
+
|
51
|
+
def vectorize_prompt(prompt)
|
52
|
+
prompt.split.map(&:downcase)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ragdoll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dewayne VanHoozer
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
11
|
-
dependencies:
|
10
|
+
date: 2025-02-19 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: rails
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '7.1'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '7.1'
|
12
26
|
description: Under development. Contributors welcome.
|
13
27
|
email:
|
14
28
|
- dvanhoozer@gmail.com
|
@@ -18,47 +32,30 @@ extra_rdoc_files: []
|
|
18
32
|
files:
|
19
33
|
- README.md
|
20
34
|
- Rakefile
|
21
|
-
-
|
22
|
-
-
|
23
|
-
-
|
24
|
-
-
|
25
|
-
-
|
35
|
+
- app/models/ragdoll/document.rb
|
36
|
+
- app/models/ragdoll/embedding.rb
|
37
|
+
- config/initializers/ragdoll.rb
|
38
|
+
- config/routes.rb
|
39
|
+
- db/migrate/20250218123456_create_documents.rb
|
40
|
+
- lib/config/database.yml
|
41
|
+
- lib/config/ragdoll.yml
|
26
42
|
- lib/ragdoll.rb
|
27
|
-
- lib/ragdoll/
|
28
|
-
- lib/ragdoll/
|
29
|
-
- lib/ragdoll/
|
30
|
-
- lib/ragdoll/
|
31
|
-
- lib/ragdoll/
|
32
|
-
- lib/
|
33
|
-
- lib/
|
34
|
-
- lib/
|
35
|
-
- lib/
|
36
|
-
- lib/ragdoll/core/jobs/extract_text.rb
|
37
|
-
- lib/ragdoll/core/jobs/generate_embeddings.rb
|
38
|
-
- lib/ragdoll/core/jobs/generate_summary.rb
|
39
|
-
- lib/ragdoll/core/metadata_schemas.rb
|
40
|
-
- lib/ragdoll/core/models/audio_content.rb
|
41
|
-
- lib/ragdoll/core/models/content.rb
|
42
|
-
- lib/ragdoll/core/models/document.rb
|
43
|
-
- lib/ragdoll/core/models/embedding.rb
|
44
|
-
- lib/ragdoll/core/models/image_content.rb
|
45
|
-
- lib/ragdoll/core/models/text_content.rb
|
46
|
-
- lib/ragdoll/core/search_engine.rb
|
47
|
-
- lib/ragdoll/core/services/image_description_service.rb
|
48
|
-
- lib/ragdoll/core/services/metadata_generator.rb
|
49
|
-
- lib/ragdoll/core/shrine_config.rb
|
50
|
-
- lib/ragdoll/core/text_chunker.rb
|
51
|
-
- lib/ragdoll/core/text_generation_service.rb
|
52
|
-
- lib/ragdoll/core/version.rb
|
53
|
-
- lib/tasks/annotate.rake
|
54
|
-
- lib/tasks/db.rake
|
43
|
+
- lib/ragdoll/engine.rb
|
44
|
+
- lib/ragdoll/import_job.rb
|
45
|
+
- lib/ragdoll/ingestion.rb
|
46
|
+
- lib/ragdoll/search.rb
|
47
|
+
- lib/ragdoll/version.rb
|
48
|
+
- lib/tasks/import_task.thor
|
49
|
+
- lib/tasks/jobs_task.thor
|
50
|
+
- lib/tasks/ragdoll_tasks.thor
|
51
|
+
- lib/tasks/search_task.thor
|
55
52
|
homepage: https://github.com/MadBomber/ragdoll
|
56
53
|
licenses:
|
57
54
|
- MIT
|
58
55
|
metadata:
|
59
56
|
allowed_push_host: https://rubygems.org
|
60
57
|
homepage_uri: https://github.com/MadBomber/ragdoll
|
61
|
-
source_code_uri: https://github.com/MadBomber/ragdoll
|
58
|
+
source_code_uri: https://github.com/MadBomber/ragdoll
|
62
59
|
changelog_uri: https://github.com/MadBomber/ragdoll/blob/main/CHANGELOG.md
|
63
60
|
rdoc_options: []
|
64
61
|
require_paths:
|
@@ -67,14 +64,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
64
|
requirements:
|
68
65
|
- - ">="
|
69
66
|
- !ruby/object:Gem::Version
|
70
|
-
version: 3.
|
67
|
+
version: 3.1.0
|
71
68
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
69
|
requirements:
|
73
70
|
- - ">="
|
74
71
|
- !ruby/object:Gem::Version
|
75
72
|
version: '0'
|
76
73
|
requirements: []
|
77
|
-
rubygems_version: 3.
|
74
|
+
rubygems_version: 3.6.3
|
78
75
|
specification_version: 4
|
79
|
-
summary:
|
76
|
+
summary: Ruby on Rails Engine
|
80
77
|
test_files: []
|
@@ -1,23 +0,0 @@
|
|
1
|
-
class EnablePostgresqlExtensions < ActiveRecord::Migration[7.0]
|
2
|
-
def up
|
3
|
-
# This migration is now handled by the db:create rake task
|
4
|
-
# Just ensure required extensions are available
|
5
|
-
|
6
|
-
# Vector similarity search (required for embeddings)
|
7
|
-
execute "CREATE EXTENSION IF NOT EXISTS vector"
|
8
|
-
|
9
|
-
# Useful optional extensions for text processing and search
|
10
|
-
execute "CREATE EXTENSION IF NOT EXISTS unaccent" # Remove accents from text
|
11
|
-
execute "CREATE EXTENSION IF NOT EXISTS pg_trgm" # Trigram matching for fuzzy search
|
12
|
-
|
13
|
-
# UUID support (useful for generating unique identifiers)
|
14
|
-
execute "CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\""
|
15
|
-
end
|
16
|
-
|
17
|
-
def down
|
18
|
-
execute <<-SQL
|
19
|
-
DROP DATABASE IF EXISTS ragdoll_development;
|
20
|
-
DROP ROLE IF EXISTS ragdoll;
|
21
|
-
SQL
|
22
|
-
end
|
23
|
-
end
|
@@ -1,70 +0,0 @@
|
|
1
|
-
class CreateRagdollDocuments < ActiveRecord::Migration[7.0]
|
2
|
-
def change
|
3
|
-
create_table :ragdoll_documents,
|
4
|
-
comment: "Core documents table with LLM-generated structured metadata" do |t|
|
5
|
-
|
6
|
-
t.string :location, null: false,
|
7
|
-
comment: "Source location of document (file path, URL, or identifier)"
|
8
|
-
|
9
|
-
t.string :title, null: false,
|
10
|
-
comment: "Human-readable document title for display and search"
|
11
|
-
|
12
|
-
t.text :summary, null: false, default: "",
|
13
|
-
comment: "LLM-generated summary of document content"
|
14
|
-
|
15
|
-
t.text :keywords , null: false, default: "",
|
16
|
-
comment: "LLM-generated comma-separated keywords of document"
|
17
|
-
|
18
|
-
t.string :document_type, null: false, default: "text",
|
19
|
-
comment: "Document format type"
|
20
|
-
|
21
|
-
t.string :status, null: false, default: "pending",
|
22
|
-
comment: "Document processing status"
|
23
|
-
|
24
|
-
t.json :metadata, default: {},
|
25
|
-
comment: "LLM-generated structured metadata about the file"
|
26
|
-
|
27
|
-
t.timestamp :file_modified_at, null: false, default: -> { "CURRENT_TIMESTAMP" },
|
28
|
-
comment: "Timestamp when the source file was last modified"
|
29
|
-
|
30
|
-
t.timestamps null: false,
|
31
|
-
comment: "Standard creation and update timestamps"
|
32
|
-
|
33
|
-
###########
|
34
|
-
# Indexes #
|
35
|
-
###########
|
36
|
-
|
37
|
-
t.index :location, unique: true,
|
38
|
-
comment: "Unique index for document source lookup"
|
39
|
-
|
40
|
-
t.index :title,
|
41
|
-
comment: "Index for title-based search"
|
42
|
-
|
43
|
-
t.index :document_type,
|
44
|
-
comment: "Index for filtering by document type"
|
45
|
-
|
46
|
-
t.index :status,
|
47
|
-
comment: "Index for filtering by processing status"
|
48
|
-
|
49
|
-
t.index :created_at,
|
50
|
-
comment: "Index for chronological sorting"
|
51
|
-
|
52
|
-
t.index %i[document_type status],
|
53
|
-
comment: "Composite index for type+status filtering"
|
54
|
-
|
55
|
-
t.index "to_tsvector('english', COALESCE(title, '') ||
|
56
|
-
' ' ||
|
57
|
-
COALESCE(metadata->>'summary', '') ||
|
58
|
-
' ' || COALESCE(metadata->>'keywords', '') ||
|
59
|
-
' ' || COALESCE(metadata->>'description', ''))",
|
60
|
-
using: :gin, name: "index_ragdoll_documents_on_fulltext_search",
|
61
|
-
comment: "Full-text search across title and metadata fields"
|
62
|
-
|
63
|
-
t.index "(metadata->>'document_type')", name: "index_ragdoll_documents_on_metadata_type",
|
64
|
-
comment: "Index for filtering by document type"
|
65
|
-
|
66
|
-
t.index "(metadata->>'classification')", name: "index_ragdoll_documents_on_metadata_classification",
|
67
|
-
comment: "Index for filtering by document classification"
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
class CreateRagdollEmbeddings < ActiveRecord::Migration[7.0]
|
2
|
-
def change
|
3
|
-
create_table :ragdoll_embeddings,
|
4
|
-
comment: "Polymorphic vector embeddings storage for semantic similarity search" do |t|
|
5
|
-
|
6
|
-
t.references :embeddable, polymorphic: true, null: false,
|
7
|
-
comment: "Polymorphic reference to embeddable content"
|
8
|
-
|
9
|
-
t.text :content, null: false, default: "",
|
10
|
-
comment: "Original text content that was embedded"
|
11
|
-
|
12
|
-
t.vector :embedding_vector, limit: 1536, null: false,
|
13
|
-
comment: "Vector embedding using pgvector"
|
14
|
-
|
15
|
-
t.integer :chunk_index, null: false,
|
16
|
-
comment: "Chunk index for ordering embeddings"
|
17
|
-
|
18
|
-
t.integer :usage_count, default: 0,
|
19
|
-
comment: "Number of times used in similarity searches"
|
20
|
-
|
21
|
-
t.datetime :returned_at,
|
22
|
-
comment: "Timestamp of most recent usage"
|
23
|
-
|
24
|
-
t.json :metadata, default: {},
|
25
|
-
comment: "Embedding-specific metadata (positions, processing info)"
|
26
|
-
|
27
|
-
t.timestamps null: false,
|
28
|
-
comment: "Standard creation and update timestamps"
|
29
|
-
|
30
|
-
###########
|
31
|
-
# Indexes #
|
32
|
-
###########
|
33
|
-
|
34
|
-
t.index %i[embeddable_type embeddable_id],
|
35
|
-
comment: "Index for finding embeddings by embeddable content"
|
36
|
-
|
37
|
-
t.index :embedding_vector, using: :ivfflat, opclass: :vector_cosine_ops, name: "index_ragdoll_embeddings_on_embedding_vector_cosine",
|
38
|
-
comment: "IVFFlat index for fast cosine similarity search"
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|