ragdoll 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +318 -40
- data/Rakefile +15 -4
- data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
- data/db/migrate/004_create_ragdoll_documents.rb +70 -0
- data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
- data/db/migrate/006_create_ragdoll_contents.rb +47 -0
- data/lib/ragdoll/core/client.rb +315 -0
- data/lib/ragdoll/core/configuration.rb +273 -0
- data/lib/ragdoll/core/database.rb +141 -0
- data/lib/ragdoll/core/document_management.rb +110 -0
- data/lib/ragdoll/core/document_processor.rb +344 -0
- data/lib/ragdoll/core/embedding_service.rb +183 -0
- data/lib/ragdoll/core/errors.rb +11 -0
- data/lib/ragdoll/core/jobs/extract_keywords.rb +32 -0
- data/lib/ragdoll/core/jobs/extract_text.rb +42 -0
- data/lib/ragdoll/core/jobs/generate_embeddings.rb +32 -0
- data/lib/ragdoll/core/jobs/generate_summary.rb +29 -0
- data/lib/ragdoll/core/metadata_schemas.rb +334 -0
- data/lib/ragdoll/core/models/audio_content.rb +175 -0
- data/lib/ragdoll/core/models/content.rb +126 -0
- data/lib/ragdoll/core/models/document.rb +678 -0
- data/lib/ragdoll/core/models/embedding.rb +204 -0
- data/lib/ragdoll/core/models/image_content.rb +227 -0
- data/lib/ragdoll/core/models/text_content.rb +169 -0
- data/lib/ragdoll/core/search_engine.rb +50 -0
- data/lib/ragdoll/core/services/image_description_service.rb +230 -0
- data/lib/ragdoll/core/services/metadata_generator.rb +335 -0
- data/lib/ragdoll/core/shrine_config.rb +71 -0
- data/lib/ragdoll/core/text_chunker.rb +210 -0
- data/lib/ragdoll/core/text_generation_service.rb +360 -0
- data/lib/ragdoll/core/version.rb +8 -0
- data/lib/ragdoll/core.rb +73 -0
- data/lib/ragdoll-core.rb +3 -0
- data/lib/ragdoll.rb +243 -6
- data/lib/tasks/annotate.rake +126 -0
- data/lib/tasks/db.rake +338 -0
- metadata +40 -37
- data/app/models/ragdoll/document.rb +0 -9
- data/app/models/ragdoll/embedding.rb +0 -9
- data/config/initializers/ragdoll.rb +0 -6
- data/config/routes.rb +0 -5
- data/db/migrate/20250218123456_create_documents.rb +0 -20
- data/lib/config/database.yml +0 -28
- data/lib/config/ragdoll.yml +0 -31
- data/lib/ragdoll/engine.rb +0 -16
- data/lib/ragdoll/import_job.rb +0 -15
- data/lib/ragdoll/ingestion.rb +0 -30
- data/lib/ragdoll/search.rb +0 -18
- data/lib/ragdoll/version.rb +0 -7
- data/lib/tasks/import_task.thor +0 -32
- data/lib/tasks/jobs_task.thor +0 -40
- data/lib/tasks/ragdoll_tasks.thor +0 -7
- data/lib/tasks/search_task.thor +0 -55
data/lib/tasks/import_task.thor
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require_relative '../ragdoll/import_job'
|
5
|
-
|
6
|
-
module Ragdoll
|
7
|
-
class ImportTask < Thor
|
8
|
-
desc "import PATH", "Import documents from a file, glob, or directory"
|
9
|
-
method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
|
10
|
-
method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
|
11
|
-
def import(path)
|
12
|
-
queue = SolidQueue.new(concurrency: options[:jobs])
|
13
|
-
files = if File.directory?(path)
|
14
|
-
if options[:recursive]
|
15
|
-
Dir.glob("#{path}/**/*")
|
16
|
-
else
|
17
|
-
Dir.glob("#{path}/*")
|
18
|
-
end
|
19
|
-
else
|
20
|
-
[path]
|
21
|
-
end
|
22
|
-
|
23
|
-
files.each do |file|
|
24
|
-
next unless File.file?(file)
|
25
|
-
|
26
|
-
queue.push(file) do |file|
|
27
|
-
Ragdoll::ImportJob.perform_async(file)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
data/lib/tasks/jobs_task.thor
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
|
5
|
-
module Ragdoll
|
6
|
-
class JobsTask < Thor
|
7
|
-
desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
|
8
|
-
method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
|
9
|
-
method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
|
10
|
-
method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
|
11
|
-
method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
|
12
|
-
method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
|
13
|
-
method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
|
14
|
-
def jobs(job_id = nil)
|
15
|
-
if job_id
|
16
|
-
if options[:stop]
|
17
|
-
puts "Stopping job ID: #{job_id}..."
|
18
|
-
elsif options[:pause]
|
19
|
-
puts "Pausing job ID: #{job_id}..."
|
20
|
-
elsif options[:resume]
|
21
|
-
puts "Resuming job ID: #{job_id}..."
|
22
|
-
else
|
23
|
-
puts "Fetching status for job ID: #{job_id}..."
|
24
|
-
end
|
25
|
-
else
|
26
|
-
if options[:stop_all]
|
27
|
-
puts "Stopping all jobs..."
|
28
|
-
elsif options[:pause_all]
|
29
|
-
puts "Pausing all running jobs..."
|
30
|
-
elsif options[:resume_all]
|
31
|
-
puts "Resuming all paused jobs..."
|
32
|
-
else
|
33
|
-
puts "Fetching status of all running and queued import jobs..."
|
34
|
-
puts "Job ID: 12345, Status: Running, File: document1.txt"
|
35
|
-
puts "Job ID: 12346, Status: Running, File: document2.txt"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/tasks/search_task.thor
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require_relative '../ragdoll/search'
|
5
|
-
|
6
|
-
module Ragdoll
|
7
|
-
class SearchTask < Thor
|
8
|
-
desc "search PROMPT", "Search the database with a prompt"
|
9
|
-
method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
|
10
|
-
method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
|
11
|
-
method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
|
12
|
-
def search(prompt = nil)
|
13
|
-
if options[:prompt]
|
14
|
-
prompt = File.read(options[:prompt])
|
15
|
-
end
|
16
|
-
|
17
|
-
unless prompt
|
18
|
-
puts "Please provide a prompt as a string or with the -p option."
|
19
|
-
return
|
20
|
-
end
|
21
|
-
|
22
|
-
keywords = extract_keywords(prompt)
|
23
|
-
vectorized_prompt = vectorize_prompt(prompt)
|
24
|
-
search_instance = Ragdoll::Search.new(vectorized_prompt)
|
25
|
-
results = search_instance.search_database(options[:max_count])
|
26
|
-
|
27
|
-
if options[:rerank]
|
28
|
-
results = rerank_results(results, keywords)
|
29
|
-
end
|
30
|
-
|
31
|
-
results.each do |result|
|
32
|
-
puts "Source: #{result[:source]}"
|
33
|
-
puts "Metadata: #{result[:metadata]}"
|
34
|
-
puts "--------------------------------"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def rerank_results(results, keywords)
|
41
|
-
results.sort_by do |result|
|
42
|
-
content = result[:source].downcase
|
43
|
-
keywords.count { |keyword| content.include?(keyword) }
|
44
|
-
end.reverse
|
45
|
-
end
|
46
|
-
|
47
|
-
def extract_keywords(prompt)
|
48
|
-
prompt.split.map(&:downcase).uniq
|
49
|
-
end
|
50
|
-
|
51
|
-
def vectorize_prompt(prompt)
|
52
|
-
prompt.split.map(&:downcase)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|