ragdoll 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +318 -40
- data/Rakefile +66 -4
- data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
- data/app/jobs/ragdoll/extract_text_job.rb +38 -0
- data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
- data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
- data/app/lib/ragdoll/metadata_schemas.rb +332 -0
- data/app/models/ragdoll/audio_content.rb +142 -0
- data/app/models/ragdoll/content.rb +95 -0
- data/app/models/ragdoll/document.rb +606 -4
- data/app/models/ragdoll/embedding.rb +172 -5
- data/app/models/ragdoll/image_content.rb +194 -0
- data/app/models/ragdoll/text_content.rb +137 -0
- data/app/services/ragdoll/configuration_service.rb +113 -0
- data/app/services/ragdoll/document_management.rb +108 -0
- data/app/services/ragdoll/document_processor.rb +342 -0
- data/app/services/ragdoll/embedding_service.rb +202 -0
- data/app/services/ragdoll/image_description_service.rb +230 -0
- data/app/services/ragdoll/metadata_generator.rb +329 -0
- data/app/services/ragdoll/model_resolver.rb +72 -0
- data/app/services/ragdoll/search_engine.rb +51 -0
- data/app/services/ragdoll/text_chunker.rb +208 -0
- data/app/services/ragdoll/text_generation_service.rb +355 -0
- data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
- data/db/migrate/004_create_ragdoll_documents.rb +70 -0
- data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
- data/db/migrate/006_create_ragdoll_contents.rb +47 -0
- data/lib/ragdoll/core/client.rb +306 -0
- data/lib/ragdoll/core/configuration.rb +257 -0
- data/lib/ragdoll/core/database.rb +141 -0
- data/lib/ragdoll/core/errors.rb +11 -0
- data/lib/ragdoll/core/model.rb +45 -0
- data/lib/ragdoll/core/shrine_config.rb +71 -0
- data/lib/ragdoll/core/version.rb +8 -0
- data/lib/ragdoll/core.rb +91 -0
- data/lib/ragdoll-core.rb +3 -0
- data/lib/ragdoll.rb +243 -6
- data/lib/tasks/annotate.rake +126 -0
- data/lib/tasks/db.rake +338 -0
- metadata +42 -35
- data/config/initializers/ragdoll.rb +0 -6
- data/config/routes.rb +0 -5
- data/db/migrate/20250218123456_create_documents.rb +0 -20
- data/lib/config/database.yml +0 -28
- data/lib/config/ragdoll.yml +0 -31
- data/lib/ragdoll/engine.rb +0 -16
- data/lib/ragdoll/import_job.rb +0 -15
- data/lib/ragdoll/ingestion.rb +0 -30
- data/lib/ragdoll/search.rb +0 -18
- data/lib/ragdoll/version.rb +0 -7
- data/lib/tasks/import_task.thor +0 -32
- data/lib/tasks/jobs_task.thor +0 -40
- data/lib/tasks/ragdoll_tasks.thor +0 -7
- data/lib/tasks/search_task.thor +0 -55
data/lib/ragdoll/engine.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# This file defines the Ragdoll engine, which integrates the gem with Rails applications.
|
2
|
-
|
3
|
-
# frozen_string_literal: true
|
4
|
-
|
5
|
-
require "rails/engine"
|
6
|
-
|
7
|
-
module Ragdoll
|
8
|
-
class Engine < ::Rails::Engine
|
9
|
-
isolate_namespace Ragdoll
|
10
|
-
config.generators do |g|
|
11
|
-
g.test_framework :minitest
|
12
|
-
g.fixture_replacement :factory_bot
|
13
|
-
g.factory_bot dir: 'test/factories'
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/ragdoll/import_job.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# This file defines the ImportJob class for handling document import tasks in the background.
|
2
|
-
|
3
|
-
# frozen_string_literal: true
|
4
|
-
|
5
|
-
module Ragdoll
|
6
|
-
class ImportJob < SolidJob::Base
|
7
|
-
def perform(file)
|
8
|
-
document = File.read(file)
|
9
|
-
ingestion = Ragdoll::Ingestion.new(document)
|
10
|
-
vectorized_chunks = ingestion.chunk_and_vectorize
|
11
|
-
ingestion.store_in_database
|
12
|
-
puts "Imported #{file} successfully."
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/ragdoll/ingestion.rb
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
# This file contains the Ingestion class responsible for processing documents by chunking and vectorizing them.
|
2
|
-
|
3
|
-
# frozen_string_literal: true
|
4
|
-
|
5
|
-
module Ragdoll
|
6
|
-
class Ingestion
|
7
|
-
def initialize(document)
|
8
|
-
@document = document
|
9
|
-
end
|
10
|
-
|
11
|
-
def chunk_and_vectorize
|
12
|
-
# Example logic for chunking and vectorization
|
13
|
-
chunks = @document.split("\n\n") # Split document into paragraphs
|
14
|
-
vectorized_chunks = chunks.map { |chunk| vectorize(chunk) }
|
15
|
-
vectorized_chunks
|
16
|
-
end
|
17
|
-
|
18
|
-
def store_in_database
|
19
|
-
# Implement logic to store vectorized data in the database
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def vectorize(chunk)
|
25
|
-
# Placeholder for vectorization logic
|
26
|
-
# Convert chunk to a vector representation
|
27
|
-
chunk.split.map(&:downcase) # Simple example: split words and downcase
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
data/lib/ragdoll/search.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# This file contains the Search class responsible for querying the database with a prompt.
|
2
|
-
|
3
|
-
# frozen_string_literal: true
|
4
|
-
|
5
|
-
module Ragdoll
|
6
|
-
class Search
|
7
|
-
def initialize(prompt)
|
8
|
-
@prompt = prompt
|
9
|
-
end
|
10
|
-
|
11
|
-
def search_database(max_count)
|
12
|
-
# Example logic for searching the database
|
13
|
-
# This is a placeholder for actual database search logic
|
14
|
-
results = [] # Placeholder for actual database query results
|
15
|
-
results.select { |entry| entry.include?(@prompt) }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/ragdoll/version.rb
DELETED
data/lib/tasks/import_task.thor
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require_relative '../ragdoll/import_job'
|
5
|
-
|
6
|
-
module Ragdoll
|
7
|
-
class ImportTask < Thor
|
8
|
-
desc "import PATH", "Import documents from a file, glob, or directory"
|
9
|
-
method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
|
10
|
-
method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
|
11
|
-
def import(path)
|
12
|
-
queue = SolidQueue.new(concurrency: options[:jobs])
|
13
|
-
files = if File.directory?(path)
|
14
|
-
if options[:recursive]
|
15
|
-
Dir.glob("#{path}/**/*")
|
16
|
-
else
|
17
|
-
Dir.glob("#{path}/*")
|
18
|
-
end
|
19
|
-
else
|
20
|
-
[path]
|
21
|
-
end
|
22
|
-
|
23
|
-
files.each do |file|
|
24
|
-
next unless File.file?(file)
|
25
|
-
|
26
|
-
queue.push(file) do |file|
|
27
|
-
Ragdoll::ImportJob.perform_async(file)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
data/lib/tasks/jobs_task.thor
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
|
5
|
-
module Ragdoll
|
6
|
-
class JobsTask < Thor
|
7
|
-
desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
|
8
|
-
method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
|
9
|
-
method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
|
10
|
-
method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
|
11
|
-
method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
|
12
|
-
method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
|
13
|
-
method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
|
14
|
-
def jobs(job_id = nil)
|
15
|
-
if job_id
|
16
|
-
if options[:stop]
|
17
|
-
puts "Stopping job ID: #{job_id}..."
|
18
|
-
elsif options[:pause]
|
19
|
-
puts "Pausing job ID: #{job_id}..."
|
20
|
-
elsif options[:resume]
|
21
|
-
puts "Resuming job ID: #{job_id}..."
|
22
|
-
else
|
23
|
-
puts "Fetching status for job ID: #{job_id}..."
|
24
|
-
end
|
25
|
-
else
|
26
|
-
if options[:stop_all]
|
27
|
-
puts "Stopping all jobs..."
|
28
|
-
elsif options[:pause_all]
|
29
|
-
puts "Pausing all running jobs..."
|
30
|
-
elsif options[:resume_all]
|
31
|
-
puts "Resuming all paused jobs..."
|
32
|
-
else
|
33
|
-
puts "Fetching status of all running and queued import jobs..."
|
34
|
-
puts "Job ID: 12345, Status: Running, File: document1.txt"
|
35
|
-
puts "Job ID: 12346, Status: Running, File: document2.txt"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/tasks/search_task.thor
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'thor'
|
4
|
-
require_relative '../ragdoll/search'
|
5
|
-
|
6
|
-
module Ragdoll
|
7
|
-
class SearchTask < Thor
|
8
|
-
desc "search PROMPT", "Search the database with a prompt"
|
9
|
-
method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
|
10
|
-
method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
|
11
|
-
method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
|
12
|
-
def search(prompt = nil)
|
13
|
-
if options[:prompt]
|
14
|
-
prompt = File.read(options[:prompt])
|
15
|
-
end
|
16
|
-
|
17
|
-
unless prompt
|
18
|
-
puts "Please provide a prompt as a string or with the -p option."
|
19
|
-
return
|
20
|
-
end
|
21
|
-
|
22
|
-
keywords = extract_keywords(prompt)
|
23
|
-
vectorized_prompt = vectorize_prompt(prompt)
|
24
|
-
search_instance = Ragdoll::Search.new(vectorized_prompt)
|
25
|
-
results = search_instance.search_database(options[:max_count])
|
26
|
-
|
27
|
-
if options[:rerank]
|
28
|
-
results = rerank_results(results, keywords)
|
29
|
-
end
|
30
|
-
|
31
|
-
results.each do |result|
|
32
|
-
puts "Source: #{result[:source]}"
|
33
|
-
puts "Metadata: #{result[:metadata]}"
|
34
|
-
puts "--------------------------------"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def rerank_results(results, keywords)
|
41
|
-
results.sort_by do |result|
|
42
|
-
content = result[:source].downcase
|
43
|
-
keywords.count { |keyword| content.include?(keyword) }
|
44
|
-
end.reverse
|
45
|
-
end
|
46
|
-
|
47
|
-
def extract_keywords(prompt)
|
48
|
-
prompt.split.map(&:downcase).uniq
|
49
|
-
end
|
50
|
-
|
51
|
-
def vectorize_prompt(prompt)
|
52
|
-
prompt.split.map(&:downcase)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|