ragdoll 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/README.md +318 -40
  3. data/Rakefile +66 -4
  4. data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
  5. data/app/jobs/ragdoll/extract_text_job.rb +38 -0
  6. data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
  7. data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
  8. data/app/lib/ragdoll/metadata_schemas.rb +332 -0
  9. data/app/models/ragdoll/audio_content.rb +142 -0
  10. data/app/models/ragdoll/content.rb +95 -0
  11. data/app/models/ragdoll/document.rb +606 -4
  12. data/app/models/ragdoll/embedding.rb +172 -5
  13. data/app/models/ragdoll/image_content.rb +194 -0
  14. data/app/models/ragdoll/text_content.rb +137 -0
  15. data/app/services/ragdoll/configuration_service.rb +113 -0
  16. data/app/services/ragdoll/document_management.rb +108 -0
  17. data/app/services/ragdoll/document_processor.rb +342 -0
  18. data/app/services/ragdoll/embedding_service.rb +202 -0
  19. data/app/services/ragdoll/image_description_service.rb +230 -0
  20. data/app/services/ragdoll/metadata_generator.rb +329 -0
  21. data/app/services/ragdoll/model_resolver.rb +72 -0
  22. data/app/services/ragdoll/search_engine.rb +51 -0
  23. data/app/services/ragdoll/text_chunker.rb +208 -0
  24. data/app/services/ragdoll/text_generation_service.rb +355 -0
  25. data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
  26. data/db/migrate/004_create_ragdoll_documents.rb +70 -0
  27. data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
  28. data/db/migrate/006_create_ragdoll_contents.rb +47 -0
  29. data/lib/ragdoll/core/client.rb +306 -0
  30. data/lib/ragdoll/core/configuration.rb +257 -0
  31. data/lib/ragdoll/core/database.rb +141 -0
  32. data/lib/ragdoll/core/errors.rb +11 -0
  33. data/lib/ragdoll/core/model.rb +45 -0
  34. data/lib/ragdoll/core/shrine_config.rb +71 -0
  35. data/lib/ragdoll/core/version.rb +8 -0
  36. data/lib/ragdoll/core.rb +91 -0
  37. data/lib/ragdoll-core.rb +3 -0
  38. data/lib/ragdoll.rb +243 -6
  39. data/lib/tasks/annotate.rake +126 -0
  40. data/lib/tasks/db.rake +338 -0
  41. metadata +42 -35
  42. data/config/initializers/ragdoll.rb +0 -6
  43. data/config/routes.rb +0 -5
  44. data/db/migrate/20250218123456_create_documents.rb +0 -20
  45. data/lib/config/database.yml +0 -28
  46. data/lib/config/ragdoll.yml +0 -31
  47. data/lib/ragdoll/engine.rb +0 -16
  48. data/lib/ragdoll/import_job.rb +0 -15
  49. data/lib/ragdoll/ingestion.rb +0 -30
  50. data/lib/ragdoll/search.rb +0 -18
  51. data/lib/ragdoll/version.rb +0 -7
  52. data/lib/tasks/import_task.thor +0 -32
  53. data/lib/tasks/jobs_task.thor +0 -40
  54. data/lib/tasks/ragdoll_tasks.thor +0 -7
  55. data/lib/tasks/search_task.thor +0 -55
@@ -1,16 +0,0 @@
1
- # This file defines the Ragdoll engine, which integrates the gem with Rails applications.
2
-
3
- # frozen_string_literal: true
4
-
5
- require "rails/engine"
6
-
7
- module Ragdoll
8
- class Engine < ::Rails::Engine
9
- isolate_namespace Ragdoll
10
- config.generators do |g|
11
- g.test_framework :minitest
12
- g.fixture_replacement :factory_bot
13
- g.factory_bot dir: 'test/factories'
14
- end
15
- end
16
- end
@@ -1,15 +0,0 @@
1
- # This file defines the ImportJob class for handling document import tasks in the background.
2
-
3
- # frozen_string_literal: true
4
-
5
- module Ragdoll
6
- class ImportJob < SolidJob::Base
7
- def perform(file)
8
- document = File.read(file)
9
- ingestion = Ragdoll::Ingestion.new(document)
10
- vectorized_chunks = ingestion.chunk_and_vectorize
11
- ingestion.store_in_database
12
- puts "Imported #{file} successfully."
13
- end
14
- end
15
- end
@@ -1,30 +0,0 @@
1
- # This file contains the Ingestion class responsible for processing documents by chunking and vectorizing them.
2
-
3
- # frozen_string_literal: true
4
-
5
- module Ragdoll
6
- class Ingestion
7
- def initialize(document)
8
- @document = document
9
- end
10
-
11
- def chunk_and_vectorize
12
- # Example logic for chunking and vectorization
13
- chunks = @document.split("\n\n") # Split document into paragraphs
14
- vectorized_chunks = chunks.map { |chunk| vectorize(chunk) }
15
- vectorized_chunks
16
- end
17
-
18
- def store_in_database
19
- # Implement logic to store vectorized data in the database
20
- end
21
-
22
- private
23
-
24
- def vectorize(chunk)
25
- # Placeholder for vectorization logic
26
- # Convert chunk to a vector representation
27
- chunk.split.map(&:downcase) # Simple example: split words and downcase
28
- end
29
- end
30
- end
@@ -1,18 +0,0 @@
1
- # This file contains the Search class responsible for querying the database with a prompt.
2
-
3
- # frozen_string_literal: true
4
-
5
- module Ragdoll
6
- class Search
7
- def initialize(prompt)
8
- @prompt = prompt
9
- end
10
-
11
- def search_database(max_count)
12
- # Example logic for searching the database
13
- # This is a placeholder for actual database search logic
14
- results = [] # Placeholder for actual database query results
15
- results.select { |entry| entry.include?(@prompt) }
16
- end
17
- end
18
- end
@@ -1,7 +0,0 @@
1
- # This file defines the version number for the Ragdoll gem.
2
-
3
- # frozen_string_literal: true
4
-
5
- module Ragdoll
6
- VERSION = "0.1.0"
7
- end
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require_relative '../ragdoll/import_job'
5
-
6
- module Ragdoll
7
- class ImportTask < Thor
8
- desc "import PATH", "Import documents from a file, glob, or directory"
9
- method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
10
- method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
11
- def import(path)
12
- queue = SolidQueue.new(concurrency: options[:jobs])
13
- files = if File.directory?(path)
14
- if options[:recursive]
15
- Dir.glob("#{path}/**/*")
16
- else
17
- Dir.glob("#{path}/*")
18
- end
19
- else
20
- [path]
21
- end
22
-
23
- files.each do |file|
24
- next unless File.file?(file)
25
-
26
- queue.push(file) do |file|
27
- Ragdoll::ImportJob.perform_async(file)
28
- end
29
- end
30
- end
31
- end
32
- end
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
-
5
- module Ragdoll
6
- class JobsTask < Thor
7
- desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
8
- method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
9
- method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
10
- method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
11
- method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
12
- method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
13
- method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
14
- def jobs(job_id = nil)
15
- if job_id
16
- if options[:stop]
17
- puts "Stopping job ID: #{job_id}..."
18
- elsif options[:pause]
19
- puts "Pausing job ID: #{job_id}..."
20
- elsif options[:resume]
21
- puts "Resuming job ID: #{job_id}..."
22
- else
23
- puts "Fetching status for job ID: #{job_id}..."
24
- end
25
- else
26
- if options[:stop_all]
27
- puts "Stopping all jobs..."
28
- elsif options[:pause_all]
29
- puts "Pausing all running jobs..."
30
- elsif options[:resume_all]
31
- puts "Resuming all paused jobs..."
32
- else
33
- puts "Fetching status of all running and queued import jobs..."
34
- puts "Job ID: 12345, Status: Running, File: document1.txt"
35
- puts "Job ID: 12346, Status: Running, File: document2.txt"
36
- end
37
- end
38
- end
39
- end
40
- end
@@ -1,7 +0,0 @@
1
- require 'thor'
2
-
3
- module Ragdoll
4
- class Tasks < Thor
5
- # Move your existing CLI tasks here
6
- end
7
- end
@@ -1,55 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require_relative '../ragdoll/search'
5
-
6
- module Ragdoll
7
- class SearchTask < Thor
8
- desc "search PROMPT", "Search the database with a prompt"
9
- method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
10
- method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
11
- method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
12
- def search(prompt = nil)
13
- if options[:prompt]
14
- prompt = File.read(options[:prompt])
15
- end
16
-
17
- unless prompt
18
- puts "Please provide a prompt as a string or with the -p option."
19
- return
20
- end
21
-
22
- keywords = extract_keywords(prompt)
23
- vectorized_prompt = vectorize_prompt(prompt)
24
- search_instance = Ragdoll::Search.new(vectorized_prompt)
25
- results = search_instance.search_database(options[:max_count])
26
-
27
- if options[:rerank]
28
- results = rerank_results(results, keywords)
29
- end
30
-
31
- results.each do |result|
32
- puts "Source: #{result[:source]}"
33
- puts "Metadata: #{result[:metadata]}"
34
- puts "--------------------------------"
35
- end
36
- end
37
-
38
- private
39
-
40
- def rerank_results(results, keywords)
41
- results.sort_by do |result|
42
- content = result[:source].downcase
43
- keywords.count { |keyword| content.include?(keyword) }
44
- end.reverse
45
- end
46
-
47
- def extract_keywords(prompt)
48
- prompt.split.map(&:downcase).uniq
49
- end
50
-
51
- def vectorize_prompt(prompt)
52
- prompt.split.map(&:downcase)
53
- end
54
- end
55
- end