ragdoll 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +318 -40
  3. data/Rakefile +15 -4
  4. data/db/migrate/001_enable_postgresql_extensions.rb +23 -0
  5. data/db/migrate/004_create_ragdoll_documents.rb +70 -0
  6. data/db/migrate/005_create_ragdoll_embeddings.rb +41 -0
  7. data/db/migrate/006_create_ragdoll_contents.rb +47 -0
  8. data/lib/ragdoll/core/client.rb +315 -0
  9. data/lib/ragdoll/core/configuration.rb +273 -0
  10. data/lib/ragdoll/core/database.rb +141 -0
  11. data/lib/ragdoll/core/document_management.rb +110 -0
  12. data/lib/ragdoll/core/document_processor.rb +344 -0
  13. data/lib/ragdoll/core/embedding_service.rb +183 -0
  14. data/lib/ragdoll/core/errors.rb +11 -0
  15. data/lib/ragdoll/core/jobs/extract_keywords.rb +32 -0
  16. data/lib/ragdoll/core/jobs/extract_text.rb +42 -0
  17. data/lib/ragdoll/core/jobs/generate_embeddings.rb +32 -0
  18. data/lib/ragdoll/core/jobs/generate_summary.rb +29 -0
  19. data/lib/ragdoll/core/metadata_schemas.rb +334 -0
  20. data/lib/ragdoll/core/models/audio_content.rb +175 -0
  21. data/lib/ragdoll/core/models/content.rb +126 -0
  22. data/lib/ragdoll/core/models/document.rb +678 -0
  23. data/lib/ragdoll/core/models/embedding.rb +204 -0
  24. data/lib/ragdoll/core/models/image_content.rb +227 -0
  25. data/lib/ragdoll/core/models/text_content.rb +169 -0
  26. data/lib/ragdoll/core/search_engine.rb +50 -0
  27. data/lib/ragdoll/core/services/image_description_service.rb +230 -0
  28. data/lib/ragdoll/core/services/metadata_generator.rb +335 -0
  29. data/lib/ragdoll/core/shrine_config.rb +71 -0
  30. data/lib/ragdoll/core/text_chunker.rb +210 -0
  31. data/lib/ragdoll/core/text_generation_service.rb +360 -0
  32. data/lib/ragdoll/core/version.rb +8 -0
  33. data/lib/ragdoll/core.rb +73 -0
  34. data/lib/ragdoll-core.rb +3 -0
  35. data/lib/ragdoll.rb +243 -6
  36. data/lib/tasks/annotate.rake +126 -0
  37. data/lib/tasks/db.rake +338 -0
  38. metadata +40 -37
  39. data/app/models/ragdoll/document.rb +0 -9
  40. data/app/models/ragdoll/embedding.rb +0 -9
  41. data/config/initializers/ragdoll.rb +0 -6
  42. data/config/routes.rb +0 -5
  43. data/db/migrate/20250218123456_create_documents.rb +0 -20
  44. data/lib/config/database.yml +0 -28
  45. data/lib/config/ragdoll.yml +0 -31
  46. data/lib/ragdoll/engine.rb +0 -16
  47. data/lib/ragdoll/import_job.rb +0 -15
  48. data/lib/ragdoll/ingestion.rb +0 -30
  49. data/lib/ragdoll/search.rb +0 -18
  50. data/lib/ragdoll/version.rb +0 -7
  51. data/lib/tasks/import_task.thor +0 -32
  52. data/lib/tasks/jobs_task.thor +0 -40
  53. data/lib/tasks/ragdoll_tasks.thor +0 -7
  54. data/lib/tasks/search_task.thor +0 -55
@@ -1,32 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require_relative '../ragdoll/import_job'
5
-
6
module Ragdoll
  # Thor command group that queues documents for import.
  class ImportTask < Thor
    # Resolves PATH to candidate files and pushes every regular file onto the
    # queue; the block given to +push+ hands the file to Ragdoll::ImportJob.
    #
    # PATH may be:
    # * an existing file      - imported as-is;
    # * a directory           - its direct children, or the whole tree when
    #                           --recursive is set;
    # * anything else         - expanded as a glob pattern (e.g. "docs/*.md").
    #
    # NOTE(review): SolidQueue.new(concurrency:) and the push-with-block form
    # are kept exactly as the original wrote them - confirm against the queue
    # library actually in use.
    #
    # @param path [String] file path, directory, or glob pattern
    desc "import PATH", "Import documents from a file, glob, or directory"
    method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
    method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
    def import(path)
      queue = SolidQueue.new(concurrency: options[:jobs])

      candidate_paths(path).each do |candidate|
        # Directory globs also return sub-directories; only real files are queued.
        next unless File.file?(candidate)

        queue.push(candidate) do |queued_file|
          Ragdoll::ImportJob.perform_async(queued_file)
        end
      end
    end

    private

    # Expands PATH into the list of paths to consider for import.
    #
    # An existing file is checked first so that file names containing glob
    # metacharacters are still imported literally.
    #
    # @param path [String]
    # @return [Array<String>]
    def candidate_paths(path)
      return [path] if File.file?(path)
      return Dir.glob(path) unless File.directory?(path)

      pattern = options[:recursive] ? File.join(path, "**", "*") : File.join(path, "*")
      Dir.glob(pattern)
    end
  end
end
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
-
5
module Ragdoll
  # Thor command group that reports on, or announces an action for, import jobs.
  # Every branch only prints a message; no job is actually touched here.
  class JobsTask < Thor
    # With a JOB_ID, the per-job flags are honoured in the order
    # --stop, --pause, --resume; with no flag the job's status is requested.
    # Without a JOB_ID, the *_all flags are honoured in the order
    # --stop_all, --pause_all, --resume_all; with no flag a status listing
    # is printed.
    #
    # @param job_id [String, nil] optional job identifier
    desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
    method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
    method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
    method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
    method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
    method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
    method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
    def jobs(job_id = nil)
      job_id ? announce_single(job_id) : announce_all
    end

    private

    # Prints the message for a single job; the first matching flag wins.
    def announce_single(job_id)
      return puts("Stopping job ID: #{job_id}...") if options[:stop]
      return puts("Pausing job ID: #{job_id}...") if options[:pause]
      return puts("Resuming job ID: #{job_id}...") if options[:resume]

      puts "Fetching status for job ID: #{job_id}..."
    end

    # Prints the message for the whole job set; the first matching flag wins.
    def announce_all
      return puts("Stopping all jobs...") if options[:stop_all]
      return puts("Pausing all running jobs...") if options[:pause_all]
      return puts("Resuming all paused jobs...") if options[:resume_all]

      # The listing below is hard-coded sample output, not live job data.
      puts "Fetching status of all running and queued import jobs..."
      puts "Job ID: 12345, Status: Running, File: document1.txt"
      puts "Job ID: 12346, Status: Running, File: document2.txt"
    end
  end
end
@@ -1,7 +0,0 @@
1
- require 'thor'
2
-
3
module Ragdoll
  # Empty Thor subclass kept as a landing spot for CLI commands;
  # it defines no commands of its own.
  class Tasks < Thor
    # Move your existing CLI tasks here
  end
end
@@ -1,55 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'thor'
4
- require_relative '../ragdoll/search'
5
-
6
module Ragdoll
  # Thor command group that runs a prompt against Ragdoll::Search and prints
  # the source and metadata of each result.
  class SearchTask < Thor
    # The prompt comes from the positional argument, or from the file named by
    # -p/--prompt (the file wins when both are given). A missing, empty, or
    # whitespace-only prompt prints a usage hint and returns without searching.
    #
    # File.read errors for an unreadable prompt file are not rescued here.
    #
    # @param prompt [String, nil] prompt text given on the command line
    desc "search PROMPT", "Search the database with a prompt"
    method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
    method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
    method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
    def search(prompt = nil)
      prompt = File.read(options[:prompt]) if options[:prompt]

      if prompt.nil? || prompt.strip.empty?
        puts "Please provide a prompt as a string or with the -p option."
        return
      end

      searcher = Ragdoll::Search.new(vectorize_prompt(prompt))
      results = searcher.search_database(options[:max_count])
      # Keywords are only needed for reranking, so they are extracted lazily.
      results = rerank_results(results, extract_keywords(prompt)) if options[:rerank]

      results.each do |result|
        puts "Source: #{result[:source]}"
        puts "Metadata: #{result[:metadata]}"
        puts "--------------------------------"
      end
    end

    private

    # Orders results by how many keywords occur (as substrings) in the
    # lower-cased source, highest count first. The original position is used
    # as a tie-breaker so equally scored results keep the order the search
    # returned them in. A nil source is treated as an empty string.
    #
    # @param results [Enumerable] items responding to +[:source]+
    # @param keywords [Array<String>] lower-cased keywords
    # @return [Array] the reordered results
    def rerank_results(results, keywords)
      ranked = results.each_with_index.sort_by do |result, position|
        content = result[:source].to_s.downcase
        [-keywords.count { |keyword| content.include?(keyword) }, position]
      end
      ranked.map(&:first)
    end

    # Splits the prompt on whitespace and returns the unique lower-cased words.
    #
    # @param prompt [String]
    # @return [Array<String>]
    def extract_keywords(prompt)
      prompt.split.map(&:downcase).uniq
    end

    # Splits the prompt on whitespace and lower-cases each word. The result is
    # a word list, not a numeric embedding; it is passed to Ragdoll::Search
    # unchanged.
    #
    # @param prompt [String]
    # @return [Array<String>]
    def vectorize_prompt(prompt)
      prompt.split.map(&:downcase)
    end
  end
end