ragdoll 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bea4621e2b802db79d78f8b1d0679cf2f81ed35b91d35683ce0afcb83ddc54e1
4
+ data.tar.gz: ec12fb975b154f77a42d54fb3c716d523e1b90e4e0122b3576c5aac15a957340
5
+ SHA512:
6
+ metadata.gz: 3702308d3b772dfc0ebf429a26bae0f0378456e9d6c48357b8e2a4cdeb3744e78b43fd610ef308ff6348f1bec28bb37d51bd5e335d78583574b3212c8f544a33
7
+ data.tar.gz: 9beebfebafe1ed2e815a3042949b68e5f208d8c555a92228eb4098fa99900f07d2985654766e2907de3069f5d10b6cf3e65fb7a3b431ee9db836e6111f1e27f2
data/README.md ADDED
@@ -0,0 +1,75 @@
1
+ # Ragdoll
2
+
3
+ Ragdoll is a Rails Engine designed for document ingestion and search. It allows you to import documents, vectorize them, and perform searches using vector representations.
4
+
5
+ ## Installation as a Rails Engine
6
+
7
+ To use Ragdoll as a Rails Engine, add it to your application's Gemfile by running:
8
+
9
+ ```bash
10
+ bundle add ragdoll
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```bash
22
+ gem install ragdoll
23
+ ```
24
+
25
+ ## Usage as a Rails Engine
26
+
27
+ ### Importing Documents
28
+
29
+ To import documents from a file, glob, or directory, use the following command:
30
+
31
+ ```bash
32
+ ragdoll import PATH
33
+ ```
34
+
35
+ - `PATH`: The path to the file, glob pattern, or directory to import.
36
+ - Use the `-r` or `--recursive` option to import files recursively from directories.
37
+ - Use the `-j` or `--jobs` option to specify the number of concurrent import jobs.
38
+
39
+ ### Managing Jobs
40
+
41
+ To manage import jobs, use the following command:
42
+
43
+ ```bash
44
+ ragdoll jobs [JOB_ID]
45
+ ```
46
+
47
+ - `JOB_ID`: The ID of a specific job to manage.
48
+ - Use `--stop`, `--pause`, or `--resume` to control a specific job.
49
+ - Use `--stop-all`, `--pause-all`, or `--resume-all` to control all jobs.
50
+
51
+ ### Searching Documents
52
+
53
+ To search the database with a prompt, use the following command:
54
+
55
+ ```bash
56
+ ragdoll search PROMPT
57
+ ```
58
+
59
+ - `PROMPT`: The search prompt as a string. Alternatively, use the `-p` option to specify a file containing the prompt text.
60
+ - Use the `--max_count` option to specify the maximum number of results to return.
61
+ - Use the `--rerank` option to rerank results using keyword search.
62
+
63
+ ## Development and Contribution
64
+
65
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
66
+
67
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
68
+
69
+ ## Contributing
70
+
71
+ Bug reports and pull requests are welcome on GitHub at https://github.com/MadBomber/ragdoll.
72
+
73
+ ## License
74
+
75
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ # This file defines the Rake tasks for the Ragdoll gem, including tasks for testing.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ require "bundler/gem_tasks"
6
+ require "minitest/test_task"
7
+
8
+ Minitest::TestTask.create
9
+
10
+ task default: :test
@@ -0,0 +1,9 @@
1
+ # This file defines the Document model for the Ragdoll gem.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ module Ragdoll
6
+ class Document < ApplicationRecord
7
+ has_many :embeddings, dependent: :destroy
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ # This file defines the Embedding model for the Ragdoll gem.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ module Ragdoll
6
+ class Embedding < ApplicationRecord
7
+ belongs_to :document
8
+ end
9
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Initializer for Ragdoll engine
4
+ Ragdoll.configure do |config|
5
+ # Set configuration options here
6
+ end
data/config/routes.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ Ragdoll::Engine.routes.draw do
4
+ # Define your engine routes here
5
+ end
@@ -0,0 +1,20 @@
1
+ # This migration creates the documents table with necessary extensions for PostgreSQL.
2
+
3
# Creates the table that backs Ragdoll::Document and enables the PostgreSQL
# extensions `pg_trgm` and `fuzzystrmatch`.
#
# The class is defined at the top level (not inside `module Ragdoll`) because
# Rails derives the constant to instantiate from the migration file name
# (`..._create_documents.rb` -> `CreateDocuments`).
#
# The table is named `ragdoll_documents` because the engine calls
# `isolate_namespace Ragdoll`, which gives namespaced models the `ragdoll_`
# table-name prefix.
class CreateDocuments < ActiveRecord::Migration[7.0]
  def change
    enable_extension 'pg_trgm'
    enable_extension 'fuzzystrmatch'

    create_table :ragdoll_documents do |t|
      t.string :location
      t.string :summary
      # NOTE(review): `type` is Active Record's default single-table-inheritance
      # column. Unless STI is intended, the model should either rename this
      # column or set `self.inheritance_column` to something else - confirm.
      t.string :type
      t.datetime :processing_started_at
      t.datetime :processing_finished_at

      t.timestamps
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # This file contains the database configuration for the Ragdoll gem, using environment variables.
2
+
3
+ default: &default
4
+ adapter: postgresql
5
+ encoding: unicode
6
+ pool: <%= ENV.fetch("RAGDOLL_POOL", 5) %>
7
+ timeout: <%= ENV.fetch("RAGDOLL_TIMEOUT", 5000) %>
8
+
9
+ development:
10
+ <<: *default
11
+ host: <%= ENV.fetch("RAGDOLL_HOST", "localhost") %>
12
+ database: <%= ENV.fetch("RAGDOLL_DATABASE", "ragdoll_development") %>
13
+ username: <%= ENV.fetch("RAGDOLL_USER", "user") %>
14
+ password: <%= ENV.fetch("RAGDOLL_PASSWORD", "password") %>
15
+
16
+ test:
17
+ <<: *default
18
+ host: <%= ENV.fetch("RAGDOLL_HOST", "localhost") %>
19
+ database: <%= ENV.fetch("RAGDOLL_DATABASE", "ragdoll_test") %>
20
+ username: <%= ENV.fetch("RAGDOLL_USER", "user") %>
21
+ password: <%= ENV.fetch("RAGDOLL_PASSWORD", "password") %>
22
+
23
+ production:
24
+ <<: *default
25
+ host: <%= ENV.fetch("RAGDOLL_HOST") %>
26
+ database: <%= ENV.fetch("RAGDOLL_DATABASE") %>
27
+ username: <%= ENV.fetch("RAGDOLL_USER") %>
28
+ password: <%= ENV.fetch("RAGDOLL_PASSWORD") %>
@@ -0,0 +1,31 @@
1
+ # This file contains the default configuration settings for the Ragdoll gem, including database configurations.
2
+
3
# `<<: *default` is a shallow merge: an environment that declares its own
# `database:` key replaces the whole default mapping instead of extending it.
# The database defaults therefore get their own anchor and are merged again
# inside every environment that overrides individual database settings.
default: &default
  database: &default_database
    host: localhost
    database: ragdoll_development
    user: user
    password: password
    pool: 5
    timeout: 5000

  llm:
    embeddings_model: "llama-2-7b"
    reranking_model: "llama-2-13b"
    chat_model: "llama-2-70b"

development:
  <<: *default

test:
  <<: *default
  database:
    <<: *default_database
    database: ragdoll_test

production:
  <<: *default
  database:
    <<: *default_database
    host: <%= ENV.fetch("RAGDOLL_HOST") %>
    database: <%= ENV.fetch("RAGDOLL_DATABASE") %>
    user: <%= ENV.fetch("RAGDOLL_USER") %>
    password: <%= ENV.fetch("RAGDOLL_PASSWORD") %>
@@ -0,0 +1,16 @@
1
+ # This file defines the Ragdoll engine, which integrates the gem with Rails applications.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ require "rails/engine"
6
+
7
module Ragdoll
  # Rails engine for the Ragdoll gem. The namespace is isolated under
  # `Ragdoll`, and the generators are configured to use Minitest with
  # factory_bot factories stored in `test/factories`.
  class Engine < ::Rails::Engine
    isolate_namespace Ragdoll

    config.generators do |generators|
      generators.test_framework :minitest
      generators.fixture_replacement :factory_bot
      generators.factory_bot dir: 'test/factories'
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # This file defines the ImportJob class for handling document import tasks in the background.
2
+
3
+ # frozen_string_literal: true
4
+
5
module Ragdoll
  # Background job that ingests a single file: reads it from disk, runs it
  # through Ragdoll::Ingestion and reports success on standard output.
  #
  # NOTE(review): `SolidJob::Base` is not defined anywhere in the visible
  # source and is not a declared gem dependency - confirm the intended job
  # base class.
  class ImportJob < SolidJob::Base
    # @param file [String] path of the file to import
    # @return [nil]
    # @raise [SystemCallError] if the file cannot be read (from File.read)
    def perform(file)
      document = File.read(file)
      ingestion = Ragdoll::Ingestion.new(document)
      # The returned vectors were previously bound to an unused local.
      # NOTE(review): nothing consumes this result yet - presumably
      # store_in_database will need it once implemented.
      ingestion.chunk_and_vectorize
      ingestion.store_in_database
      puts "Imported #{file} successfully."
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # This file contains the Ingestion class responsible for processing documents by chunking and vectorizing them.
2
+
3
+ # frozen_string_literal: true
4
+
5
module Ragdoll
  # Splits a document into paragraph chunks and converts each chunk into a
  # placeholder "vector" (the list of its lowercased words).
  class Ingestion
    # One or more blank lines (optionally containing spaces/tabs) separate
    # paragraphs. `\R` matches LF, CR and CRLF line endings alike.
    PARAGRAPH_BREAK = /\R(?:[ \t]*\R)+/.freeze

    # @param document [String, nil] raw document text; nil is treated as empty
    def initialize(document)
      @document = document
    end

    # Splits the document into paragraphs and vectorizes each of them.
    # Blank paragraphs (leading/trailing/repeated blank lines) are skipped so
    # that no empty vectors are produced.
    #
    # @return [Array<Array<String>>] one word list per non-empty paragraph
    def chunk_and_vectorize
      @document.to_s
               .split(PARAGRAPH_BREAK)
               .map(&:strip)
               .reject(&:empty?)
               .map { |chunk| vectorize(chunk) }
    end

    # Placeholder: persisting the vectorized data is not implemented yet.
    #
    # @return [nil]
    def store_in_database
      # Implement logic to store vectorized data in the database
    end

    private

    # Placeholder vectorization: whitespace-separated, lowercased words.
    #
    # @param chunk [String]
    # @return [Array<String>]
    def vectorize(chunk)
      chunk.split.map(&:downcase)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # This file contains the Search class responsible for querying the database with a prompt.
2
+
3
+ # frozen_string_literal: true
4
+
5
module Ragdoll
  # Placeholder search over stored entries. The candidate list is currently
  # always empty, so every search returns an empty array.
  class Search
    # @param prompt [Object] value each candidate entry is tested against
    #   with `include?`
    def initialize(prompt)
      @prompt = prompt
    end

    # Returns the entries that include the prompt, limited to `max_count`.
    #
    # @param max_count [Integer, nil] maximum number of results; nil means
    #   no limit
    # @return [Array] matching entries (at most `max_count` of them)
    def search_database(max_count)
      candidates = [] # Placeholder for actual database query results
      matches = candidates.select { |entry| entry.include?(@prompt) }
      # The limit used to be accepted but silently ignored.
      max_count ? matches.first(max_count) : matches
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # This file defines the version number for the Ragdoll gem.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ module Ragdoll
6
+ VERSION = "0.1.0"
7
+ end
data/lib/ragdoll.rb ADDED
@@ -0,0 +1,12 @@
1
+ # This file is the main entry point for the Ragdoll gem, requiring all necessary components.
2
+
3
+ # frozen_string_literal: true
4
+
5
+ # frozen_string_literal: true
6
+
7
+ require "ragdoll/version"
8
+ require "ragdoll/engine"
9
+
10
module Ragdoll
  # Base class for errors raised by the gem.
  class Error < StandardError; end

  # Runtime settings for the gem. The defaults mirror the `llm` section of
  # lib/config/ragdoll.yml.
  class Configuration
    attr_accessor :embeddings_model, :reranking_model, :chat_model

    def initialize
      @embeddings_model = "llama-2-7b"
      @reranking_model = "llama-2-13b"
      @chat_model = "llama-2-70b"
    end
  end

  class << self
    # @return [Ragdoll::Configuration] the process-wide configuration,
    #   created on first access
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the configuration for modification. The engine initializer
    # (config/initializers/ragdoll.rb) calls `Ragdoll.configure`, which
    # previously was not defined anywhere.
    #
    # @yieldparam config [Ragdoll::Configuration]
    # @return [Ragdoll::Configuration]
    def configure
      yield configuration if block_given?
      configuration
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require_relative '../ragdoll/import_job'
5
+
6
module Ragdoll
  # Thor command-line task that enqueues an import job for every regular file
  # found under a file path, glob pattern or directory.
  class ImportTask < Thor
    desc "import PATH", "Import documents from a file, glob, or directory"
    method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
    method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
    # @param path [String] file path, glob pattern or directory to import
    def import(path)
      # NOTE(review): `SolidQueue.new` / `queue.push` and `perform_async` are
      # not defined in the visible source - confirm they match the queueing
      # library actually in use.
      queue = SolidQueue.new(concurrency: options[:jobs])

      files_for(path).each do |file|
        # The block parameter used to be named `file` too, shadowing the
        # outer variable.
        queue.push(file) do |queued_file|
          Ragdoll::ImportJob.perform_async(queued_file)
        end
      end
    end

    private

    # Resolves PATH to the list of regular files to import.
    #
    # An existing file is taken literally (so names containing glob
    # metacharacters still work), a directory is listed (recursively with
    # --recursive), and anything else is expanded as a glob pattern, as the
    # command description promises.
    #
    # @param path [String]
    # @return [Array<String>] paths of regular files only
    def files_for(path)
      return [path] if File.file?(path)

      pattern =
        if File.directory?(path)
          options[:recursive] ? "#{path}/**/*" : "#{path}/*"
        else
          path
        end

      Dir.glob(pattern).select { |candidate| File.file?(candidate) }
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+
5
module Ragdoll
  # Thor command-line task for inspecting and controlling import jobs.
  # Every action currently only prints a message; the status listing is
  # hard-coded sample output.
  class JobsTask < Thor
    desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
    method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
    method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
    method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
    method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
    method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
    method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
    # With a JOB_ID only --stop/--pause/--resume are consulted; without one
    # only the --*-all variants are. In both cases the first flag that is set
    # wins, in the order stop, pause, resume.
    #
    # @param job_id [String, nil] job to act on, or nil for all jobs
    def jobs(job_id = nil)
      if job_id
        message =
          case
          when options[:stop] then "Stopping job ID: #{job_id}..."
          when options[:pause] then "Pausing job ID: #{job_id}..."
          when options[:resume] then "Resuming job ID: #{job_id}..."
          else "Fetching status for job ID: #{job_id}..."
          end
        puts message
        return
      end

      return puts("Stopping all jobs...") if options[:stop_all]
      return puts("Pausing all running jobs...") if options[:pause_all]
      return puts("Resuming all paused jobs...") if options[:resume_all]

      puts "Fetching status of all running and queued import jobs..."
      puts "Job ID: 12345, Status: Running, File: document1.txt"
      puts "Job ID: 12346, Status: Running, File: document2.txt"
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'thor'
2
+
3
+ module Ragdoll
4
+ class Tasks < Thor
5
+ # Move your existing CLI tasks here
6
+ end
7
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require_relative '../ragdoll/search'
5
+
6
module Ragdoll
  # Thor command-line task that searches with a text prompt via
  # Ragdoll::Search and prints the source and metadata of each result.
  class SearchTask < Thor
    desc "search PROMPT", "Search the database with a prompt"
    method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
    method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
    method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
    # @param prompt [String, nil] prompt text; replaced by the contents of the
    #   file given with -p/--prompt when that option is present
    def search(prompt = nil)
      prompt = File.read(options[:prompt]) if options[:prompt]

      # A blank prompt (empty string, whitespace only, empty file) yields no
      # keywords and an empty vector, so it is treated like a missing prompt.
      if prompt.nil? || prompt.strip.empty?
        puts "Please provide a prompt as a string or with the -p option."
        return
      end

      keywords = extract_keywords(prompt)
      searcher = Ragdoll::Search.new(vectorize_prompt(prompt))
      results = searcher.search_database(options[:max_count])
      results = rerank_results(results, keywords) if options[:rerank]

      results.each do |result|
        puts "Source: #{result[:source]}"
        puts "Metadata: #{result[:metadata]}"
        puts "--------------------------------"
      end
    end

    private

    # Orders results by how many keywords occur in their lowercased source,
    # most matches first.
    #
    # @param results [Array<Hash>] entries with a :source string
    # @param keywords [Array<String>] lowercased keywords
    # @return [Array<Hash>]
    def rerank_results(results, keywords)
      results.sort_by do |result|
        content = result[:source].downcase
        keywords.count { |keyword| content.include?(keyword) }
      end.reverse
    end

    # @param prompt [String]
    # @return [Array<String>] unique lowercased whitespace-separated words
    def extract_keywords(prompt)
      prompt.split.map(&:downcase).uniq
    end

    # Placeholder vectorization: lowercased whitespace-separated words.
    #
    # @param prompt [String]
    # @return [Array<String>]
    def vectorize_prompt(prompt)
      prompt.split.map(&:downcase)
    end
  end
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ragdoll
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Dewayne VanHoozer
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-02-19 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rails
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '7.1'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '7.1'
26
+ description: Under development. Contributors welcome.
27
+ email:
28
+ - dvanhoozer@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - README.md
34
+ - Rakefile
35
+ - app/models/ragdoll/document.rb
36
+ - app/models/ragdoll/embedding.rb
37
+ - config/initializers/ragdoll.rb
38
+ - config/routes.rb
39
+ - db/migrate/20250218123456_create_documents.rb
40
+ - lib/config/database.yml
41
+ - lib/config/ragdoll.yml
42
+ - lib/ragdoll.rb
43
+ - lib/ragdoll/engine.rb
44
+ - lib/ragdoll/import_job.rb
45
+ - lib/ragdoll/ingestion.rb
46
+ - lib/ragdoll/search.rb
47
+ - lib/ragdoll/version.rb
48
+ - lib/tasks/import_task.thor
49
+ - lib/tasks/jobs_task.thor
50
+ - lib/tasks/ragdoll_tasks.thor
51
+ - lib/tasks/search_task.thor
52
+ homepage: https://github.com/MadBomber/ragdoll
53
+ licenses:
54
+ - MIT
55
+ metadata:
56
+ allowed_push_host: https://rubygems.org
57
+ homepage_uri: https://github.com/MadBomber/ragdoll
58
+ source_code_uri: https://github.com/MadBomber/ragdoll
59
+ changelog_uri: https://github.com/MadBomber/ragdoll/blob/main/CHANGELOG.md
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 3.1.0
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubygems_version: 3.6.3
75
+ specification_version: 4
76
+ summary: Ruby on Rails Engine
77
+ test_files: []