RubyGems - ragdoll - Versions diffs - 0.1.0 - Mend

ragdoll 0.1.0

Files changed (21) hide show

checksums.yaml +7 -0
data/README.md +75 -0
data/Rakefile +10 -0
data/app/models/ragdoll/document.rb +9 -0
data/app/models/ragdoll/embedding.rb +9 -0
data/config/initializers/ragdoll.rb +6 -0
data/config/routes.rb +5 -0
data/db/migrate/20250218123456_create_documents.rb +20 -0
data/lib/config/database.yml +28 -0
data/lib/config/ragdoll.yml +31 -0
data/lib/ragdoll/engine.rb +16 -0
data/lib/ragdoll/import_job.rb +15 -0
data/lib/ragdoll/ingestion.rb +30 -0
data/lib/ragdoll/search.rb +18 -0
data/lib/ragdoll/version.rb +7 -0
data/lib/ragdoll.rb +12 -0
data/lib/tasks/import_task.thor +32 -0
data/lib/tasks/jobs_task.thor +40 -0
data/lib/tasks/ragdoll_tasks.thor +7 -0
data/lib/tasks/search_task.thor +55 -0
metadata +77 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: bea4621e2b802db79d78f8b1d0679cf2f81ed35b91d35683ce0afcb83ddc54e1
+  data.tar.gz: ec12fb975b154f77a42d54fb3c716d523e1b90e4e0122b3576c5aac15a957340
+SHA512:
+  metadata.gz: 3702308d3b772dfc0ebf429a26bae0f0378456e9d6c48357b8e2a4cdeb3744e78b43fd610ef308ff6348f1bec28bb37d51bd5e335d78583574b3212c8f544a33
+  data.tar.gz: 9beebfebafe1ed2e815a3042949b68e5f208d8c555a92228eb4098fa99900f07d2985654766e2907de3069f5d10b6cf3e65fb7a3b431ee9db836e6111f1e27f2

data/README.md ADDED Viewed

@@ -0,0 +1,75 @@
+# Ragdoll
+Ragdoll is a Rails Engine designed for document ingestion and search. It allows you to import documents, vectorize them, and perform searches using vector representations.
+## Installation as a Rails Engine
+To use Ragdoll as a Rails Engine, add it to your application's Gemfile by running:
+```bash
+bundle add ragdoll
+```
+And then execute:
+```bash
+bundle install
+```
+Or install it yourself as:
+```bash
+gem install ragdoll
+```
+## Usage as a Rails Engine
+### Importing Documents
+To import documents from a file, glob, or directory, use the following command:
+```bash
+ragdoll import PATH
+```
+- `PATH`: The path to the file, glob, or directory to import.
+- Use the `-r` or `--recursive` option to import files recursively from directories.
+- Use the `-j` or `--jobs` option to specify the number of concurrent import jobs.
+### Managing Jobs
+To manage import jobs, use the following command:
+```bash
+ragdoll jobs [JOB_ID]
+```
+- `JOB_ID`: The ID of a specific job to manage.
+- Use `--stop`, `--pause`, or `--resume` to control a specific job.
+- Use `--stop-all`, `--pause-all`, or `--resume-all` to control all jobs.
+### Searching Documents
+To search the database with a prompt, use the following command:
+```bash
+ragdoll search PROMPT
+```
+- `PROMPT`: The search prompt as a string or use the `-p` option to specify a file containing the prompt text.
+- Use the `--max_count` option to specify the maximum number of results to return.
+- Use the `--rerank` option to rerank results using keyword search.
+## Development and Contribution
+After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+## Contributing
+Bug reports and pull requests are welcome on GitHub at https://github.com/MadBomber/ragdoll.
+## License
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

data/Rakefile ADDED Viewed

@@ -0,0 +1,10 @@
+# This file defines the Rake tasks for the Ragdoll gem, including tasks for testing.
+# frozen_string_literal: true
+require "bundler/gem_tasks"
+require "minitest/test_task"
+Minitest::TestTask.create
+task default: :test

data/app/models/ragdoll/document.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# This file defines the Document model for the Ragdoll gem.
+# frozen_string_literal: true
+module Ragdoll
+  # A document record that owns a collection of embeddings. Destroying a
+  # document also destroys its embeddings (dependent: :destroy).
+  # NOTE(review): ApplicationRecord is not defined in any file of this gem —
+  # presumably the host application's class is picked up; confirm.
+  class Document < ApplicationRecord
+    has_many :embeddings, dependent: :destroy
+  end
+end

data/app/models/ragdoll/embedding.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# This file defines the Embedding model for the Ragdoll gem.
+# frozen_string_literal: true
+module Ragdoll
+  # An embedding record that belongs to exactly one document.
+  # NOTE(review): no migration in this gem creates a table for embeddings —
+  # confirm where that table is expected to come from.
+  class Embedding < ApplicationRecord
+    belongs_to :document
+  end
+end

data/config/initializers/ragdoll.rb ADDED Viewed

@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+# Initializer for Ragdoll engine
+Ragdoll.configure do |config|
+  # Set configuration options here
+end

data/config/routes.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+Ragdoll::Engine.routes.draw do
+  # Define your engine routes here
+end

data/db/migrate/20250218123456_create_documents.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# This migration creates the documents table with necessary extensions for PostgreSQL.
+module Ragdoll
+  class CreateDocuments < ActiveRecord::Migration[7.0]
+  def change
+    enable_extension 'pg_trgm'
+    enable_extension 'fuzzystrmatch'
+    create_table :documents do |t|
+      t.string :location
+      t.string :summary
+      t.string :type
+      t.datetime :processing_started_at
+      t.datetime :processing_finished_at
+      t.timestamps
+    end
+  end
+  end
+end

data/lib/config/database.yml ADDED Viewed

@@ -0,0 +1,28 @@
+# This file contains the database configuration for the Ragdoll gem, using environment variables.
+default: &default
+  adapter: postgresql
+  encoding: unicode
+  pool: <%= ENV.fetch("RAGDOLL_POOL", 5) %>
+  timeout: <%= ENV.fetch("RAGDOLL_TIMEOUT", 5000) %>
+development:
+  <<: *default
+  host: <%= ENV.fetch("RAGDOLL_HOST", "localhost") %>
+  database: <%= ENV.fetch("RAGDOLL_DATABASE", "ragdoll_development") %>
+  username: <%= ENV.fetch("RAGDOLL_USER", "user") %>
+  password: <%= ENV.fetch("RAGDOLL_PASSWORD", "password") %>
+test:
+  <<: *default
+  host: <%= ENV.fetch("RAGDOLL_HOST", "localhost") %>
+  database: <%= ENV.fetch("RAGDOLL_DATABASE", "ragdoll_test") %>
+  username: <%= ENV.fetch("RAGDOLL_USER", "user") %>
+  password: <%= ENV.fetch("RAGDOLL_PASSWORD", "password") %>
+production:
+  <<: *default
+  host: <%= ENV.fetch("RAGDOLL_HOST") %>
+  database: <%= ENV.fetch("RAGDOLL_DATABASE") %>
+  username: <%= ENV.fetch("RAGDOLL_USER") %>
+  password: <%= ENV.fetch("RAGDOLL_PASSWORD") %>

data/lib/config/ragdoll.yml ADDED Viewed

@@ -0,0 +1,31 @@
+# This file contains the default configuration settings for the Ragdoll gem, including database configurations.
+default: &default
+  # Anchored separately because YAML's `<<` merge is shallow: an environment
+  # that overrides `database:` must re-merge these defaults itself, otherwise
+  # it loses every key it does not restate (host, user, pool, timeout, ...).
+  database: &default_database
+    host: localhost
+    database: ragdoll_development
+    user: user
+    password: password
+    pool: 5
+    timeout: 5000
+  llm:
+    embeddings_model: "llama-2-7b"
+    reranking_model: "llama-2-13b"
+    chat_model: "llama-2-70b"
+development:
+  <<: *default
+test:
+  <<: *default
+  database:
+    <<: *default_database
+    database: ragdoll_test
+production:
+  <<: *default
+  database:
+    <<: *default_database
+    host: <%= ENV.fetch("RAGDOLL_HOST") %>
+    database: <%= ENV.fetch("RAGDOLL_DATABASE") %>
+    user: <%= ENV.fetch("RAGDOLL_USER") %>
+    password: <%= ENV.fetch("RAGDOLL_PASSWORD") %>

data/lib/ragdoll/engine.rb ADDED Viewed

@@ -0,0 +1,16 @@
+# This file defines the Ragdoll engine, which integrates the gem with Rails applications.
+# frozen_string_literal: true
+require "rails/engine"
+module Ragdoll
+  class Engine < ::Rails::Engine
+    isolate_namespace Ragdoll
+    config.generators do |g|
+      g.test_framework :minitest
+      g.fixture_replacement :factory_bot
+      g.factory_bot dir: 'test/factories'
+    end
+  end
+end

data/lib/ragdoll/import_job.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# This file defines the ImportJob class for handling document import tasks in the background.
+# frozen_string_literal: true
+module Ragdoll
+  class ImportJob < SolidJob::Base
+    def perform(file)
+      document = File.read(file)
+      ingestion = Ragdoll::Ingestion.new(document)
+      vectorized_chunks = ingestion.chunk_and_vectorize
+      ingestion.store_in_database
+      puts "Imported #{file} successfully."
+    end
+  end
+end

data/lib/ragdoll/ingestion.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# This file contains the Ingestion class responsible for processing documents by chunking and vectorizing them.
+# frozen_string_literal: true
+module Ragdoll
+  class Ingestion
+    def initialize(document)
+      @document = document
+    end
+    def chunk_and_vectorize
+      # Example logic for chunking and vectorization
+      chunks = @document.split("\n\n") # Split document into paragraphs
+      vectorized_chunks = chunks.map { |chunk| vectorize(chunk) }
+      vectorized_chunks
+    end
+    def store_in_database
+      # Implement logic to store vectorized data in the database
+    end
+    private
+    def vectorize(chunk)
+      # Placeholder for vectorization logic
+      # Convert chunk to a vector representation
+      chunk.split.map(&:downcase) # Simple example: split words and downcase
+    end
+  end
+end

data/lib/ragdoll/search.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# This file contains the Search class responsible for querying the database with a prompt.
+# frozen_string_literal: true
+module Ragdoll
+  class Search
+    def initialize(prompt)
+      @prompt = prompt
+    end
+    def search_database(max_count)
+      # Example logic for searching the database
+      # This is a placeholder for actual database search logic
+      results = [] # Placeholder for actual database query results
+      results.select { |entry| entry.include?(@prompt) }
+    end
+  end
+end

data/lib/ragdoll/version.rb ADDED Viewed

@@ -0,0 +1,7 @@
+# This file defines the version number for the Ragdoll gem.
+# frozen_string_literal: true
+module Ragdoll
+  VERSION = "0.1.0"
+end

data/lib/ragdoll.rb ADDED Viewed

@@ -0,0 +1,12 @@
+# This file is the main entry point for the Ragdoll gem, requiring all necessary components.
+# frozen_string_literal: true
+# frozen_string_literal: true
+require "active_support/ordered_options"
+require "ragdoll/version"
+require "ragdoll/engine"
+module Ragdoll
+  class Error < StandardError; end
+end

data/lib/tasks/import_task.thor ADDED Viewed

@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+require 'thor'
+require_relative '../ragdoll/import_job'
+module Ragdoll
+  class ImportTask < Thor
+    desc "import PATH", "Import documents from a file, glob, or directory"
+    method_option :recursive, aliases: "-r", type: :boolean, default: false, desc: "Recursively import files from directories"
+    method_option :jobs, aliases: ["-j", "--jobs"], type: :numeric, default: 1, desc: "Number of concurrent import jobs"
+    def import(path)
+      queue = SolidQueue.new(concurrency: options[:jobs])
+      files = if File.directory?(path)
+                if options[:recursive]
+                  Dir.glob("#{path}/**/*")
+                else
+                  Dir.glob("#{path}/*")
+                end
+              else
+                [path]
+              end
+      files.each do |file|
+        next unless File.file?(file)
+        queue.push(file) do |file|
+          Ragdoll::ImportJob.perform_async(file)
+        end
+      end
+    end
+  end
+end

data/lib/tasks/jobs_task.thor ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+require 'thor'
+module Ragdoll
+  class JobsTask < Thor
+    desc "jobs [JOB_ID]", "Report the status of all running and queued import jobs, or a specific job if JOB_ID is provided"
+    method_option :stop_all, type: :boolean, default: false, desc: "Stop all running and queued jobs"
+    method_option :pause_all, type: :boolean, default: false, desc: "Pause all running jobs"
+    method_option :resume_all, type: :boolean, default: false, desc: "Resume all paused jobs"
+    method_option :stop, type: :boolean, default: false, desc: "Stop a specific job"
+    method_option :pause, type: :boolean, default: false, desc: "Pause a specific job"
+    method_option :resume, type: :boolean, default: false, desc: "Resume a specific job"
+    def jobs(job_id = nil)
+      if job_id
+        if options[:stop]
+          puts "Stopping job ID: #{job_id}..."
+        elsif options[:pause]
+          puts "Pausing job ID: #{job_id}..."
+        elsif options[:resume]
+          puts "Resuming job ID: #{job_id}..."
+        else
+          puts "Fetching status for job ID: #{job_id}..."
+        end
+      else
+        if options[:stop_all]
+          puts "Stopping all jobs..."
+        elsif options[:pause_all]
+          puts "Pausing all running jobs..."
+        elsif options[:resume_all]
+          puts "Resuming all paused jobs..."
+        else
+          puts "Fetching status of all running and queued import jobs..."
+          puts "Job ID: 12345, Status: Running, File: document1.txt"
+          puts "Job ID: 12346, Status: Running, File: document2.txt"
+        end
+      end
+    end
+  end
+end

data/lib/tasks/ragdoll_tasks.thor ADDED Viewed

@@ -0,0 +1,7 @@
+require 'thor'
+module Ragdoll
+  class Tasks < Thor
+    # Move your existing CLI tasks here
+  end
+end

data/lib/tasks/search_task.thor ADDED Viewed

@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+require 'thor'
+require_relative '../ragdoll/search'
+module Ragdoll
+  class SearchTask < Thor
+    desc "search PROMPT", "Search the database with a prompt"
+    method_option :prompt, aliases: ["-p", "--prompt"], type: :string, desc: "File path containing the prompt text"
+    method_option :max_count, type: :numeric, default: 10, desc: "Maximum number of results to return"
+    method_option :rerank, type: :boolean, default: false, desc: "Rerank results using keyword search"
+    def search(prompt = nil)
+      if options[:prompt]
+        prompt = File.read(options[:prompt])
+      end
+      unless prompt
+        puts "Please provide a prompt as a string or with the -p option."
+        return
+      end
+      keywords = extract_keywords(prompt)
+      vectorized_prompt = vectorize_prompt(prompt)
+      search_instance = Ragdoll::Search.new(vectorized_prompt)
+      results = search_instance.search_database(options[:max_count])
+      if options[:rerank]
+        results = rerank_results(results, keywords)
+      end
+      results.each do |result|
+        puts "Source: #{result[:source]}"
+        puts "Metadata: #{result[:metadata]}"
+        puts "--------------------------------"
+      end
+    end
+    private
+    def rerank_results(results, keywords)
+      results.sort_by do |result|
+        content = result[:source].downcase
+        keywords.count { |keyword| content.include?(keyword) }
+      end.reverse
+    end
+    def extract_keywords(prompt)
+      prompt.split.map(&:downcase).uniq
+    end
+    def vectorize_prompt(prompt)
+      prompt.split.map(&:downcase)
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,77 @@
+--- !ruby/object:Gem::Specification
+name: ragdoll
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Dewayne VanHoozer
+bindir: bin
+cert_chain: []
+date: 2025-02-19 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rails
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '7.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '7.1'
+description: Under development.  Contributors welcome.
+email:
+- dvanhoozer@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- Rakefile
+- app/models/ragdoll/document.rb
+- app/models/ragdoll/embedding.rb
+- config/initializers/ragdoll.rb
+- config/routes.rb
+- db/migrate/20250218123456_create_documents.rb
+- lib/config/database.yml
+- lib/config/ragdoll.yml
+- lib/ragdoll.rb
+- lib/ragdoll/engine.rb
+- lib/ragdoll/import_job.rb
+- lib/ragdoll/ingestion.rb
+- lib/ragdoll/search.rb
+- lib/ragdoll/version.rb
+- lib/tasks/import_task.thor
+- lib/tasks/jobs_task.thor
+- lib/tasks/ragdoll_tasks.thor
+- lib/tasks/search_task.thor
+homepage: https://github.com/MadBomber/ragdoll
+licenses:
+- MIT
+metadata:
+  allowed_push_host: https://rubygems.org
+  homepage_uri: https://github.com/MadBomber/ragdoll
+  source_code_uri: https://github.com/MadBomber/ragdoll
+  changelog_uri: https://github.com/MadBomber/ragdoll/blob/main/CHANGELOG.md
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 3.1.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.6.3
+specification_version: 4
+summary: Ruby on Rails Engine
+test_files: []