llm_classifier 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 983b48d9d7882b918eb73b68d28aa49134e4aed1cdeb242a92eb9b7301767a89
4
+ data.tar.gz: ebe5c1dd0a365ecb63b27991fb2f19775a20efd70f1141e348a84cace9d93a8e
5
+ SHA512:
6
+ metadata.gz: b75468e630400fd04b59c2e9765c2bc504a3c8cc8cc0f652d1249661ff574b18c320851496a35c500c86ccf3e1cb5d416bc2533e8c9ff87d63d89e1ef614ab38
7
+ data.tar.gz: 4513b387bb5a03a5ee06ec7feeb3b2f026408cd82abb20585dfac62e2551b171be90329b96c1078f39e54001275479e065ab5b6d8ddf37b3c40383e55cadc7a5
@@ -0,0 +1,3 @@
1
+ # Make sure RUBY_VERSION matches the Ruby version in .ruby-version
2
+ ARG RUBY_VERSION=3.4.7
3
+ FROM ghcr.io/rails/devcontainer/images/ruby:$RUBY_VERSION
@@ -0,0 +1,13 @@
1
+ name: "llm_classifier"
2
+
3
+ services:
4
+ llm_classifier:
5
+ build:
6
+ context: ..
7
+ dockerfile: .devcontainer/Dockerfile
8
+
9
+ volumes:
10
+ - ../..:/workspaces:cached
11
+
12
+ # Overrides default command so things don't shut down after the process ends.
13
+ command: sleep infinity
@@ -0,0 +1,43 @@
1
+ // For format details, see https://containers.dev/implementors/json_reference/.
2
+ // For config options, see the README at: https://github.com/devcontainers/templates/tree/main/src/ruby
3
+ {
4
+ "name": "llm_classifier",
5
+ "dockerComposeFile": "compose.yaml",
6
+ "service": "llm_classifier",
7
+ "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
8
+
9
+ // Features to add to the dev container. More info: https://containers.dev/features.
10
+ "features": {
11
+ "ghcr.io/devcontainers/features/github-cli:1": {},
12
+ "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { "moby": false }
13
+ },
14
+
15
+ "containerEnv": {
16
+ },
17
+
18
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
19
+ "forwardPorts": [],
20
+
21
+ // Configure tool-specific properties.
22
+ "customizations": {
23
+ "vscode": {
24
+ "extensions": [
25
+ "Shopify.ruby-lsp",
26
+ "Shopify.ruby-extensions-pack",
27
+ "eamodio.gitlens",
28
+ "ms-azuretools.vscode-docker",
29
+ "Gruntfuggly.todo-tree",
30
+ "Anthropic.claude-code",
31
+ "GitHub.vscode-pull-request-github",
32
+ // "ms-azuretools.vscode-docker" — removed: duplicate of the entry above
33
+ "ms-azuretools.vscode-containers"
34
+ ]
35
+ }
36
+ },
37
+
38
+ // Uncomment to connect as root instead. More info: https://containers.dev/implementors/json_reference/#remoteUser.
39
+ // "remoteUser": "root",
40
+
41
+ // Use 'postCreateCommand' to run commands after the container is created.
42
+ "postCreateCommand": ""
43
+ }
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --require spec_helper
2
+ --format documentation
3
+ --color
data/.rubocop.yml ADDED
@@ -0,0 +1,45 @@
1
+ require:
2
+ - rubocop-rspec
3
+
4
+ AllCops:
5
+ TargetRubyVersion: 3.1
6
+ NewCops: enable
7
+ SuggestExtensions: false
8
+ Exclude:
9
+ - "vendor/**/*"
10
+ - "spec/fixtures/**/*"
11
+
12
+ Style/StringLiterals:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Style/StringLiteralsInInterpolation:
16
+ EnforcedStyle: double_quotes
17
+
18
+ Style/HashExcept:
19
+ Enabled: false # Requires ActiveSupport, which is not a dependency
20
+
21
+ Layout/LineLength:
22
+ Max: 120
23
+
24
+ Metrics/BlockLength:
25
+ Exclude:
26
+ - "spec/**/*"
27
+ - "*.gemspec"
28
+
29
+ Metrics/MethodLength:
30
+ Max: 20
31
+
32
+ Metrics/ClassLength:
33
+ Exclude:
34
+ - "lib/llm_classifier/classifier.rb"
35
+ - "lib/llm_classifier/content_fetchers/web.rb"
36
+
37
+ Metrics/ParameterLists:
38
+ Exclude:
39
+ - "lib/llm_classifier/result.rb"
40
+
41
+ RSpec/ExampleLength:
42
+ Max: 15
43
+
44
+ RSpec/MultipleExpectations:
45
+ Max: 5
data/CHANGELOG.md ADDED
@@ -0,0 +1,30 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2024-12-02
11
+
12
+ ### Added
13
+ - Initial release
14
+ - Core `Classifier` base class with DSL (categories, system_prompt, model, adapter)
15
+ - `Result` object for classification responses
16
+ - `Knowledge` class for domain-specific prompt injection
17
+ - Multi-label classification support
18
+ - Before/after classify callbacks
19
+ - LLM Adapters:
20
+ - `RubyLlm` adapter (requires ruby_llm gem)
21
+ - `OpenAI` adapter (direct API)
22
+ - `Anthropic` adapter (direct API)
23
+ - Content Fetchers:
24
+ - `Web` fetcher with SSRF protection
25
+ - `Null` fetcher for testing
26
+ - Rails integration:
27
+ - `Classifiable` concern for ActiveRecord
28
+ - Install generator
29
+ - Classifier generator
30
+ - Railtie for auto-configuration
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Axium Foundry
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,309 @@
1
+ # LlmClassifier
2
+
3
+ A flexible Ruby gem for building LLM-powered classifiers. Define categories, system prompts, and domain knowledge using a clean DSL. Supports multiple LLM backends and integrates seamlessly with Rails.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'llm_classifier'
11
+
12
+ # Add your preferred LLM adapter
13
+ gem 'ruby_llm' # recommended
14
+ # or use direct API adapters (no additional gem needed)
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ ```bash
20
+ $ bundle install
21
+ ```
22
+
23
+ For Rails applications, run the install generator:
24
+
25
+ ```bash
26
+ $ rails generate llm_classifier:install
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ### 1. Define a Classifier
32
+
33
+ ```ruby
34
+ class SentimentClassifier < LlmClassifier::Classifier
35
+ categories :positive, :negative, :neutral
36
+
37
+ system_prompt <<~PROMPT
38
+ You are a sentiment analyzer. Classify the sentiment of the given text.
39
+
40
+ Categories:
41
+ - positive: Expresses satisfaction, happiness, or approval
42
+ - negative: Expresses dissatisfaction, unhappiness, or criticism
43
+ - neutral: Neither positive nor negative, factual or balanced
44
+
45
+ Respond with ONLY a JSON object:
46
+ {
47
+ "categories": ["category"],
48
+ "confidence": 0.0-1.0,
49
+ "reasoning": "Brief explanation"
50
+ }
51
+ PROMPT
52
+ end
53
+ ```
54
+
55
+ ### 2. Use It
56
+
57
+ ```ruby
58
+ result = SentimentClassifier.classify("I absolutely love this product!")
59
+
60
+ result.success? # => true
61
+ result.category # => "positive"
62
+ result.confidence # => 0.95
63
+ result.reasoning # => "Strong positive language with 'love' and 'absolutely'"
64
+ ```
65
+
66
+ ## Configuration
67
+
68
+ ```ruby
69
+ # config/initializers/llm_classifier.rb
70
+ LlmClassifier.configure do |config|
71
+ # LLM adapter: :ruby_llm (default), :openai, :anthropic
72
+ config.adapter = :ruby_llm
73
+
74
+ # Default model for classification
75
+ config.default_model = "gpt-4o-mini"
76
+
77
+ # API keys (reads from ENV by default)
78
+ config.openai_api_key = ENV["OPENAI_API_KEY"]
79
+ config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
80
+
81
+ # Content fetching settings
82
+ config.web_fetch_timeout = 10
83
+ config.web_fetch_user_agent = "MyApp/1.0"
84
+ end
85
+ ```
86
+
87
+ ## Features
88
+
89
+ ### Multi-label Classification
90
+
91
+ ```ruby
92
+ class TopicClassifier < LlmClassifier::Classifier
93
+ categories :ruby, :rails, :javascript, :python, :devops
94
+ multi_label true # Can return multiple categories
95
+
96
+ system_prompt "Identify all programming topics mentioned..."
97
+ end
98
+
99
+ result = TopicClassifier.classify("Building a Rails API with React frontend")
100
+ result.categories # => ["rails", "javascript"]
101
+ ```
102
+
103
+ ### Domain Knowledge
104
+
105
+ Inject domain-specific knowledge into your prompts:
106
+
107
+ ```ruby
108
+ class BusinessClassifier < LlmClassifier::Classifier
109
+ categories :dealership, :mechanic, :parts, :gear
110
+
111
+ system_prompt "Classify motorcycle businesses..."
112
+
113
+ knowledge do
114
+ motorcycle_brands %w[Harley-Davidson Honda Yamaha Kawasaki]
115
+ gear_retailers ["RevZilla", "Cycle Gear", "J&P Cycles"]
116
+ classification_rules({
117
+ dealership: "Contains brand name + sales indicators",
118
+ mechanic: "Offers repair/maintenance services"
119
+ })
120
+ end
121
+ end
122
+ ```
123
+
124
+ ### Callbacks
125
+
126
+ ```ruby
127
+ class AuditedClassifier < LlmClassifier::Classifier
128
+ categories :approved, :rejected
129
+
130
+ before_classify do |input|
131
+ input.strip.downcase # Preprocess input
132
+ end
133
+
134
+ after_classify do |result|
135
+ Rails.logger.info("Classification: #{result.category}")
136
+ AuditLog.create!(result: result.to_h)
137
+ end
138
+ end
139
+ ```
140
+
141
+ ### Override Adapter Per-Classifier
142
+
143
+ ```ruby
144
+ class CriticalClassifier < LlmClassifier::Classifier
145
+ categories :high, :medium, :low
146
+ adapter :anthropic # Use Anthropic for this classifier
147
+ model "claude-sonnet-4-20250514" # Specific model
148
+ end
149
+ ```
150
+
151
+ ## Rails Integration
152
+
153
+ ### ActiveRecord Concern
154
+
155
+ ```ruby
156
+ class Review < ApplicationRecord
157
+ include LlmClassifier::Rails::Concerns::Classifiable
158
+
159
+ classifies :sentiment,
160
+ with: SentimentClassifier,
161
+ from: :body, # Column to classify
162
+ store_in: :classification_data # JSONB column for results
163
+ end
164
+
165
+ # Usage
166
+ review = Review.find(1)
167
+ review.classify_sentiment!
168
+
169
+ review.sentiment_category # => "positive"
170
+ review.sentiment_categories # => ["positive"]
171
+ review.sentiment_classification
172
+ # => {"category" => "positive", "confidence" => 0.9, ...}
173
+ ```
174
+
175
+ ### Complex Input
176
+
177
+ ```ruby
178
+ class Review < ApplicationRecord
179
+ include LlmClassifier::Rails::Concerns::Classifiable
180
+
181
+ classifies :quality,
182
+ with: QualityClassifier,
183
+ from: ->(record) {
184
+ {
185
+ title: record.title,
186
+ body: record.body,
187
+ author_reputation: record.user.reputation_score
188
+ }
189
+ },
190
+ store_in: :metadata
191
+ end
192
+ ```
193
+
194
+ ### Generators
195
+
196
+ ```bash
197
+ # Generate a new classifier
198
+ $ rails generate llm_classifier:classifier Sentiment positive negative neutral
199
+
200
+ # Creates:
201
+ # app/classifiers/sentiment_classifier.rb
202
+ # spec/classifiers/sentiment_classifier_spec.rb
203
+ ```
204
+
205
+ ## Content Fetching
206
+
207
+ Fetch and include web content in classification:
208
+
209
+ ```ruby
210
+ fetcher = LlmClassifier::ContentFetchers::Web.new(timeout: 10)
211
+ content = fetcher.fetch("https://example.com/about")
212
+
213
+ # Use in classification
214
+ result = BusinessClassifier.classify(
215
+ name: "Example Motors",
216
+ description: "Auto dealer",
217
+ website_content: content
218
+ )
219
+ ```
220
+
221
+ Features:
222
+ - SSRF protection (blocks private IPs)
223
+ - Automatic redirect handling
224
+ - HTML text extraction
225
+ - Configurable timeout and user agent
226
+
227
+ ## Adapters
228
+
229
+ ### Built-in Adapters
230
+
231
+ - **`:ruby_llm`** - Uses the [ruby_llm](https://github.com/crmne/ruby_llm) gem (recommended)
232
+ - **`:openai`** - Direct OpenAI API integration
233
+ - **`:anthropic`** - Direct Anthropic API integration
234
+
235
+ ### Custom Adapter
236
+
237
+ ```ruby
238
+ class MyCustomAdapter < LlmClassifier::Adapters::Base
239
+ def chat(model:, system_prompt:, user_prompt:)
240
+ # Make API call and return response text
241
+ MyLlmClient.complete(
242
+ model: model,
243
+ system: system_prompt,
244
+ prompt: user_prompt
245
+ )
246
+ end
247
+ end
248
+
249
+ LlmClassifier.configure do |config|
250
+ config.adapter = MyCustomAdapter
251
+ end
252
+ ```
253
+
254
+ ## Result Object
255
+
256
+ All classifications return a `LlmClassifier::Result`:
257
+
258
+ ```ruby
259
+ result = MyClassifier.classify(input)
260
+
261
+ result.success? # => true/false
262
+ result.failure? # => true/false
263
+ result.category # => "primary_category" (first)
264
+ result.categories # => ["cat1", "cat2"] (all)
265
+ result.confidence # => 0.95
266
+ result.reasoning # => "Explanation from LLM"
267
+ result.raw_response # => Original JSON string
268
+ result.metadata # => Additional data from response
269
+ result.error # => Error message if failed
270
+ result.to_h # => Hash representation
271
+ ```
272
+
273
+ ## Development
274
+
275
+ ### Using Dev Container (Recommended)
276
+
277
+ This project includes a [Dev Container](https://containers.dev/) configuration for a consistent development environment.
278
+
279
+ 1. Open the project in VS Code
280
+ 2. Install the "Dev Containers" extension if not already installed
281
+ 3. Press `Cmd+Shift+P` and select "Dev Containers: Reopen in Container"
282
+ 4. Wait for the container to build and start
283
+
284
+ The container includes Ruby 3.4.7 (matching `RUBY_VERSION` in the Dockerfile), GitHub CLI, and useful VS Code extensions.
285
+
286
+ ### Local Setup
287
+
288
+ ```bash
289
+ # Clone the repo
290
+ git clone https://github.com/AxiumFoundry/llm_classifier.git
291
+ cd llm_classifier
292
+
293
+ # Install dependencies
294
+ bundle install
295
+
296
+ # Run tests
297
+ bundle exec rspec
298
+
299
+ # Run linter
300
+ bundle exec rubocop
301
+ ```
302
+
303
+ ## Contributing
304
+
305
+ Bug reports and pull requests are welcome on GitHub at https://github.com/AxiumFoundry/llm_classifier.
306
+
307
+ ## License
308
+
309
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module LlmClassifier
8
+ module Adapters
9
+ # Adapter for Anthropic API
10
+ class Anthropic < Base
11
+ API_URL = "https://api.anthropic.com/v1/messages"
12
+ API_VERSION = "2023-06-01"
13
+
14
+ def chat(model:, system_prompt:, user_prompt:)
15
+ api_key = validate_api_key
16
+ response = send_request(model, system_prompt, user_prompt, api_key)
17
+ parse_response(response)
18
+ end
19
+
20
+ private
21
+
22
+ def validate_api_key
23
+ api_key = config.anthropic_api_key
24
+ raise ConfigurationError, "Anthropic API key not configured" unless api_key
25
+
26
+ api_key
27
+ end
28
+
29
+ def send_request(model, system_prompt, user_prompt, api_key)
30
+ uri = URI(API_URL)
31
+ http = build_http_client(uri)
32
+ request = build_request(uri, api_key, model, system_prompt, user_prompt)
33
+ http.request(request)
34
+ end
35
+
36
+ def build_http_client(uri)
37
+ http = Net::HTTP.new(uri.host, uri.port)
38
+ http.use_ssl = true
39
+ http
40
+ end
41
+
42
+ def build_request(uri, api_key, model, system_prompt, user_prompt)
43
+ request = Net::HTTP::Post.new(uri)
44
+ request["Content-Type"] = "application/json"
45
+ request["x-api-key"] = api_key
46
+ request["anthropic-version"] = API_VERSION
47
+ request.body = build_request_body(model, system_prompt, user_prompt)
48
+ request
49
+ end
50
+
51
+ def build_request_body(model, system_prompt, user_prompt)
52
+ {
53
+ model: model,
54
+ max_tokens: 1024,
55
+ system: system_prompt,
56
+ messages: [
57
+ { role: "user", content: user_prompt }
58
+ ]
59
+ }.to_json
60
+ end
61
+
62
+ def parse_response(response)
63
+ unless response.is_a?(Net::HTTPSuccess)
64
+ raise AdapterError, "Anthropic API error: #{response.code} - #{response.body}"
65
+ end
66
+
67
+ parsed = JSON.parse(response.body)
68
+ parsed.dig("content", 0, "text")
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmClassifier
4
+ module Adapters
5
+ # Base adapter class for LLM providers
6
+ class Base
7
+ def chat(model:, system_prompt:, user_prompt:)
8
+ raise NotImplementedError, "Subclasses must implement #chat"
9
+ end
10
+
11
+ protected
12
+
13
+ def config
14
+ LlmClassifier.configuration
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module LlmClassifier
8
+ module Adapters
9
+ # Adapter for OpenAI API
10
+ class OpenAI < Base
11
+ API_URL = "https://api.openai.com/v1/chat/completions"
12
+
13
+ def chat(model:, system_prompt:, user_prompt:)
14
+ api_key = validate_api_key
15
+ response = send_request(model, system_prompt, user_prompt, api_key)
16
+ parse_response(response)
17
+ end
18
+
19
+ private
20
+
21
+ def validate_api_key
22
+ api_key = config.openai_api_key
23
+ raise ConfigurationError, "OpenAI API key not configured" unless api_key
24
+
25
+ api_key
26
+ end
27
+
28
+ def send_request(model, system_prompt, user_prompt, api_key)
29
+ uri = URI(API_URL)
30
+ http = build_http_client(uri)
31
+ request = build_request(uri, api_key, model, system_prompt, user_prompt)
32
+ http.request(request)
33
+ end
34
+
35
+ def build_http_client(uri)
36
+ http = Net::HTTP.new(uri.host, uri.port)
37
+ http.use_ssl = true
38
+ http
39
+ end
40
+
41
+ def build_request(uri, api_key, model, system_prompt, user_prompt)
42
+ request = Net::HTTP::Post.new(uri)
43
+ request["Content-Type"] = "application/json"
44
+ request["Authorization"] = "Bearer #{api_key}"
45
+ request.body = build_request_body(model, system_prompt, user_prompt)
46
+ request
47
+ end
48
+
49
+ def build_request_body(model, system_prompt, user_prompt)
50
+ {
51
+ model: model,
52
+ messages: [
53
+ { role: "system", content: system_prompt },
54
+ { role: "user", content: user_prompt }
55
+ ],
56
+ temperature: 0.3
57
+ }.to_json
58
+ end
59
+
60
+ def parse_response(response)
61
+ unless response.is_a?(Net::HTTPSuccess)
62
+ raise AdapterError, "OpenAI API error: #{response.code} - #{response.body}"
63
+ end
64
+
65
+ parsed = JSON.parse(response.body)
66
+ parsed.dig("choices", 0, "message", "content")
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmClassifier
4
+ module Adapters
5
+ # Adapter for the ruby_llm gem
6
+ class RubyLlm < Base
7
+ def chat(model:, system_prompt:, user_prompt:)
8
+ ensure_ruby_llm_loaded!
9
+
10
+ chat_instance = ::RubyLLM.chat(model: model)
11
+ chat_instance.with_instructions(system_prompt)
12
+ response = chat_instance.ask(user_prompt)
13
+
14
+ response.content
15
+ end
16
+
17
+ private
18
+
19
+ def ensure_ruby_llm_loaded!
20
+ return if defined?(::RubyLLM)
21
+
22
+ begin
23
+ require "ruby_llm"
24
+ rescue LoadError
25
+ raise AdapterError, "ruby_llm gem is not installed. Add it to your Gemfile: gem 'ruby_llm'"
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end