llm_classifier 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 983b48d9d7882b918eb73b68d28aa49134e4aed1cdeb242a92eb9b7301767a89
4
+ data.tar.gz: ebe5c1dd0a365ecb63b27991fb2f19775a20efd70f1141e348a84cace9d93a8e
5
+ SHA512:
6
+ metadata.gz: b75468e630400fd04b59c2e9765c2bc504a3c8cc8cc0f652d1249661ff574b18c320851496a35c500c86ccf3e1cb5d416bc2533e8c9ff87d63d89e1ef614ab38
7
+ data.tar.gz: 4513b387bb5a03a5ee06ec7feeb3b2f026408cd82abb20585dfac62e2551b171be90329b96c1078f39e54001275479e065ab5b6d8ddf37b3c40383e55cadc7a5
@@ -0,0 +1,3 @@
1
+ # Make sure RUBY_VERSION matches the Ruby version in .ruby-version
2
+ ARG RUBY_VERSION=3.4.7
3
+ FROM ghcr.io/rails/devcontainer/images/ruby:$RUBY_VERSION
@@ -0,0 +1,13 @@
1
+ name: "llm_classifier"
2
+
3
+ services:
4
+ llm_classifier:
5
+ build:
6
+ context: ..
7
+ dockerfile: .devcontainer/Dockerfile
8
+
9
+ volumes:
10
+ - ../..:/workspaces:cached
11
+
12
+ # Overrides default command so things don't shut down after the process ends.
13
+ command: sleep infinity
@@ -0,0 +1,43 @@
1
+ // For format details, see https://containers.dev/implementors/json_reference/.
2
+ // For config options, see the README at: https://github.com/devcontainers/templates/tree/main/src/ruby
3
+ {
4
+ "name": "llm_classifier",
5
+ "dockerComposeFile": "compose.yaml",
6
+ "service": "llm_classifier",
7
+ "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
8
+
9
+ // Features to add to the dev container. More info: https://containers.dev/features.
10
+ "features": {
11
+ "ghcr.io/devcontainers/features/github-cli:1": {},
12
+ "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { "moby": false }
13
+ },
14
+
15
+ "containerEnv": {
16
+ },
17
+
18
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
19
+ "forwardPorts": [],
20
+
21
+ // Configure tool-specific properties.
22
+ "customizations": {
23
+ "vscode": {
24
+ "extensions": [
25
+ "Shopify.ruby-lsp",
26
+ "Shopify.ruby-extensions-pack",
27
+ "eamodio.gitlens",
28
+ "ms-azuretools.vscode-docker",
29
+ "Gruntfuggly.todo-tree",
30
+ "Anthropic.claude-code",
31
+ "GitHub.vscode-pull-request-github",
32
+ // "ms-azuretools.vscode-docker" — removed: duplicate of the entry above
33
+ "ms-azuretools.vscode-containers"
34
+ ]
35
+ }
36
+ },
37
+
38
+ // Uncomment to connect as root instead. More info: https://containers.dev/implementors/json_reference/#remoteUser.
39
+ // "remoteUser": "root",
40
+
41
+ // Use 'postCreateCommand' to run commands after the container is created.
42
+ "postCreateCommand": ""
43
+ }
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --require spec_helper
2
+ --format documentation
3
+ --color
data/.rubocop.yml ADDED
@@ -0,0 +1,45 @@
1
+ require:
2
+ - rubocop-rspec
3
+
4
+ AllCops:
5
+ TargetRubyVersion: 3.1
6
+ NewCops: enable
7
+ SuggestExtensions: false
8
+ Exclude:
9
+ - "vendor/**/*"
10
+ - "spec/fixtures/**/*"
11
+
12
+ Style/StringLiterals:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Style/StringLiteralsInInterpolation:
16
+ EnforcedStyle: double_quotes
17
+
18
+ Style/HashExcept:
19
+ Enabled: false # Requires ActiveSupport, which is not a dependency
20
+
21
+ Layout/LineLength:
22
+ Max: 120
23
+
24
+ Metrics/BlockLength:
25
+ Exclude:
26
+ - "spec/**/*"
27
+ - "*.gemspec"
28
+
29
+ Metrics/MethodLength:
30
+ Max: 20
31
+
32
+ Metrics/ClassLength:
33
+ Exclude:
34
+ - "lib/llm_classifier/classifier.rb"
35
+ - "lib/llm_classifier/content_fetchers/web.rb"
36
+
37
+ Metrics/ParameterLists:
38
+ Exclude:
39
+ - "lib/llm_classifier/result.rb"
40
+
41
+ RSpec/ExampleLength:
42
+ Max: 15
43
+
44
+ RSpec/MultipleExpectations:
45
+ Max: 5
data/CHANGELOG.md ADDED
@@ -0,0 +1,30 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2024-12-02
11
+
12
+ ### Added
13
+ - Initial release
14
+ - Core `Classifier` base class with DSL (categories, system_prompt, model, adapter)
15
+ - `Result` object for classification responses
16
+ - `Knowledge` class for domain-specific prompt injection
17
+ - Multi-label classification support
18
+ - Before/after classify callbacks
19
+ - LLM Adapters:
20
+ - `RubyLlm` adapter (requires ruby_llm gem)
21
+ - `OpenAI` adapter (direct API)
22
+ - `Anthropic` adapter (direct API)
23
+ - Content Fetchers:
24
+ - `Web` fetcher with SSRF protection
25
+ - `Null` fetcher for testing
26
+ - Rails integration:
27
+ - `Classifiable` concern for ActiveRecord
28
+ - Install generator
29
+ - Classifier generator
30
+ - Railtie for auto-configuration
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Axium Foundry
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,309 @@
1
+ # LlmClassifier
2
+
3
+ A flexible Ruby gem for building LLM-powered classifiers. Define categories, system prompts, and domain knowledge using a clean DSL. Supports multiple LLM backends and integrates seamlessly with Rails.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'llm_classifier'
11
+
12
+ # Add your preferred LLM adapter
13
+ gem 'ruby_llm' # recommended
14
+ # or use direct API adapters (no additional gem needed)
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ ```bash
20
+ $ bundle install
21
+ ```
22
+
23
+ For Rails applications, run the install generator:
24
+
25
+ ```bash
26
+ $ rails generate llm_classifier:install
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ### 1. Define a Classifier
32
+
33
+ ```ruby
34
+ class SentimentClassifier < LlmClassifier::Classifier
35
+ categories :positive, :negative, :neutral
36
+
37
+ system_prompt <<~PROMPT
38
+ You are a sentiment analyzer. Classify the sentiment of the given text.
39
+
40
+ Categories:
41
+ - positive: Expresses satisfaction, happiness, or approval
42
+ - negative: Expresses dissatisfaction, unhappiness, or criticism
43
+ - neutral: Neither positive nor negative, factual or balanced
44
+
45
+ Respond with ONLY a JSON object:
46
+ {
47
+ "categories": ["category"],
48
+ "confidence": 0.0-1.0,
49
+ "reasoning": "Brief explanation"
50
+ }
51
+ PROMPT
52
+ end
53
+ ```
54
+
55
+ ### 2. Use It
56
+
57
+ ```ruby
58
+ result = SentimentClassifier.classify("I absolutely love this product!")
59
+
60
+ result.success? # => true
61
+ result.category # => "positive"
62
+ result.confidence # => 0.95
63
+ result.reasoning # => "Strong positive language with 'love' and 'absolutely'"
64
+ ```
65
+
66
+ ## Configuration
67
+
68
+ ```ruby
69
+ # config/initializers/llm_classifier.rb
70
+ LlmClassifier.configure do |config|
71
+ # LLM adapter: :ruby_llm (default), :openai, :anthropic
72
+ config.adapter = :ruby_llm
73
+
74
+ # Default model for classification
75
+ config.default_model = "gpt-4o-mini"
76
+
77
+ # API keys (reads from ENV by default)
78
+ config.openai_api_key = ENV["OPENAI_API_KEY"]
79
+ config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]
80
+
81
+ # Content fetching settings
82
+ config.web_fetch_timeout = 10
83
+ config.web_fetch_user_agent = "MyApp/1.0"
84
+ end
85
+ ```
86
+
87
+ ## Features
88
+
89
+ ### Multi-label Classification
90
+
91
+ ```ruby
92
+ class TopicClassifier < LlmClassifier::Classifier
93
+ categories :ruby, :rails, :javascript, :python, :devops
94
+ multi_label true # Can return multiple categories
95
+
96
+ system_prompt "Identify all programming topics mentioned..."
97
+ end
98
+
99
+ result = TopicClassifier.classify("Building a Rails API with React frontend")
100
+ result.categories # => ["rails", "javascript"]
101
+ ```
102
+
103
+ ### Domain Knowledge
104
+
105
+ Inject domain-specific knowledge into your prompts:
106
+
107
+ ```ruby
108
+ class BusinessClassifier < LlmClassifier::Classifier
109
+ categories :dealership, :mechanic, :parts, :gear
110
+
111
+ system_prompt "Classify motorcycle businesses..."
112
+
113
+ knowledge do
114
+ motorcycle_brands %w[Harley-Davidson Honda Yamaha Kawasaki]
115
+ gear_retailers ["RevZilla", "Cycle Gear", "J&P Cycles"]
116
+ classification_rules({
117
+ dealership: "Contains brand name + sales indicators",
118
+ mechanic: "Offers repair/maintenance services"
119
+ })
120
+ end
121
+ end
122
+ ```
123
+
124
+ ### Callbacks
125
+
126
+ ```ruby
127
+ class AuditedClassifier < LlmClassifier::Classifier
128
+ categories :approved, :rejected
129
+
130
+ before_classify do |input|
131
+ input.strip.downcase # Preprocess input
132
+ end
133
+
134
+ after_classify do |result|
135
+ Rails.logger.info("Classification: #{result.category}")
136
+ AuditLog.create!(result: result.to_h)
137
+ end
138
+ end
139
+ ```
140
+
141
+ ### Override Adapter Per-Classifier
142
+
143
+ ```ruby
144
+ class CriticalClassifier < LlmClassifier::Classifier
145
+ categories :high, :medium, :low
146
+ adapter :anthropic # Use Anthropic for this classifier
147
+ model "claude-sonnet-4-20250514" # Specific model
148
+ end
149
+ ```
150
+
151
+ ## Rails Integration
152
+
153
+ ### ActiveRecord Concern
154
+
155
+ ```ruby
156
+ class Review < ApplicationRecord
157
+ include LlmClassifier::Rails::Concerns::Classifiable
158
+
159
+ classifies :sentiment,
160
+ with: SentimentClassifier,
161
+ from: :body, # Column to classify
162
+ store_in: :classification_data # JSONB column for results
163
+ end
164
+
165
+ # Usage
166
+ review = Review.find(1)
167
+ review.classify_sentiment!
168
+
169
+ review.sentiment_category # => "positive"
170
+ review.sentiment_categories # => ["positive"]
171
+ review.sentiment_classification
172
+ # => {"category" => "positive", "confidence" => 0.9, ...}
173
+ ```
174
+
175
+ ### Complex Input
176
+
177
+ ```ruby
178
+ class Review < ApplicationRecord
179
+ include LlmClassifier::Rails::Concerns::Classifiable
180
+
181
+ classifies :quality,
182
+ with: QualityClassifier,
183
+ from: ->(record) {
184
+ {
185
+ title: record.title,
186
+ body: record.body,
187
+ author_reputation: record.user.reputation_score
188
+ }
189
+ },
190
+ store_in: :metadata
191
+ end
192
+ ```
193
+
194
+ ### Generators
195
+
196
+ ```bash
197
+ # Generate a new classifier
198
+ $ rails generate llm_classifier:classifier Sentiment positive negative neutral
199
+
200
+ # Creates:
201
+ # app/classifiers/sentiment_classifier.rb
202
+ # spec/classifiers/sentiment_classifier_spec.rb
203
+ ```
204
+
205
+ ## Content Fetching
206
+
207
+ Fetch and include web content in classification:
208
+
209
+ ```ruby
210
+ fetcher = LlmClassifier::ContentFetchers::Web.new(timeout: 10)
211
+ content = fetcher.fetch("https://example.com/about")
212
+
213
+ # Use in classification
214
+ result = BusinessClassifier.classify(
215
+ name: "Example Motors",
216
+ description: "Auto dealer",
217
+ website_content: content
218
+ )
219
+ ```
220
+
221
+ Features:
222
+ - SSRF protection (blocks private IPs)
223
+ - Automatic redirect handling
224
+ - HTML text extraction
225
+ - Configurable timeout and user agent
226
+
227
+ ## Adapters
228
+
229
+ ### Built-in Adapters
230
+
231
+ - **`:ruby_llm`** - Uses the [ruby_llm](https://github.com/crmne/ruby_llm) gem (recommended)
232
+ - **`:openai`** - Direct OpenAI API integration
233
+ - **`:anthropic`** - Direct Anthropic API integration
234
+
235
+ ### Custom Adapter
236
+
237
+ ```ruby
238
+ class MyCustomAdapter < LlmClassifier::Adapters::Base
239
+ def chat(model:, system_prompt:, user_prompt:)
240
+ # Make API call and return response text
241
+ MyLlmClient.complete(
242
+ model: model,
243
+ system: system_prompt,
244
+ prompt: user_prompt
245
+ )
246
+ end
247
+ end
248
+
249
+ LlmClassifier.configure do |config|
250
+ config.adapter = MyCustomAdapter
251
+ end
252
+ ```
253
+
254
+ ## Result Object
255
+
256
+ All classifications return a `LlmClassifier::Result`:
257
+
258
+ ```ruby
259
+ result = MyClassifier.classify(input)
260
+
261
+ result.success? # => true/false
262
+ result.failure? # => true/false
263
+ result.category # => "primary_category" (first)
264
+ result.categories # => ["cat1", "cat2"] (all)
265
+ result.confidence # => 0.95
266
+ result.reasoning # => "Explanation from LLM"
267
+ result.raw_response # => Original JSON string
268
+ result.metadata # => Additional data from response
269
+ result.error # => Error message if failed
270
+ result.to_h # => Hash representation
271
+ ```
272
+
273
+ ## Development
274
+
275
+ ### Using Dev Container (Recommended)
276
+
277
+ This project includes a [Dev Container](https://containers.dev/) configuration for a consistent development environment.
278
+
279
+ 1. Open the project in VS Code
280
+ 2. Install the "Dev Containers" extension if not already installed
281
+ 3. Press `Cmd+Shift+P` and select "Dev Containers: Reopen in Container"
282
+ 4. Wait for the container to build and start
283
+
284
+ The container includes Ruby 3.4.7 (matching `RUBY_VERSION` in the Dockerfile), GitHub CLI, and useful VS Code extensions.
285
+
286
+ ### Local Setup
287
+
288
+ ```bash
289
+ # Clone the repo
290
+ git clone https://github.com/AxiumFoundry/llm_classifier.git
291
+ cd llm_classifier
292
+
293
+ # Install dependencies
294
+ bundle install
295
+
296
+ # Run tests
297
+ bundle exec rspec
298
+
299
+ # Run linter
300
+ bundle exec rubocop
301
+ ```
302
+
303
+ ## Contributing
304
+
305
+ Bug reports and pull requests are welcome on GitHub at https://github.com/AxiumFoundry/llm_classifier.
306
+
307
+ ## License
308
+
309
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module LlmClassifier
8
+ module Adapters
9
+ # Adapter for Anthropic API
10
+ class Anthropic < Base
11
+ API_URL = "https://api.anthropic.com/v1/messages"
12
+ API_VERSION = "2023-06-01"
13
+
14
+ def chat(model:, system_prompt:, user_prompt:)
15
+ api_key = validate_api_key
16
+ response = send_request(model, system_prompt, user_prompt, api_key)
17
+ parse_response(response)
18
+ end
19
+
20
+ private
21
+
22
+ def validate_api_key
23
+ api_key = config.anthropic_api_key
24
+ raise ConfigurationError, "Anthropic API key not configured" unless api_key
25
+
26
+ api_key
27
+ end
28
+
29
+ def send_request(model, system_prompt, user_prompt, api_key)
30
+ uri = URI(API_URL)
31
+ http = build_http_client(uri)
32
+ request = build_request(uri, api_key, model, system_prompt, user_prompt)
33
+ http.request(request)
34
+ end
35
+
36
+ def build_http_client(uri)
37
+ http = Net::HTTP.new(uri.host, uri.port)
38
+ http.use_ssl = true
39
+ http
40
+ end
41
+
42
+ def build_request(uri, api_key, model, system_prompt, user_prompt)
43
+ request = Net::HTTP::Post.new(uri)
44
+ request["Content-Type"] = "application/json"
45
+ request["x-api-key"] = api_key
46
+ request["anthropic-version"] = API_VERSION
47
+ request.body = build_request_body(model, system_prompt, user_prompt)
48
+ request
49
+ end
50
+
51
+ def build_request_body(model, system_prompt, user_prompt)
52
+ {
53
+ model: model,
54
+ max_tokens: 1024,
55
+ system: system_prompt,
56
+ messages: [
57
+ { role: "user", content: user_prompt }
58
+ ]
59
+ }.to_json
60
+ end
61
+
62
+ def parse_response(response)
63
+ unless response.is_a?(Net::HTTPSuccess)
64
+ raise AdapterError, "Anthropic API error: #{response.code} - #{response.body}"
65
+ end
66
+
67
+ parsed = JSON.parse(response.body)
68
+ parsed.dig("content", 0, "text")
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmClassifier
4
+ module Adapters
5
+ # Base adapter class for LLM providers
6
+ class Base
7
+ def chat(model:, system_prompt:, user_prompt:)
8
+ raise NotImplementedError, "Subclasses must implement #chat"
9
+ end
10
+
11
+ protected
12
+
13
+ def config
14
+ LlmClassifier.configuration
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module LlmClassifier
8
+ module Adapters
9
+ # Adapter for OpenAI API
10
+ class OpenAI < Base
11
+ API_URL = "https://api.openai.com/v1/chat/completions"
12
+
13
+ def chat(model:, system_prompt:, user_prompt:)
14
+ api_key = validate_api_key
15
+ response = send_request(model, system_prompt, user_prompt, api_key)
16
+ parse_response(response)
17
+ end
18
+
19
+ private
20
+
21
+ def validate_api_key
22
+ api_key = config.openai_api_key
23
+ raise ConfigurationError, "OpenAI API key not configured" unless api_key
24
+
25
+ api_key
26
+ end
27
+
28
+ def send_request(model, system_prompt, user_prompt, api_key)
29
+ uri = URI(API_URL)
30
+ http = build_http_client(uri)
31
+ request = build_request(uri, api_key, model, system_prompt, user_prompt)
32
+ http.request(request)
33
+ end
34
+
35
+ def build_http_client(uri)
36
+ http = Net::HTTP.new(uri.host, uri.port)
37
+ http.use_ssl = true
38
+ http
39
+ end
40
+
41
+ def build_request(uri, api_key, model, system_prompt, user_prompt)
42
+ request = Net::HTTP::Post.new(uri)
43
+ request["Content-Type"] = "application/json"
44
+ request["Authorization"] = "Bearer #{api_key}"
45
+ request.body = build_request_body(model, system_prompt, user_prompt)
46
+ request
47
+ end
48
+
49
+ def build_request_body(model, system_prompt, user_prompt)
50
+ {
51
+ model: model,
52
+ messages: [
53
+ { role: "system", content: system_prompt },
54
+ { role: "user", content: user_prompt }
55
+ ],
56
+ temperature: 0.3
57
+ }.to_json
58
+ end
59
+
60
+ def parse_response(response)
61
+ unless response.is_a?(Net::HTTPSuccess)
62
+ raise AdapterError, "OpenAI API error: #{response.code} - #{response.body}"
63
+ end
64
+
65
+ parsed = JSON.parse(response.body)
66
+ parsed.dig("choices", 0, "message", "content")
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LlmClassifier
4
+ module Adapters
5
+ # Adapter for the ruby_llm gem
6
+ class RubyLlm < Base
7
+ def chat(model:, system_prompt:, user_prompt:)
8
+ ensure_ruby_llm_loaded!
9
+
10
+ chat_instance = ::RubyLLM.chat(model: model)
11
+ chat_instance.with_instructions(system_prompt)
12
+ response = chat_instance.ask(user_prompt)
13
+
14
+ response.content
15
+ end
16
+
17
+ private
18
+
19
+ def ensure_ruby_llm_loaded!
20
+ return if defined?(::RubyLLM)
21
+
22
+ begin
23
+ require "ruby_llm"
24
+ rescue LoadError
25
+ raise AdapterError, "ruby_llm gem is not installed. Add it to your Gemfile: gem 'ruby_llm'"
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end