llm_classifier 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module LlmClassifier
  # Base class for LLM-backed classifiers.
  #
  # Subclasses describe themselves with the class-level DSL (+categories+,
  # +system_prompt+, +model+, +adapter+, +multi_label+, +knowledge+ and the
  # +before_classify+ / +after_classify+ hooks) and are run with +.classify+.
  class Classifier
    # Matches a reply wrapped in a Markdown code fence (optionally tagged
    # "json") and captures the fenced payload.
    CODE_FENCE = /\A\s*```(?:json)?\s*(.*?)\s*```\s*\z/mi
    private_constant :CODE_FENCE

    class << self
      attr_reader :defined_categories, :defined_system_prompt, :defined_model,
                  :defined_adapter, :defined_multi_label, :defined_knowledge,
                  :before_classify_callbacks, :after_classify_callbacks

      # Reads the allowed categories, or sets them when arguments are given.
      #
      # @param cats [Array<#to_s>] category names; stored as strings
      # @return [Array<String>] the categories (empty when none were declared)
      def categories(*cats)
        return @defined_categories || [] if cats.empty?

        @defined_categories = cats.map(&:to_s)
      end

      # Reads or sets a custom system prompt. Without one, the instance falls
      # back to the generated default prompt.
      #
      # @param prompt [String, nil]
      # @return [String, nil]
      def system_prompt(prompt = nil)
        return @defined_system_prompt if prompt.nil?

        @defined_system_prompt = prompt
      end

      # Reads or sets the model name; falls back to the configured default.
      #
      # @param model_name [String, nil]
      def model(model_name = nil)
        return @defined_model || LlmClassifier.configuration.default_model if model_name.nil?

        @defined_model = model_name
      end

      # Reads or sets the adapter (a known symbol or an adapter class); falls
      # back to the configured adapter.
      #
      # @param adapter_name [Symbol, Class, nil]
      def adapter(adapter_name = nil)
        return @defined_adapter || LlmClassifier.configuration.adapter if adapter_name.nil?

        @defined_adapter = adapter_name
      end

      # Reads or sets multi-label mode.
      #
      # @param value [Boolean, nil]
      # @return [Boolean] +false+ unless explicitly enabled
      def multi_label(value = nil)
        return @defined_multi_label || false if value.nil?

        @defined_multi_label = value
      end

      # With a block, builds a fresh Knowledge object and evaluates the block
      # against it; always returns the current knowledge (or nil).
      def knowledge(&block)
        if block
          @defined_knowledge = Knowledge.new
          @defined_knowledge.instance_eval(&block)
        end
        @defined_knowledge
      end

      # Registers a hook that receives the input before classification. A
      # truthy return value replaces the input for the following steps.
      #
      # @raise [ArgumentError] when no block is given (a stored nil hook would
      #   otherwise make every later classification fail)
      def before_classify(&block)
        raise ArgumentError, "before_classify requires a block" unless block

        (@before_classify_callbacks ||= []) << block
      end

      # Registers a hook that receives the result after classification.
      #
      # @raise [ArgumentError] when no block is given
      def after_classify(&block)
        raise ArgumentError, "after_classify requires a block" unless block

        (@after_classify_callbacks ||= []) << block
      end

      # Convenience entry point: instantiate and classify in one call.
      def classify(input, **options)
        new(input, **options).classify
      end
    end

    attr_reader :input, :options

    # @param input [Object] the value to classify
    # @param options [Hash] extra options, stored and exposed via +options+
    def initialize(input, **options)
      @input = input
      @options = options
    end

    # Runs the before hooks, asks the adapter, parses the reply and runs the
    # after hooks. Any StandardError raised along the way is converted into a
    # failure result carrying the error message.
    def classify
      processed_input = run_before_callbacks(@input)
      result = perform_classification(processed_input)
      run_after_callbacks(result)
      result
    rescue StandardError => e
      Result.failure(error: e.message)
    end

    private

    # Threads the input through each before hook (run in instance context);
    # a hook returning nil/false leaves the current value unchanged.
    def run_before_callbacks(input)
      callbacks = self.class.before_classify_callbacks || []
      callbacks.reduce(input) { |acc, callback| instance_exec(acc, &callback) || acc }
    end

    # Invokes each after hook in instance context; return values are ignored.
    def run_after_callbacks(result)
      callbacks = self.class.after_classify_callbacks || []
      callbacks.each { |callback| instance_exec(result, &callback) }
    end

    # Sends the prompts to the adapter's +chat+ and parses whatever it returns.
    def perform_classification(processed_input)
      response = build_adapter.chat(
        model: self.class.model,
        system_prompt: build_system_prompt,
        user_prompt: build_user_prompt(processed_input)
      )

      parse_response(response)
    end

    # Resolves the adapter setting to a class and instantiates it.
    #
    # @raise [AdapterError] for anything other than a known symbol or a Class
    def build_adapter
      adapter_name = self.class.adapter
      adapter_class = case adapter_name
                      when :ruby_llm then Adapters::RubyLlm
                      when :openai then Adapters::OpenAI
                      when :anthropic then Adapters::Anthropic
                      when Class then adapter_name
                      else
                        raise AdapterError, "Unknown adapter: #{adapter_name}"
                      end
      adapter_class.new
    end

    # Custom or default system prompt, followed by the knowledge section when
    # knowledge has been declared.
    def build_system_prompt
      prompt = self.class.system_prompt || default_system_prompt
      knowledge = self.class.knowledge

      prompt = "#{prompt}\n\n#{knowledge.to_prompt}" if knowledge

      prompt
    end

    # Generated prompt listing the categories and the expected JSON shape.
    def default_system_prompt
      categories = self.class.categories.join(", ")
      multi = self.class.multi_label

      <<~PROMPT
        You are a classifier. Classify the given input into #{multi ? "one or more of" : "exactly one of"} these categories: #{categories}.

        Respond with ONLY a JSON object in this format:
        {
          "categories": [#{multi ? '"category1", "category2"' : '"category"'}],
          "confidence": 0.0-1.0,
          "reasoning": "Brief explanation"
        }
      PROMPT
    end

    # Strings pass through, hashes become "key: value" lines, anything else
    # is converted with +to_s+.
    def build_user_prompt(processed_input)
      case processed_input
      when String
        processed_input
      when Hash
        processed_input.map { |k, v| "#{k}: #{v}" }.join("\n")
      else
        processed_input.to_s
      end
    end

    # Parses the adapter reply into a Result.
    #
    # A surrounding Markdown code fence is removed before parsing, and a reply
    # that parses to something other than a JSON object yields a failure
    # result. The untouched reply is always what ends up in +raw_response+.
    def parse_response(response)
      json = JSON.parse(strip_code_fence(response))
      unless json.is_a?(Hash)
        return Result.failure(error: "Response is not a JSON object", raw_response: response)
      end

      valid_categories = extract_valid_categories(json)

      return build_failure_result(response, json) if should_fail?(valid_categories)

      build_success_result(json, valid_categories, response)
    rescue JSON::ParserError => e
      Result.failure(error: "Failed to parse response: #{e.message}", raw_response: response)
    end

    # Returns the payload inside a code fence, or the reply unchanged when it
    # is not fenced (non-strings are passed through untouched).
    def strip_code_fence(response)
      return response unless response.is_a?(String)

      fenced = CODE_FENCE.match(response)
      fenced ? fenced[1] : response
    end

    # Categories from "categories" (or "category") that were declared on the class.
    def extract_valid_categories(json)
      raw_categories = Array(json["categories"] || json["category"])
      raw_categories.select { |c| self.class.categories.include?(c.to_s) }
    end

    # Single-label classifiers with declared categories must get at least one
    # valid category back; multi-label classifiers may legitimately get none.
    def should_fail?(valid_categories)
      valid_categories.empty? && !self.class.categories.empty? && !self.class.multi_label
    end

    def build_failure_result(response, json)
      Result.failure(
        error: "No valid categories returned",
        raw_response: response,
        metadata: { parsed: json }
      )
    end

    # Builds the success result; keys other than the well-known ones are
    # passed along as metadata.
    def build_success_result(json, valid_categories, response)
      categories = self.class.multi_label ? valid_categories : [valid_categories.first].compact
      excluded_keys = %w[categories category confidence reasoning]
      metadata = json.reject { |k, _| excluded_keys.include?(k) }

      Result.success(
        categories: categories,
        confidence: json["confidence"]&.to_f,
        reasoning: json["reasoning"],
        raw_response: response,
        metadata: metadata
      )
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+
5
module LlmClassifier
  # Holds the library-wide settings: adapter choice, default model, API keys,
  # web-fetch settings, default queue name and logger.
  class Configuration
    attr_accessor :adapter, :default_model, :openai_api_key, :anthropic_api_key,
                  :web_fetch_timeout, :web_fetch_user_agent, :default_queue,
                  :logger

    # Populates every setting with its default. API keys are read from the
    # OPENAI_API_KEY / ANTHROPIC_API_KEY environment variables (nil if unset).
    def initialize
      @adapter = :ruby_llm
      @default_model = "gpt-4o-mini"
      @openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
      @anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
      @web_fetch_timeout = 10
      @web_fetch_user_agent = "LlmClassifier/#{VERSION}"
      @default_queue = :classification
      @logger = default_logger
    end

    # Resolves the +adapter+ setting to an adapter class.
    #
    # @return [Class]
    # @raise [ConfigurationError] when the setting is neither a known symbol
    #   nor a Class
    def adapter_class
      case adapter
      when :ruby_llm
        Adapters::RubyLlm
      when :openai
        Adapters::OpenAI
      when :anthropic
        Adapters::Anthropic
      when Class
        adapter
      else
        raise ConfigurationError, "Unknown adapter: #{adapter}"
      end
    end

    private

    # Uses the Rails logger when one is actually available. A top-level Rails
    # constant can exist without a +logger+ method, and +Rails.logger+ can be
    # nil; both cases fall back to a logger writing to $stdout.
    def default_logger
      rails_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
      rails_logger || Logger.new($stdout)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmClassifier
  module ContentFetchers
    # Abstract parent of all content fetchers; concrete fetchers override #fetch.
    class Base
      # @param source [Object] what to fetch; meaning is defined by subclasses
      # @raise [NotImplementedError] always, in this base implementation
      def fetch(source)
        raise NotImplementedError, "Subclasses must implement #fetch"
      end

      protected

      # Shortcut to the library-wide configuration object.
      def config = LlmClassifier.configuration
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmClassifier
  module ContentFetchers
    # Fetcher that never retrieves anything: #fetch ignores its argument and
    # returns nil.
    class Null < Base
      def fetch(_source) = nil
    end
  end
end
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "resolv"
6
+ require "ipaddr"
7
+
8
+ module LlmClassifier
9
+ module ContentFetchers
10
+ # Web content fetcher with SSRF protection
11
+ class Web < Base
12
# Address blocks that must never be fetched. Besides the RFC 1918 private,
# loopback, link-local and unique-local ranges this also lists other
# non-public blocks an attacker-controlled hostname could resolve to.
PRIVATE_IP_RANGES = [
  IPAddr.new("10.0.0.0/8"),
  IPAddr.new("172.16.0.0/12"),
  IPAddr.new("192.168.0.0/16"),
  IPAddr.new("127.0.0.0/8"),
  IPAddr.new("169.254.0.0/16"),
  IPAddr.new("::1/128"),
  IPAddr.new("fc00::/7"),
  IPAddr.new("fe80::/10"),
  IPAddr.new("0.0.0.0/8"),     # "this network"; 0.0.0.0 can reach the local host on many systems
  IPAddr.new("100.64.0.0/10"), # carrier-grade NAT shared address space
  IPAddr.new("198.18.0.0/15"), # benchmarking range
  IPAddr.new("224.0.0.0/4"),   # multicast
  IPAddr.new("240.0.0.0/4"),   # reserved, includes the broadcast address
  IPAddr.new("::/128")         # IPv6 unspecified address
].freeze
22
+
23
+ attr_reader :debug_info
24
+
25
# Sets up the fetcher; explicit arguments win over the configured
# web_fetch_timeout / web_fetch_user_agent values.
#
# @param timeout [Numeric, nil] used for both open and read timeouts
# @param user_agent [String, nil] value sent in the User-Agent header
def initialize(timeout: nil, user_agent: nil)
  super()
  @debug_info = {}
  @timeout = timeout || config.web_fetch_timeout
  @user_agent = user_agent || config.web_fetch_user_agent
end
31
+
32
# Fetches +url+ and returns its extracted text.
#
# Diagnostic details about the attempt are recorded in +debug_info+, which
# is reset at the start of every call so entries from an earlier fetch
# (for example a previous :error) cannot leak into this one.
#
# @param url [String, nil] address to fetch; "https://" is assumed when no
#   http/https scheme is present
# @return [String, nil] extracted text, or nil for blank input, an empty
#   response or any StandardError raised while fetching
def fetch(url)
  @debug_info = {}
  return nil if url.nil? || url.empty?

  url = normalize_url(url)
  @debug_info[:url] = url

  response = fetch_url(url)
  return handle_empty_response if response.nil? || response.empty?

  process_successful_response(response)
rescue StandardError => e
  handle_error(e)
end
45
+
46
+ private
47
+
48
# Returns +url+ unchanged when it already starts with http:// or https://
# (case-insensitive); otherwise prefixes it with "https://".
def normalize_url(url)
  return url if url.match?(%r{\Ahttps?://}i)

  "https://#{url}"
end
51
+
52
# Performs one GET for +url+ and hands the response to the response handler.
# Gives up with nil once the redirect budget hits zero, or when the host
# does not pass the public-host validation.
#
# @param url [String]
# @param redirect_limit [Integer] remaining hops (defaults to 3)
def fetch_url(url, redirect_limit = 3)
  return if redirect_limit.zero?

  target = URI.parse(url)
  return unless validate_host_is_public(target)

  handle_http_response(send_http_request(target), url, redirect_limit)
end
61
+
62
# Decides whether +uri+ may be fetched: it must use http or https, have a
# host, and that host must resolve to at least one address with EVERY
# resolved address being public. Accepting a host because just one of its
# addresses is public would let a name that also resolves to an internal
# address slip through.
#
# @param uri [URI::Generic]
# @return [Boolean] false on any doubt, including resolution errors
def validate_host_is_public(uri)
  return false unless %w[http https].include?(uri.scheme)
  return false if uri.host.nil?

  addresses = Resolv.getaddresses(uri.host)
  return false if addresses.empty?

  addresses.none? { |addr| private_ip?(addr) }
rescue Resolv::ResolvError
  false
end
71
+
72
# Tells whether +address+ falls inside any PRIVATE_IP_RANGES entry.
# Unparseable addresses count as private, so the caller fails closed.
#
# @param address [String] textual IPv4/IPv6 address
# @return [Boolean]
def private_ip?(address)
  # `native` rewrites IPv4-mapped/compatible IPv6 forms such as
  # ::ffff:127.0.0.1 to plain IPv4 so the IPv4 ranges apply to them;
  # every other address is returned unchanged.
  ip = IPAddr.new(address).native
  PRIVATE_IP_RANGES.any? { |range| range.include?(ip) }
rescue IPAddr::InvalidAddressError
  true
end
78
+
79
# Turns a Location header value into an absolute URL.
#
# Absolute http(s) URLs are returned as-is, protocol-relative ones ("//host")
# inherit the scheme of +base_url+, and everything else is resolved against
# +base_url+.
#
# @param base_url [String] URL of the response that issued the redirect
# @param redirect_url [String, nil] raw Location header value
# @return [String, nil] nil when the header is missing, blank or invalid
def normalize_redirect_url(base_url, redirect_url)
  # Plain-Ruby blank check: `blank?` only exists when ActiveSupport is loaded.
  return nil if redirect_url.to_s.match?(/\A[[:space:]]*\z/)

  if redirect_url.start_with?("http://", "https://")
    redirect_url
  elsif redirect_url.start_with?("//")
    uri = URI.parse(base_url)
    "#{uri.scheme}:#{redirect_url}"
  else
    URI.join(base_url, redirect_url).to_s
  end
rescue URI::InvalidURIError
  nil
end
93
+
94
# Records in the debug info that the fetch produced no body; returns nil.
def handle_empty_response
  @debug_info.store(:status, "failed_empty_response")
  nil
end
98
+
99
# Extracts the text from a response body, records status, length and a
# 500-character preview in the debug info, and returns the text.
def process_successful_response(response)
  extract_text_content(response).tap do |text|
    @debug_info.merge!(
      status: "success",
      content_length: text&.length || 0,
      content_preview: text ? truncate_string(text, 500) : nil
    )
  end
end
106
+
107
# Records the failure and its message in the debug info; returns nil so the
# caller's fetch yields no content.
def handle_error(error)
  @debug_info.update(status: "error", error: error.message)
  nil
end
112
+
113
# Builds the client and the GET request for +uri+ and executes the request,
# returning the client's response.
def send_http_request(uri)
  build_http_client(uri).request(build_http_request(uri))
end
118
+
119
# Creates an unconnected Net::HTTP client for the URI's host and port, with
# TLS enabled for https and the configured timeout applied to both
# connecting and reading.
def build_http_client(uri)
  Net::HTTP.new(uri.host, uri.port).tap do |client|
    client.use_ssl = (uri.scheme == "https")
    client.open_timeout = @timeout
    client.read_timeout = @timeout
  end
end
126
+
127
# Builds the GET request for +uri+ with explicit Host and User-Agent headers.
#
# The Host header carries the port whenever it differs from the scheme's
# default; sending only the host name would misaddress servers on
# non-standard ports.
#
# @param uri [URI::HTTP]
# @return [Net::HTTP::Get]
def build_http_request(uri)
  request = Net::HTTP::Get.new(uri.request_uri)
  request["Host"] = uri.port == uri.default_port ? uri.host : "#{uri.host}:#{uri.port}"
  request["User-Agent"] = @user_agent
  request
end
133
+
134
# Maps a response to content: the body for success responses, the result
# of following the redirect for redirections, nil for everything else.
def handle_http_response(response, url, redirect_limit)
  case response
  when Net::HTTPSuccess then response.body
  when Net::HTTPRedirection then handle_redirect(response, url, redirect_limit)
  end
end
140
+
141
# Follows a redirect: resolves the Location header against +url+ and fetches
# the target with one hop fewer. Returns nil when no usable target results.
def handle_redirect(response, url, redirect_limit)
  target = normalize_redirect_url(url, response["location"])
  target ? fetch_url(target, redirect_limit - 1) : nil
end
147
+
148
# Reduces an HTML document to plain text: drops script, style, nav, footer
# and header elements, takes the body text, collapses whitespace runs to
# single spaces and caps the result at 2000 characters.
#
# @param html [String, nil]
# @return [String, nil] nil for nil/empty input
def extract_text_content(html)
  return if html.nil? || html.empty?

  require_nokogiri!

  document = Nokogiri::HTML(html)
  document.css("script, style, nav, footer, header").remove

  body_text = document.css("body").text.gsub(/\s+/, " ").strip
  truncate_string(body_text, 2000)
end
160
+
161
# Returns +str+ itself when it is at most +max_length+ characters long;
# otherwise its first +max_length+ characters followed by "...".
def truncate_string(str, max_length)
  str.length > max_length ? "#{str[0...max_length]}..." : str
end
166
+
167
# Loads nokogiri on first use. When the gem cannot be loaded, the LoadError
# is replaced by the library's own Error with an installation hint.
def require_nokogiri!
  require "nokogiri" unless defined?(Nokogiri)
rescue LoadError
  raise Error, "nokogiri gem is required for web content fetching. Add it to your Gemfile."
end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmClassifier
  # Container for domain knowledge declared through a small DSL and rendered
  # into prompt text.
  #
  # Calling any otherwise-undefined method with an argument stores that
  # argument under the method's name; calling it without arguments reads the
  # stored value back. Names that collide with methods every object already
  # has (for example +format+ or +display+) never reach +method_missing+ and
  # therefore cannot be used as entry names.
  class Knowledge
    def initialize
      @entries = {}
    end

    # Stores (with an argument) or reads (without) the entry called +name+.
    #
    # The setter branch is chosen by argument presence, not truthiness, so
    # +false+ and +nil+ are valid values.
    #
    # @raise [NoMethodError] when reading an entry that was never stored
    def method_missing(name, *args, &block)
      if !args.empty?
        @entries[name] = args.first
      elsif @entries.key?(name)
        @entries[name]
      else
        super
      end
    end

    # Stored entries answer +respond_to?+.
    def respond_to_missing?(name, include_private = false)
      @entries.key?(name) || super
    end

    # Renders all entries as a "DOMAIN KNOWLEDGE" section: each entry becomes
    # an upper-cased heading (underscores turned into spaces) followed by its
    # value — arrays comma-joined, hashes as "key: value" lines, anything
    # else via +to_s+.
    #
    # @return [String] empty string when nothing has been stored
    def to_prompt
      return "" if @entries.empty?

      sections = @entries.map do |key, value|
        formatted_key = key.to_s.tr("_", " ").upcase
        formatted_value = case value
                          when Array then value.join(", ")
                          when Hash then value.map { |k, v| "#{k}: #{v}" }.join("\n ")
                          else value.to_s
                          end
        "#{formatted_key}:\n#{formatted_value}"
      end

      "DOMAIN KNOWLEDGE:\n\n#{sections.join("\n\n")}"
    end

    # @return [Hash] a shallow copy of the stored entries
    def to_h
      @entries.dup
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmClassifier
  module Rails
    module Concerns
      # Concern that adds a +classifies+ macro to the including class.
      module Classifiable
        extend ActiveSupport::Concern

        class_methods do
          # Declares a classification for +attribute+ and generates
          # +classify_<attribute>!+, +<attribute>_category+,
          # +<attribute>_categories+ and +<attribute>_classification+.
          #
          # @param attribute [Symbol, String] name used for the generated methods
          # @param with [#classify] classifier that receives the built input
          # @param from [Symbol, Proc, Array, Object] where the input comes from
          # @param store_in [Symbol, String, nil] hash-like attribute that holds
          #   the result; without it results are kept in an instance variable
          def classifies(attribute, with:, from:, store_in: nil)
            # Runs the classifier and stores the outcome only on success.
            define_method("classify_#{attribute}!") do
              outcome = with.classify(build_classification_input(from))
              store_classification_result(attribute, outcome, store_in) if outcome.success?
              outcome
            end

            # Stored single category.
            define_method("#{attribute}_category") do
              stored = get_stored_classification(attribute, store_in)
              stored&.dig("category")
            end

            # Stored category list (empty when nothing is stored).
            define_method("#{attribute}_categories") do
              stored = get_stored_classification(attribute, store_in)
              stored&.dig("categories") || []
            end

            # Everything stored for this classification.
            define_method("#{attribute}_classification") do
              get_stored_classification(attribute, store_in)
            end
          end
        end

        private

        # Symbol: call that method; Proc: call it with the record; Array: hash
        # of each name to the value of that method; anything else: used as-is.
        def build_classification_input(source)
          case source
          when Symbol then send(source)
          when Proc then source.call(self)
          when Array then source.to_h { |attr| [attr, send(attr)] }
          else source
          end
        end

        # Writes the result under "<attribute>_classification" in the storage
        # attribute (saving when the record is persisted), or keeps it in
        # memory when no storage attribute was configured.
        def store_classification_result(attribute, result, storage_column)
          payload = {
            "category" => result.category,
            "categories" => result.categories,
            "confidence" => result.confidence,
            "reasoning" => result.reasoning,
            "classified_at" => Time.current.iso8601
          }

          if storage_column
            existing = send(storage_column) || {}
            send("#{storage_column}=", existing.merge("#{attribute}_classification" => payload))
            save! if persisted?
          else
            (@classification_results ||= {})[attribute] = payload
          end
        end

        # Reads the stored result from the storage attribute or from memory.
        def get_stored_classification(attribute, storage_column)
          return @classification_results&.dig(attribute) unless storage_column

          send(storage_column)&.dig("#{attribute}_classification")
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+
5
module LlmClassifier
  module Generators
    # Rails generator that creates a classifier class and, when the target
    # application has a spec directory, a matching spec file.
    class ClassifierGenerator < ::Rails::Generators::NamedBase
      source_root File.expand_path("templates", __dir__)

      desc "Creates an LlmClassifier classifier class"

      argument :categories, type: :array, default: [], banner: "category1 category2 ..."

      # Renders the classifier template into app/classifiers.
      def create_classifier_file
        template "classifier.rb.erb", File.join("app/classifiers", "#{file_name}.rb")
      end

      # Renders the spec template, but only when a spec directory exists.
      # The check is made against the generator's destination root — the same
      # place +template+ writes to — rather than the process working directory.
      def create_spec_file
        return unless File.exist?(File.join(destination_root, "spec"))

        template "classifier_spec.rb.erb", File.join("spec/classifiers", "#{file_name}_spec.rb")
      end

      private

      # Categories given on the command line, or two placeholders when none
      # were given.
      def categories_array
        return %w[category_a category_b] if categories.empty?

        categories
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+
5
module LlmClassifier
  module Generators
    # Rails generator that installs the LlmClassifier initializer and creates
    # the app/classifiers directory.
    class InstallGenerator < ::Rails::Generators::Base
      source_root File.expand_path("templates", __dir__)

      desc "Creates an LlmClassifier initializer"

      # Writes config/initializers/llm_classifier.rb.
      #
      # The version interpolation in the user-agent line is escaped so that it
      # lands in the generated file verbatim and is evaluated when the
      # initializer runs, instead of being frozen to the version that happened
      # to be installed when the generator ran.
      def create_initializer_file
        create_file "config/initializers/llm_classifier.rb", <<~RUBY
          # frozen_string_literal: true

          LlmClassifier.configure do |config|
            # LLM adapter to use. Options: :ruby_llm, :openai, :anthropic
            config.adapter = :ruby_llm

            # Default model for classification
            config.default_model = "gpt-4o-mini"

            # API keys (reads from ENV by default)
            # config.openai_api_key = ENV["OPENAI_API_KEY"]
            # config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]

            # Content fetching settings
            config.web_fetch_timeout = 10
            config.web_fetch_user_agent = "LlmClassifier/\#{LlmClassifier::VERSION}"

            # Rails integration
            config.default_queue = :classification
          end
        RUBY
      end

      # Creates app/classifiers with a .keep file so the directory is tracked.
      def create_classifiers_directory
        empty_directory "app/classifiers"
        create_file "app/classifiers/.keep", ""
      end

      # Prints the follow-up instructions.
      def show_post_install_message
        say "\n"
        say "LlmClassifier installed successfully!", :green
        say "\n"
        say "Next steps:"
        say " 1. Configure your API keys in config/initializers/llm_classifier.rb"
        say " 2. Generate a classifier: rails g llm_classifier:classifier SentimentClassifier"
        say "\n"
      end
    end
  end
end