llm_classifier 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.devcontainer/Dockerfile +3 -0
- data/.devcontainer/compose.yaml +13 -0
- data/.devcontainer/devcontainer.json +43 -0
- data/.rspec +3 -0
- data/.rubocop.yml +45 -0
- data/CHANGELOG.md +30 -0
- data/LICENSE.txt +21 -0
- data/README.md +309 -0
- data/Rakefile +12 -0
- data/lib/llm_classifier/adapters/anthropic.rb +72 -0
- data/lib/llm_classifier/adapters/base.rb +18 -0
- data/lib/llm_classifier/adapters/openai.rb +70 -0
- data/lib/llm_classifier/adapters/ruby_llm.rb +30 -0
- data/lib/llm_classifier/classifier.rb +206 -0
- data/lib/llm_classifier/configuration.rb +38 -0
- data/lib/llm_classifier/content_fetchers/base.rb +18 -0
- data/lib/llm_classifier/content_fetchers/null.rb +12 -0
- data/lib/llm_classifier/content_fetchers/web.rb +178 -0
- data/lib/llm_classifier/knowledge.rb +44 -0
- data/lib/llm_classifier/rails/concerns/classifiable.rb +88 -0
- data/lib/llm_classifier/rails/generators/classifier_generator.rb +34 -0
- data/lib/llm_classifier/rails/generators/install_generator.rb +54 -0
- data/lib/llm_classifier/rails/generators/templates/classifier.rb.erb +48 -0
- data/lib/llm_classifier/rails/generators/templates/classifier_spec.rb.erb +15 -0
- data/lib/llm_classifier/rails/railtie.rb +18 -0
- data/lib/llm_classifier/result.rb +65 -0
- data/lib/llm_classifier/version.rb +5 -0
- data/lib/llm_classifier.rb +41 -0
- metadata +88 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module LlmClassifier
|
|
6
|
+
# Base classifier class that provides a DSL for defining LLM-powered classifiers
|
|
7
|
+
class Classifier
  class << self
    attr_reader :defined_categories, :defined_system_prompt, :defined_model,
                :defined_adapter, :defined_multi_label, :defined_knowledge,
                :before_classify_callbacks, :after_classify_callbacks

    # Declares the allowed categories (stored as strings). Called without
    # arguments it returns the declared categories, or [] when none are set.
    def categories(*cats)
      if cats.empty?
        @defined_categories || []
      else
        @defined_categories = cats.map(&:to_s)
      end
    end

    # Sets the system prompt; without an argument returns the stored prompt
    # (nil when none was declared).
    def system_prompt(prompt = nil)
      if prompt.nil?
        @defined_system_prompt
      else
        @defined_system_prompt = prompt
      end
    end

    # Sets the model name; without an argument returns the declared model or
    # falls back to the configured default model.
    def model(model_name = nil)
      if model_name.nil?
        @defined_model || LlmClassifier.configuration.default_model
      else
        @defined_model = model_name
      end
    end

    # Sets the adapter (a known symbol or an adapter class); without an
    # argument returns the declared adapter or the configured one.
    def adapter(adapter_name = nil)
      if adapter_name.nil?
        @defined_adapter || LlmClassifier.configuration.adapter
      else
        @defined_adapter = adapter_name
      end
    end

    # Enables/disables multi-label mode; without an argument returns the
    # current setting (false by default).
    def multi_label(value = nil)
      if value.nil?
        @defined_multi_label || false
      else
        @defined_multi_label = value
      end
    end

    # With a block, builds a fresh Knowledge object and evaluates the block in
    # its context. Always returns the current knowledge object (or nil).
    def knowledge(&block)
      if block
        @defined_knowledge = Knowledge.new
        @defined_knowledge.instance_eval(&block)
      end
      @defined_knowledge
    end

    # Registers a callback that receives the input before classification.
    # A truthy return value replaces the input for the next step.
    #
    # @raise [ArgumentError] when no block is given (a nil callback would
    #   otherwise only fail later, at classification time)
    def before_classify(&block)
      raise ArgumentError, "before_classify requires a block" unless block

      @before_classify_callbacks ||= []
      @before_classify_callbacks << block
    end

    # Registers a callback that receives the result after classification.
    #
    # @raise [ArgumentError] when no block is given
    def after_classify(&block)
      raise ArgumentError, "after_classify requires a block" unless block

      @after_classify_callbacks ||= []
      @after_classify_callbacks << block
    end

    # Convenience entry point: builds an instance and classifies the input.
    def classify(input, **options)
      new(input, **options).classify
    end
  end

  attr_reader :input, :options

  def initialize(input, **options)
    @input = input
    @options = options
  end

  # Runs before-callbacks, asks the adapter for a classification, runs
  # after-callbacks and returns the result. Any StandardError raised along
  # the way is converted into a failure result carrying the error message.
  def classify
    processed_input = run_before_callbacks(@input)
    result = perform_classification(processed_input)
    run_after_callbacks(result)
    result
  rescue StandardError => e
    Result.failure(error: e.message)
  end

  private

  # Threads the input through every before-callback; a callback returning
  # nil/false leaves the value unchanged.
  def run_before_callbacks(input)
    callbacks = self.class.before_classify_callbacks || []
    callbacks.reduce(input) { |acc, callback| instance_exec(acc, &callback) || acc }
  end

  def run_after_callbacks(result)
    callbacks = self.class.after_classify_callbacks || []
    callbacks.each { |callback| instance_exec(result, &callback) }
  end

  def perform_classification(processed_input)
    adapter_instance = build_adapter
    response = adapter_instance.chat(
      model: self.class.model,
      system_prompt: build_system_prompt,
      user_prompt: build_user_prompt(processed_input)
    )

    parse_response(response)
  end

  # Resolves the configured adapter (symbol or class) and instantiates it.
  def build_adapter
    adapter_name = self.class.adapter
    adapter_class = case adapter_name
                    when :ruby_llm then Adapters::RubyLlm
                    when :openai then Adapters::OpenAI
                    when :anthropic then Adapters::Anthropic
                    when Class then adapter_name
                    else
                      raise AdapterError, "Unknown adapter: #{adapter_name}"
                    end
    adapter_class.new
  end

  # Returns the declared (or default) system prompt, followed by the domain
  # knowledge section when there is any. Empty knowledge is skipped so the
  # prompt does not end in dangling blank lines.
  def build_system_prompt
    prompt = self.class.system_prompt || default_system_prompt
    knowledge_prompt = self.class.knowledge&.to_prompt.to_s
    return prompt if knowledge_prompt.empty?

    "#{prompt}\n\n#{knowledge_prompt}"
  end

  def default_system_prompt
    categories = self.class.categories.join(", ")
    multi = self.class.multi_label

    <<~PROMPT
      You are a classifier. Classify the given input into #{multi ? "one or more of" : "exactly one of"} these categories: #{categories}.

      Respond with ONLY a JSON object in this format:
      {
        "categories": [#{multi ? '"category1", "category2"' : '"category"'}],
        "confidence": 0.0-1.0,
        "reasoning": "Brief explanation"
      }
    PROMPT
  end

  # Strings pass through, hashes become "key: value" lines, anything else is
  # converted with #to_s.
  def build_user_prompt(processed_input)
    case processed_input
    when String
      processed_input
    when Hash
      processed_input.map { |k, v| "#{k}: #{v}" }.join("\n")
    else
      processed_input.to_s
    end
  end

  # Parses the adapter reply into a Result. Unparseable replies and replies
  # that are valid JSON but not an object both yield a failure result that
  # still carries the raw response.
  def parse_response(response)
    json = JSON.parse(extract_json_payload(response))
    unless json.is_a?(Hash)
      return Result.failure(error: "Failed to parse response: expected a JSON object",
                            raw_response: response)
    end

    valid_categories = extract_valid_categories(json)

    return build_failure_result(response, json) if should_fail?(valid_categories)

    build_success_result(json, valid_categories, response)
  rescue JSON::ParserError => e
    Result.failure(error: "Failed to parse response: #{e.message}", raw_response: response)
  end

  # Strips surrounding whitespace and an optional Markdown code fence
  # (``` or ```json) from the reply so fenced JSON can still be parsed.
  def extract_json_payload(response)
    text = response.to_s.strip
    fenced = text.match(/\A```(?:json)?\s*(.*?)\s*```\z/mi)
    fenced ? fenced[1] : text
  end

  # Reads "categories" (or "category"), normalises every entry to a string
  # and keeps only the ones declared on the classifier.
  def extract_valid_categories(json)
    raw_categories = Array(json["categories"] || json["category"])
    raw_categories.map(&:to_s).select { |c| self.class.categories.include?(c) }
  end

  # A single-label classifier with declared categories must get at least one
  # valid category back; multi-label classifiers may legitimately get none.
  def should_fail?(valid_categories)
    valid_categories.empty? && !self.class.categories.empty? && !self.class.multi_label
  end

  def build_failure_result(response, json)
    Result.failure(
      error: "No valid categories returned",
      raw_response: response,
      metadata: { parsed: json }
    )
  end

  # Builds the success result; single-label mode keeps only the first valid
  # category. Unknown top-level keys of the reply are exposed as metadata.
  def build_success_result(json, valid_categories, response)
    categories = self.class.multi_label ? valid_categories : [valid_categories.first].compact
    excluded_keys = %w[categories category confidence reasoning]
    metadata = json.reject { |k, _| excluded_keys.include?(k) }

    Result.success(
      categories: categories,
      confidence: json["confidence"]&.to_f,
      reasoning: json["reasoning"],
      raw_response: response,
      metadata: metadata
    )
  end
end
|
|
206
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "logger"
|
|
4
|
+
|
|
5
|
+
module LlmClassifier
|
|
6
|
+
# Configuration object for LlmClassifier settings
|
|
7
|
+
class Configuration
  attr_accessor :adapter, :default_model, :openai_api_key, :anthropic_api_key,
                :web_fetch_timeout, :web_fetch_user_agent, :default_queue,
                :logger

  # Populates every setting with its default. API keys are read from the
  # environment and stay nil when the variables are not set.
  def initialize
    @adapter = :ruby_llm
    @default_model = "gpt-4o-mini"
    @openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
    @anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
    @web_fetch_timeout = 10
    @web_fetch_user_agent = "LlmClassifier/#{VERSION}"
    @default_queue = :classification
    @logger = default_logger
  end

  # Maps the configured adapter (a known symbol or an adapter class) to the
  # adapter class.
  #
  # @raise [ConfigurationError] when the adapter is neither a known symbol
  #   nor a class
  def adapter_class
    case adapter
    when :ruby_llm
      Adapters::RubyLlm
    when :openai
      Adapters::OpenAI
    when :anthropic
      Adapters::Anthropic
    when Class
      adapter
    else
      raise ConfigurationError, "Unknown adapter: #{adapter}"
    end
  end

  private

  # Prefers the Rails logger, but only when it is actually available: a
  # top-level Rails constant may exist without a logger method, and
  # Rails.logger may still be nil. Falls back to a logger on $stdout.
  def default_logger
    rails_logger = ::Rails.logger if defined?(::Rails) && ::Rails.respond_to?(:logger)
    rails_logger || Logger.new($stdout)
  end
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmClassifier
|
|
4
|
+
module ContentFetchers
|
|
5
|
+
# Base content fetcher class
|
|
6
|
+
class Base
  # Contract for concrete fetchers: take a source and return its content.
  # This base implementation only signals that the subclass forgot to
  # provide one.
  #
  # @raise [NotImplementedError] always
  def fetch(source)
    raise(NotImplementedError, "Subclasses must implement #fetch")
  end

  protected

  # Shortcut to the library-wide configuration object for subclasses.
  def config
    LlmClassifier.configuration
  end
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "resolv"
|
|
6
|
+
require "ipaddr"
|
|
7
|
+
|
|
8
|
+
module LlmClassifier
|
|
9
|
+
module ContentFetchers
|
|
10
|
+
# Web content fetcher with SSRF protection
|
|
11
|
+
class Web < Base
  # Address ranges that must never be fetched: "this network", RFC 1918
  # private space, carrier-grade NAT shared space, loopback, link-local,
  # the IPv6 unspecified/loopback addresses, unique-local and link-local IPv6.
  PRIVATE_IP_RANGES = [
    IPAddr.new("0.0.0.0/8"),
    IPAddr.new("10.0.0.0/8"),
    IPAddr.new("100.64.0.0/10"),
    IPAddr.new("172.16.0.0/12"),
    IPAddr.new("192.168.0.0/16"),
    IPAddr.new("127.0.0.0/8"),
    IPAddr.new("169.254.0.0/16"),
    IPAddr.new("::/128"),
    IPAddr.new("::1/128"),
    IPAddr.new("fc00::/7"),
    IPAddr.new("fe80::/10")
  ].freeze

  # Details about the most recent #fetch call (:url, :status and, depending
  # on the outcome, :content_length, :content_preview or :error).
  attr_reader :debug_info

  # @param timeout [Numeric, nil] open/read timeout; defaults to the configured value
  # @param user_agent [String, nil] User-Agent header; defaults to the configured value
  def initialize(timeout: nil, user_agent: nil)
    super()
    @timeout = timeout || config.web_fetch_timeout
    @user_agent = user_agent || config.web_fetch_user_agent
    @debug_info = {}
  end

  # Fetches the page at +url+ and returns its visible text (truncated), or
  # nil when the URL is blank, the host is not public, the request fails or
  # the body is empty. Errors are recorded in #debug_info, never raised.
  def fetch(url)
    return nil if url.nil? || url.empty?

    url = normalize_url(url)
    @debug_info[:url] = url

    response = fetch_url(url)
    return handle_empty_response if response.nil? || response.empty?

    process_successful_response(response)
  rescue StandardError => e
    handle_error(e)
  end

  private

  # Prefixes scheme-less input with https://.
  def normalize_url(url)
    url.match?(%r{\Ahttps?://}i) ? url : "https://#{url}"
  end

  # Performs the request, following redirects until +redirect_limit+ is
  # exhausted. Every hop is validated against the private-address list.
  def fetch_url(url, redirect_limit = 3)
    return nil if redirect_limit.zero?

    uri = URI.parse(url)
    return nil unless validate_host_is_public(uri)

    response = send_http_request(uri)
    handle_http_response(response, url, redirect_limit)
  end

  # True only when the URL is http(s), the host resolves, and EVERY resolved
  # address is public. A single private record is enough to reject the host,
  # otherwise a name with mixed public/private records would slip through.
  #
  # NOTE(review): the HTTP client resolves the host again when it connects,
  # so the address checked here is not pinned for the actual connection.
  def validate_host_is_public(uri)
    return false unless %w[http https].include?(uri.scheme)
    return false if uri.host.nil?

    addresses = Resolv.getaddresses(uri.host)
    !addresses.empty? && addresses.none? { |addr| private_ip?(addr) }
  rescue Resolv::ResolvError
    false
  end

  # True when +address+ falls into one of PRIVATE_IP_RANGES. IPv4-mapped and
  # IPv4-compatible IPv6 addresses are unwrapped first so they are matched
  # against the IPv4 ranges. Unparseable addresses count as private.
  def private_ip?(address)
    # Re-parse the native form to get a plain address with a host mask.
    ip = IPAddr.new(IPAddr.new(address).native.to_s)
    PRIVATE_IP_RANGES.any? { |range| range.include?(ip) }
  rescue IPAddr::InvalidAddressError
    true
  end

  # Turns a Location header value into an absolute URL relative to
  # +base_url+; returns nil for blank or unparseable values.
  def normalize_redirect_url(base_url, redirect_url)
    # Plain-Ruby blank check: String#blank? only exists with ActiveSupport.
    return nil if redirect_url.nil? || redirect_url.strip.empty?

    if redirect_url.start_with?("http://", "https://")
      redirect_url
    elsif redirect_url.start_with?("//")
      uri = URI.parse(base_url)
      "#{uri.scheme}:#{redirect_url}"
    else
      URI.join(base_url, redirect_url).to_s
    end
  rescue URI::InvalidURIError
    nil
  end

  def handle_empty_response
    @debug_info[:status] = "failed_empty_response"
    nil
  end

  def process_successful_response(response)
    content = extract_text_content(response)
    @debug_info[:status] = "success"
    @debug_info[:content_length] = content&.length || 0
    @debug_info[:content_preview] = content ? truncate_string(content, 500) : nil
    content
  end

  def handle_error(error)
    @debug_info[:status] = "error"
    @debug_info[:error] = error.message
    nil
  end

  def send_http_request(uri)
    http = build_http_client(uri)
    request = build_http_request(uri)
    http.request(request)
  end

  def build_http_client(uri)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    http.open_timeout = @timeout
    http.read_timeout = @timeout
    http
  end

  def build_http_request(uri)
    request = Net::HTTP::Get.new(uri.request_uri)
    # Keep the port in the Host header when it is not the scheme default.
    request["Host"] = uri.port == uri.default_port ? uri.host : "#{uri.host}:#{uri.port}"
    request["User-Agent"] = @user_agent
    request
  end

  # Returns the body for 2xx responses, follows 3xx responses, and returns
  # nil for everything else.
  def handle_http_response(response, url, redirect_limit)
    return response.body if response.is_a?(Net::HTTPSuccess)
    return handle_redirect(response, url, redirect_limit) if response.is_a?(Net::HTTPRedirection)

    nil
  end

  def handle_redirect(response, url, redirect_limit)
    redirect_url = normalize_redirect_url(url, response["location"])
    return fetch_url(redirect_url, redirect_limit - 1) if redirect_url

    nil
  end

  # Strips script/style/nav/footer/header elements, collapses whitespace in
  # the remaining body text and truncates it to 2000 characters.
  def extract_text_content(html)
    return nil if html.nil? || html.empty?

    require_nokogiri!

    doc = Nokogiri::HTML(html)
    doc.css("script, style, nav, footer, header").remove

    text = doc.css("body").text
    text = text.gsub(/\s+/, " ").strip
    truncate_string(text, 2000)
  end

  # Cuts +str+ to +max_length+ characters and appends "..." when it was longer.
  def truncate_string(str, max_length)
    return str if str.length <= max_length

    "#{str[0...max_length]}..."
  end

  # Loads nokogiri lazily so the gem is only needed when web fetching is used.
  def require_nokogiri!
    return if defined?(Nokogiri)

    begin
      require "nokogiri"
    rescue LoadError
      raise Error, "nokogiri gem is required for web content fetching. Add it to your Gemfile."
    end
  end
end
|
|
177
|
+
end
|
|
178
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmClassifier
|
|
4
|
+
# Domain knowledge container that converts structured data into LLM prompts
|
|
5
|
+
class Knowledge
  def initialize
    @entries = {}
  end

  # DSL entry point: `name value` stores an entry, a bare `name` reads it
  # back. Presence of an argument is what selects the writer, so falsy
  # values such as `false` or `nil` can be stored as well.
  #
  # @raise [NoMethodError] when reading an entry that was never stored
  def method_missing(name, *args, &_block)
    return @entries[name] = args.first unless args.empty?
    return @entries[name] if @entries.key?(name)

    super
  end

  def respond_to_missing?(name, include_private = false)
    @entries.key?(name) || super
  end

  # Renders all entries as a "DOMAIN KNOWLEDGE" prompt section. Keys are
  # upcased with underscores turned into spaces; arrays are comma-joined,
  # hashes are rendered as "key: value" lines. Returns "" when empty.
  def to_prompt
    return "" if @entries.empty?

    sections = @entries.map do |key, value|
      formatted_key = key.to_s.tr("_", " ").upcase
      formatted_value = case value
                        when Array then value.join(", ")
                        when Hash then value.map { |k, v| "#{k}: #{v}" }.join("\n ")
                        else value.to_s
                        end
      "#{formatted_key}:\n#{formatted_value}"
    end

    "DOMAIN KNOWLEDGE:\n\n#{sections.join("\n\n")}"
  end

  # Returns a shallow copy of the stored entries.
  def to_h
    @entries.dup
  end
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LlmClassifier
|
|
4
|
+
module Rails
|
|
5
|
+
module Concerns
|
|
6
|
+
# Rails concern for adding classification capabilities to ActiveRecord models
|
|
7
|
+
module Classifiable
  extend ActiveSupport::Concern

  class_methods do
    # Wires a classifier into the model. For an attribute +foo+ this defines
    # +classify_foo!+, +foo_category+, +foo_categories+ and
    # +foo_classification+.
    #
    # @param attribute [Symbol, String] name used to build the method names
    # @param with classifier class responding to +classify(input)+
    # @param from [Symbol, Proc, Array, Object] where the input comes from
    # @param store_in [Symbol, String, nil] hash-like column to persist into;
    #   when nil the result is only kept in memory on the instance
    def classifies(attribute, with:, from:, store_in: nil)
      # Runs the classifier and stores the outcome only when it succeeded.
      define_method("classify_#{attribute}!") do
        outcome = with.classify(build_classification_input(from))
        store_classification_result(attribute, outcome, store_in) if outcome.success?
        outcome
      end

      # Single stored category, or nil when nothing is stored.
      define_method("#{attribute}_category") do
        get_stored_classification(attribute, store_in)&.dig("category")
      end

      # All stored categories (multi-label), [] when nothing is stored.
      define_method("#{attribute}_categories") do
        get_stored_classification(attribute, store_in)&.dig("categories") || []
      end

      # The complete stored classification hash, or nil.
      define_method("#{attribute}_classification") do
        get_stored_classification(attribute, store_in)
      end
    end
  end

  private

  # Resolves the classifier input: a Symbol is sent to the record, a Proc is
  # called with the record, an Array becomes a hash of attribute => value,
  # and any other object is used as-is.
  def build_classification_input(source)
    case source
    when Symbol then send(source)
    when Proc then source.call(self)
    when Array then source.to_h { |attr| [attr, send(attr)] }
    else source
    end
  end

  # Serialises the result and either merges it into the storage column
  # (saving the record when it is already persisted) or keeps it in an
  # instance-level hash.
  def store_classification_result(attribute, result, storage_column)
    payload = {
      "category" => result.category,
      "categories" => result.categories,
      "confidence" => result.confidence,
      "reasoning" => result.reasoning,
      "classified_at" => Time.current.iso8601
    }

    unless storage_column
      @classification_results ||= {}
      return @classification_results[attribute] = payload
    end

    existing = send(storage_column) || {}
    send("#{storage_column}=", existing.merge("#{attribute}_classification" => payload))
    save! if persisted?
  end

  # Reads the stored classification back from the storage column or from
  # the in-memory hash, mirroring #store_classification_result.
  def get_stored_classification(attribute, storage_column)
    return @classification_results&.dig(attribute) unless storage_column

    send(storage_column)&.dig("#{attribute}_classification")
  end
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
|
|
5
|
+
module LlmClassifier
|
|
6
|
+
module Generators
|
|
7
|
+
# Rails generator for creating classifier classes
|
|
8
|
+
class ClassifierGenerator < ::Rails::Generators::NamedBase
  source_root File.expand_path("templates", __dir__)

  desc "Creates an LlmClassifier classifier class"

  argument :categories, type: :array, default: [], banner: "category1 category2 ..."

  # Writes app/classifiers/<name>.rb from the classifier template.
  def create_classifier_file
    template "classifier.rb.erb", File.join("app/classifiers", "#{file_name}.rb")
  end

  # Writes a matching spec, but only for applications that have a spec
  # directory. The check is made inside the generator's destination root
  # (where the files are written), not the process working directory.
  def create_spec_file
    return unless File.exist?(File.join(destination_root, "spec"))

    template "classifier_spec.rb.erb", File.join("spec/classifiers", "#{file_name}_spec.rb")
  end

  private

  # Categories given on the command line, or two placeholder names when
  # none were given.
  def categories_array
    return %w[category_a category_b] if categories.empty?

    categories
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
|
|
5
|
+
module LlmClassifier
|
|
6
|
+
module Generators
|
|
7
|
+
# Rails generator for installing LlmClassifier configuration
|
|
8
|
+
class InstallGenerator < ::Rails::Generators::Base
  source_root File.expand_path("templates", __dir__)

  desc "Creates an LlmClassifier initializer"

  # Writes config/initializers/llm_classifier.rb with the default settings.
  # The heredoc is single-quoted so that `#{LlmClassifier::VERSION}` is
  # written out literally and evaluated when the application boots, rather
  # than being frozen to the gem version that ran the generator.
  def create_initializer_file
    create_file "config/initializers/llm_classifier.rb", <<~'RUBY'
      # frozen_string_literal: true

      LlmClassifier.configure do |config|
        # LLM adapter to use. Options: :ruby_llm, :openai, :anthropic
        config.adapter = :ruby_llm

        # Default model for classification
        config.default_model = "gpt-4o-mini"

        # API keys (reads from ENV by default)
        # config.openai_api_key = ENV["OPENAI_API_KEY"]
        # config.anthropic_api_key = ENV["ANTHROPIC_API_KEY"]

        # Content fetching settings
        config.web_fetch_timeout = 10
        config.web_fetch_user_agent = "LlmClassifier/#{LlmClassifier::VERSION}"

        # Rails integration
        config.default_queue = :classification
      end
    RUBY
  end

  # Creates app/classifiers with a .keep file so the empty directory can be
  # committed.
  def create_classifiers_directory
    empty_directory "app/classifiers"
    create_file "app/classifiers/.keep", ""
  end

  # Prints the next steps after installation.
  def show_post_install_message
    say "\n"
    say "LlmClassifier installed successfully!", :green
    say "\n"
    say "Next steps:"
    say " 1. Configure your API keys in config/initializers/llm_classifier.rb"
    say " 2. Generate a classifier: rails g llm_classifier:classifier SentimentClassifier"
    say "\n"
  end
end
|
|
53
|
+
end
|
|
54
|
+
end
|