llm_classifier 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -3
- data/README.md +22 -1
- data/lib/llm_classifier/adapters/ruby_llm.rb +5 -1
- data/lib/llm_classifier/classifier.rb +40 -11
- data/lib/llm_classifier/content_fetchers/web.rb +1 -1
- data/lib/llm_classifier/result.rb +19 -5
- data/lib/llm_classifier/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2740f212b3f80944530c9b0ca84d18828499d8cc6d66de231bac734d2f83fc43
|
|
4
|
+
data.tar.gz: 1a9c8211890f2a74c16d6883a28a58c8a6be4d7340e62d8c1b8aafc89746fe7e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8332595d0ecb1390cda51139c745be5bc2f3c407e545594f2b9c57e22cd52f7ec0c42d2d08cf252c4e2d08a9def41b34ae31aed8062e45f4eee484776be8b4f2
|
|
7
|
+
data.tar.gz: 3bd39aaf2842079e629046a8bf3afda9ec01d41e850eaf9be39a5d958513f63c0e51603dada50aeac1f560f3d469740d2fddeea59c9e1acc559c1dc34e4cc5e4
|
data/.rubocop.yml
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
plugins:
|
|
2
2
|
- rubocop-rspec
|
|
3
3
|
|
|
4
4
|
AllCops:
|
|
5
|
-
TargetRubyVersion: 3.
|
|
5
|
+
TargetRubyVersion: 3.2
|
|
6
6
|
NewCops: enable
|
|
7
7
|
SuggestExtensions: false
|
|
8
8
|
Exclude:
|
|
@@ -42,4 +42,4 @@ RSpec/ExampleLength:
|
|
|
42
42
|
Max: 15
|
|
43
43
|
|
|
44
44
|
RSpec/MultipleExpectations:
|
|
45
|
-
Max:
|
|
45
|
+
Max: 6
|
data/README.md
CHANGED
|
@@ -100,6 +100,27 @@ result = TopicClassifier.classify("Building a Rails API with React frontend")
|
|
|
100
100
|
result.categories # => ["rails", "javascript"]
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
+
### Requiring Categories
|
|
104
|
+
|
|
105
|
+
By default, multi-label classifiers return `Result.success` even when no categories match (empty array). Use `require_categories` to treat empty results as failures:
|
|
106
|
+
|
|
107
|
+
```ruby
|
|
108
|
+
class StrictClassifier < LlmClassifier::Classifier
|
|
109
|
+
categories :mechanic, :instructor, :gear
|
|
110
|
+
multi_label true
|
|
111
|
+
require_categories true # Result.failure when no categories match
|
|
112
|
+
|
|
113
|
+
system_prompt "Classify this business..."
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
result = StrictClassifier.classify("Joe's Pizza Shop")
|
|
117
|
+
result.success? # => false (none of the classifier's categories matched)
|
|
118
|
+
result.failure? # => true
|
|
119
|
+
result.error # => "No valid categories returned"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
This is useful when classification is a filtering step and you need to distinguish "no match" from "classification succeeded."
|
|
123
|
+
|
|
103
124
|
### Domain Knowledge
|
|
104
125
|
|
|
105
126
|
Inject domain-specific knowledge into your prompts:
|
|
@@ -281,7 +302,7 @@ This project includes a [Dev Container](https://containers.dev/) configuration f
|
|
|
281
302
|
3. Press `Cmd+Shift+P` and select "Dev Containers: Reopen in Container"
|
|
282
303
|
4. Wait for the container to build and start
|
|
283
304
|
|
|
284
|
-
The container includes Ruby
|
|
305
|
+
The container includes Ruby, GitHub CLI, and useful VS Code extensions.
|
|
285
306
|
|
|
286
307
|
### Local Setup
|
|
287
308
|
|
|
@@ -11,7 +11,11 @@ module LlmClassifier
|
|
|
11
11
|
chat_instance.with_instructions(system_prompt)
|
|
12
12
|
response = chat_instance.ask(user_prompt)
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
{
|
|
15
|
+
content: response.content,
|
|
16
|
+
input_tokens: response.input_tokens,
|
|
17
|
+
output_tokens: response.output_tokens
|
|
18
|
+
}
|
|
15
19
|
end
|
|
16
20
|
|
|
17
21
|
private
|
|
@@ -7,7 +7,8 @@ module LlmClassifier
|
|
|
7
7
|
class Classifier
|
|
8
8
|
class << self
|
|
9
9
|
attr_reader :defined_categories, :defined_system_prompt, :defined_model,
|
|
10
|
-
:defined_adapter, :defined_multi_label, :
|
|
10
|
+
:defined_adapter, :defined_multi_label, :defined_require_categories,
|
|
11
|
+
:defined_knowledge,
|
|
11
12
|
:before_classify_callbacks, :after_classify_callbacks
|
|
12
13
|
|
|
13
14
|
def categories(*cats)
|
|
@@ -50,6 +51,14 @@ module LlmClassifier
|
|
|
50
51
|
end
|
|
51
52
|
end
|
|
52
53
|
|
|
54
|
+
def require_categories(value = nil)
|
|
55
|
+
if value.nil?
|
|
56
|
+
@defined_require_categories || false
|
|
57
|
+
else
|
|
58
|
+
@defined_require_categories = value
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
53
62
|
def knowledge(&)
|
|
54
63
|
if block_given?
|
|
55
64
|
@defined_knowledge = Knowledge.new
|
|
@@ -68,8 +77,8 @@ module LlmClassifier
|
|
|
68
77
|
@after_classify_callbacks << block
|
|
69
78
|
end
|
|
70
79
|
|
|
71
|
-
def classify(input, **
|
|
72
|
-
new(input, **
|
|
80
|
+
def classify(input, **)
|
|
81
|
+
new(input, **).classify
|
|
73
82
|
end
|
|
74
83
|
end
|
|
75
84
|
|
|
@@ -103,13 +112,23 @@ module LlmClassifier
|
|
|
103
112
|
|
|
104
113
|
def perform_classification(processed_input)
|
|
105
114
|
adapter_instance = build_adapter
|
|
115
|
+
resolved_model = options[:model] || self.class.model
|
|
106
116
|
response = adapter_instance.chat(
|
|
107
|
-
model:
|
|
117
|
+
model: resolved_model,
|
|
108
118
|
system_prompt: build_system_prompt,
|
|
109
119
|
user_prompt: build_user_prompt(processed_input)
|
|
110
120
|
)
|
|
111
121
|
|
|
112
|
-
|
|
122
|
+
content, token_data = extract_response_data(response)
|
|
123
|
+
parse_response(content, resolved_model, token_data)
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Normalizes an adapter reply into a `[content, token_data]` pair.
#
# A Hash reply contributes its content and token counts. Each key is looked up
# as a symbol first and as a string second, so a string-keyed Hash is not
# silently read as "no content". Any other reply is passed through unchanged as
# the content, with an empty token Hash.
#
# @param response [Hash, Object] value returned by the adapter's `chat` call
# @return [Array] two elements: the content, then a Hash with `:input_tokens`
#   and `:output_tokens` (empty when the reply is not a Hash)
def extract_response_data(response)
  return [response, {}] unless response.is_a?(Hash)

  # Prefer the symbol key whenever it is present (even with a nil value);
  # only fall back to the string form when the symbol key is absent.
  read = ->(key) { response.fetch(key) { response[key.to_s] } }

  token_data = { input_tokens: read.call(:input_tokens), output_tokens: read.call(:output_tokens) }
  [read.call(:content), token_data]
end
|
|
114
133
|
|
|
115
134
|
def build_adapter
|
|
@@ -161,24 +180,31 @@ module LlmClassifier
|
|
|
161
180
|
end
|
|
162
181
|
end
|
|
163
182
|
|
|
164
|
-
def parse_response(response)
|
|
165
|
-
json = JSON.parse(response)
|
|
183
|
+
def parse_response(response, resolved_model = nil, token_data = {})
|
|
184
|
+
json = JSON.parse(strip_code_fences(response))
|
|
166
185
|
valid_categories = extract_valid_categories(json)
|
|
167
186
|
|
|
168
187
|
return build_failure_result(response, json) if should_fail?(valid_categories)
|
|
169
188
|
|
|
170
|
-
build_success_result(json, valid_categories, response)
|
|
189
|
+
build_success_result(json, valid_categories, response, resolved_model, token_data)
|
|
171
190
|
rescue JSON::ParserError => e
|
|
172
191
|
Result.failure(error: "Failed to parse response: #{e.message}", raw_response: response)
|
|
173
192
|
end
|
|
174
193
|
|
|
194
|
+
# Removes a surrounding Markdown code fence from +text+.
#
# An opening fence is three backticks at the very start (after optional
# whitespace), optionally followed by a word-character language tag and one
# line break. A closing fence is three backticks at the very end, optionally
# preceded by one line break and followed by whitespace. Text without such
# fences is returned unchanged.
#
# +text+ is coerced with +to_s+ first: calling +sub+ on +nil+ would raise
# NoMethodError, whereas coercion turns a missing reply into an empty string.
#
# @param text [String, nil] raw model output
# @return [String] the text without its leading/trailing fence
def strip_code_fences(text)
  text.to_s.sub(/\A\s*```\w*\R?/, "").sub(/\R?```\s*\z/, "")
end
|
|
197
|
+
|
|
175
198
|
def extract_valid_categories(json)
|
|
176
199
|
raw_categories = Array(json["categories"] || json["category"])
|
|
177
200
|
raw_categories.select { |c| self.class.categories.include?(c.to_s) }
|
|
178
201
|
end
|
|
179
202
|
|
|
180
203
|
def should_fail?(valid_categories)
|
|
181
|
-
|
|
204
|
+
return false if valid_categories.any?
|
|
205
|
+
return false if self.class.categories.empty?
|
|
206
|
+
|
|
207
|
+
!self.class.multi_label || self.class.require_categories
|
|
182
208
|
end
|
|
183
209
|
|
|
184
210
|
def build_failure_result(response, json)
|
|
@@ -189,7 +215,7 @@ module LlmClassifier
|
|
|
189
215
|
)
|
|
190
216
|
end
|
|
191
217
|
|
|
192
|
-
def build_success_result(json, valid_categories, response)
|
|
218
|
+
def build_success_result(json, valid_categories, response, resolved_model = nil, token_data = {})
|
|
193
219
|
categories = self.class.multi_label ? valid_categories : [valid_categories.first].compact
|
|
194
220
|
excluded_keys = %w[categories category confidence reasoning]
|
|
195
221
|
metadata = json.reject { |k, _| excluded_keys.include?(k) }
|
|
@@ -199,7 +225,10 @@ module LlmClassifier
|
|
|
199
225
|
confidence: json["confidence"]&.to_f,
|
|
200
226
|
reasoning: json["reasoning"],
|
|
201
227
|
raw_response: response,
|
|
202
|
-
metadata: metadata
|
|
228
|
+
metadata: metadata,
|
|
229
|
+
model: resolved_model,
|
|
230
|
+
input_tokens: token_data[:input_tokens],
|
|
231
|
+
output_tokens: token_data[:output_tokens]
|
|
203
232
|
)
|
|
204
233
|
end
|
|
205
234
|
end
|
|
@@ -3,15 +3,21 @@
|
|
|
3
3
|
module LlmClassifier
|
|
4
4
|
# Result object returned from classification operations
|
|
5
5
|
class Result
|
|
6
|
-
attr_reader :categories, :confidence, :reasoning, :raw_response, :metadata, :error
|
|
6
|
+
attr_reader :categories, :confidence, :reasoning, :raw_response, :metadata, :error, :model,
|
|
7
|
+
:input_tokens, :output_tokens
|
|
7
8
|
|
|
8
|
-
def initialize(categories: [], confidence: nil, reasoning: nil,
|
|
9
|
+
def initialize(categories: [], confidence: nil, reasoning: nil,
|
|
10
|
+
raw_response: nil, error: nil, metadata: {},
|
|
11
|
+
model: nil, input_tokens: nil, output_tokens: nil)
|
|
9
12
|
@categories = Array(categories)
|
|
10
13
|
@confidence = confidence
|
|
11
14
|
@reasoning = reasoning
|
|
12
15
|
@raw_response = raw_response
|
|
13
16
|
@metadata = metadata
|
|
14
17
|
@error = error
|
|
18
|
+
@model = model
|
|
19
|
+
@input_tokens = input_tokens
|
|
20
|
+
@output_tokens = output_tokens
|
|
15
21
|
end
|
|
16
22
|
|
|
17
23
|
def success?
|
|
@@ -38,18 +44,26 @@ module LlmClassifier
|
|
|
38
44
|
confidence: @confidence,
|
|
39
45
|
reasoning: @reasoning,
|
|
40
46
|
metadata: @metadata,
|
|
41
|
-
error: @error
|
|
47
|
+
error: @error,
|
|
48
|
+
model: @model,
|
|
49
|
+
input_tokens: @input_tokens,
|
|
50
|
+
output_tokens: @output_tokens
|
|
42
51
|
}
|
|
43
52
|
end
|
|
44
53
|
|
|
45
54
|
class << self
|
|
46
|
-
def success(categories:, confidence: nil, reasoning: nil,
|
|
55
|
+
def success(categories:, confidence: nil, reasoning: nil,
|
|
56
|
+
raw_response: nil, metadata: {},
|
|
57
|
+
model: nil, input_tokens: nil, output_tokens: nil)
|
|
47
58
|
new(
|
|
48
59
|
categories: categories,
|
|
49
60
|
confidence: confidence,
|
|
50
61
|
reasoning: reasoning,
|
|
51
62
|
raw_response: raw_response,
|
|
52
|
-
metadata: metadata
|
|
63
|
+
metadata: metadata,
|
|
64
|
+
model: model,
|
|
65
|
+
input_tokens: input_tokens,
|
|
66
|
+
output_tokens: output_tokens
|
|
53
67
|
)
|
|
54
68
|
end
|
|
55
69
|
|
metadata
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: llm_classifier
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Sychev
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: exe
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2026-04-05 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
14
|
name: zeitwerk
|
|
@@ -68,6 +69,7 @@ metadata:
|
|
|
68
69
|
source_code_uri: https://github.com/AxiumFoundry/llm_classifier
|
|
69
70
|
changelog_uri: https://github.com/AxiumFoundry/llm_classifier/blob/main/CHANGELOG.md
|
|
70
71
|
rubygems_mfa_required: 'true'
|
|
72
|
+
post_install_message:
|
|
71
73
|
rdoc_options: []
|
|
72
74
|
require_paths:
|
|
73
75
|
- lib
|
|
@@ -75,14 +77,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
75
77
|
requirements:
|
|
76
78
|
- - ">="
|
|
77
79
|
- !ruby/object:Gem::Version
|
|
78
|
-
version: 3.
|
|
80
|
+
version: 3.2.0
|
|
79
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
80
82
|
requirements:
|
|
81
83
|
- - ">="
|
|
82
84
|
- !ruby/object:Gem::Version
|
|
83
85
|
version: '0'
|
|
84
86
|
requirements: []
|
|
85
|
-
rubygems_version: 3.
|
|
87
|
+
rubygems_version: 3.4.20
|
|
88
|
+
signing_key:
|
|
86
89
|
specification_version: 4
|
|
87
90
|
summary: LLM-powered classification for Ruby with pluggable adapters and Rails integration
|
|
88
91
|
test_files: []
|