legion-llm 0.5.16 → 0.5.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/legion/llm/confidence_score.rb +49 -0
- data/lib/legion/llm/confidence_scorer.rb +229 -0
- data/lib/legion/llm/pipeline/executor.rb +8 -5
- data/lib/legion/llm/pipeline/steps/confidence_scoring.rb +34 -0
- data/lib/legion/llm/pipeline/steps.rb +1 -0
- data/lib/legion/llm/settings.rb +12 -0
- data/lib/legion/llm/version.rb +1 -1
- data/lib/legion/llm.rb +2 -0
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0915f8beeff34fe070509f7d5bb3fe78a242213c7fc6b406905d48a264527fc1'
|
|
4
|
+
data.tar.gz: 01c1189bac8f90310518c1650b5621fd75b1a8cbf503c2b02277fef0c195f986
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aadc8ac33d46d40e470be524ca9f59f252c021425ab56ea1f19072e9fdd95bc2f96b74096193fbe5b0572417aeca2daf026b9d3e6486bd59f527d0ee98a108d3
|
|
7
|
+
data.tar.gz: f55c014a191d403b991ce8280e5d50b689adfaf7a5bb6f320add35555dbbefe615b860a2754507a22b830b413cd52ad819bc181448ddc0f36f6c970ab60b86c2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.17] - 2026-03-28
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `Legion::LLM::ConfidenceScore` value object (`lib/legion/llm/confidence_score.rb`): immutable struct with `score` (Float 0.0–1.0), `band` (`:very_low/:low/:medium/:high/:very_high`), `source` (`:heuristic/:logprobs/:caller_provided`), and `signals` hash. `#at_least?(band)` for band comparison. `BAND_ORDER` constant for ordered band comparison.
|
|
7
|
+
- `Legion::LLM::ConfidenceScorer` module (`lib/legion/llm/confidence_scorer.rb`): computes `ConfidenceScore` from three strategy sources in priority order — (1) caller-provided score via `confidence_score:` option, (2) model-native logprobs (detected via `class.method_defined?(:logprobs)` to avoid test-double interference), (3) heuristic analysis (refusal, truncation, repetition, too_short, json_parse_failure, hedging language penalties; structured output bonus for valid JSON). Band boundaries are read from `Legion::Settings[:llm][:confidence][:bands]` at call time, per-call overrides accepted via `confidence_bands:` option.
|
|
8
|
+
- `Legion::LLM::Pipeline::Steps::ConfidenceScoring` module (`lib/legion/llm/pipeline/steps/confidence_scoring.rb`): new pipeline step `step_confidence_scoring` inserted after `response_normalization`. Reads `confidence_score:`, `confidence_bands:`, and `quality_threshold:` from `request.extra`; propagates `json_expected:` from `request.response_format`. Errors are soft-caught (appended to `@warnings`, step skipped).
|
|
9
|
+
- `confidence_defaults` settings method: band boundaries `{ low: 0.3, medium: 0.5, high: 0.7, very_high: 0.9 }` under `Legion::Settings[:llm][:confidence][:bands]`.
|
|
10
|
+
- `confidence_score` attr_reader on `Pipeline::Executor` for post-pipeline inspection.
|
|
11
|
+
- `quality:` field of `Pipeline::Response` is now populated with `@confidence_score.to_h` (score, band, source, signals).
|
|
12
|
+
- 54 new specs across `confidence_score_spec.rb`, `confidence_scorer_spec.rb`, `confidence_settings_spec.rb`, and `pipeline/steps/confidence_scoring_spec.rb`.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- `Pipeline::Executor::STEPS` and `POST_PROVIDER_STEPS` now include `:confidence_scoring` after `:response_normalization`.
|
|
16
|
+
- `Legion::LLM.start` now requires `confidence_score` and `confidence_scorer` after `quality_checker`.
|
|
17
|
+
|
|
3
18
|
## [0.5.16] - 2026-03-28
|
|
4
19
|
|
|
5
20
|
### Fixed
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module LLM
|
|
5
|
+
# Immutable value object representing a scored confidence level for an LLM response.
|
|
6
|
+
#
|
|
7
|
+
# score - Float in [0.0, 1.0]
|
|
8
|
+
# band - Symbol: :very_low, :low, :medium, :high, :very_high
|
|
9
|
+
# source - Symbol: :heuristic, :logprobs, :caller_provided
|
|
10
|
+
# signals - Hash of contributing signals and their raw values (informational)
|
|
11
|
+
ConfidenceScore = ::Data.define(:score, :band, :source, :signals) do
|
|
12
|
+
def self.build(score:, bands:, source: :heuristic, signals: {})
|
|
13
|
+
clamped = score.to_f.clamp(0.0, 1.0)
|
|
14
|
+
new(
|
|
15
|
+
score: clamped,
|
|
16
|
+
band: classify(clamped, bands),
|
|
17
|
+
source: source,
|
|
18
|
+
signals: signals
|
|
19
|
+
)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# Returns true when the band is at or above the given band name.
|
|
23
|
+
def at_least?(band_name)
|
|
24
|
+
Legion::LLM::ConfidenceScore::BAND_ORDER.index(band) >= Legion::LLM::ConfidenceScore::BAND_ORDER.index(band_name.to_sym)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def to_h
|
|
28
|
+
{ score: score, band: band, source: source, signals: signals }
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
class << self
|
|
32
|
+
private
|
|
33
|
+
|
|
34
|
+
def classify(score, bands)
|
|
35
|
+
return :very_low if score < bands.fetch(:low, 0.3)
|
|
36
|
+
return :low if score < bands.fetch(:medium, 0.5)
|
|
37
|
+
return :medium if score < bands.fetch(:high, 0.7)
|
|
38
|
+
return :high if score < bands.fetch(:very_high, 0.9)
|
|
39
|
+
|
|
40
|
+
:very_high
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Band ordering from lowest to highest — defined outside the ::Data.define block
|
|
46
|
+
# so it is accessible as Legion::LLM::ConfidenceScore::BAND_ORDER.
|
|
47
|
+
ConfidenceScore::BAND_ORDER = %i[very_low low medium high very_high].freeze
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module LLM
|
|
5
|
+
# Computes a ConfidenceScore for an LLM response using available signals.
|
|
6
|
+
#
|
|
7
|
+
# Strategy selection (in priority order):
|
|
8
|
+
# 1. caller — caller-provided score passed via options[:confidence_score]
|
|
9
|
+
# 2. logprobs — native model confidence from token log-probabilities (when available)
|
|
10
|
+
# 3. heuristic — derived from response content characteristics
|
|
11
|
+
#
|
|
12
|
+
# Band boundaries are read from Legion::Settings[:llm][:confidence][:bands] when
|
|
13
|
+
# Legion::Settings is available, otherwise the DEFAULT_BANDS constants are used.
|
|
14
|
+
# Per-call overrides can be passed as options[:confidence_bands].
|
|
15
|
+
module ConfidenceScorer
|
|
16
|
+
# Default band boundaries. Keys are the *lower* boundary of that band name:
|
|
17
|
+
# score < :low -> :very_low
|
|
18
|
+
# score < :medium -> :low
|
|
19
|
+
# score < :high -> :medium
|
|
20
|
+
# score < :very_high -> :high
|
|
21
|
+
# score >= :very_high -> :very_high
|
|
22
|
+
DEFAULT_BANDS = {
|
|
23
|
+
low: 0.3,
|
|
24
|
+
medium: 0.5,
|
|
25
|
+
high: 0.7,
|
|
26
|
+
very_high: 0.9
|
|
27
|
+
}.freeze
|
|
28
|
+
|
|
29
|
+
# Penalty weights used in heuristic scoring.
|
|
30
|
+
HEURISTIC_WEIGHTS = {
|
|
31
|
+
refusal: -0.8,
|
|
32
|
+
empty: -1.0,
|
|
33
|
+
truncated: -0.4,
|
|
34
|
+
repetition: -0.5,
|
|
35
|
+
json_parse_failure: -0.6,
|
|
36
|
+
too_short: -0.3
|
|
37
|
+
}.freeze
|
|
38
|
+
|
|
39
|
+
# Bonus applied when structured output parse succeeds.
|
|
40
|
+
STRUCTURED_OUTPUT_BONUS = 0.1
|
|
41
|
+
|
|
42
|
+
# Hedging language patterns that reduce confidence.
|
|
43
|
+
HEDGING_PATTERNS = [
|
|
44
|
+
/\b(?:I think|I believe|I'm not sure|I'm uncertain|it seems|it appears|maybe|perhaps|possibly|probably|I guess|I assume)\b/i,
|
|
45
|
+
/\bnot (?:certain|sure|definite|confirmed)\b/i,
|
|
46
|
+
/\bunclear\b/i,
|
|
47
|
+
/\bcould be\b/i
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
50
|
+
class << self
|
|
51
|
+
# Compute a ConfidenceScore for the given raw_response.
|
|
52
|
+
#
|
|
53
|
+
# raw_response - the RubyLLM response object (should respond to #content; if it does not, heuristic scoring treats the content as empty)
|
|
54
|
+
# options - Hash:
|
|
55
|
+
# :confidence_score - Float caller-provided score (bypasses logprobs and heuristics)
|
|
56
|
+
# :confidence_bands - Hash per-call band overrides
|
|
57
|
+
# :json_expected - Boolean whether JSON output was expected
|
|
58
|
+
# :quality_result - QualityResult from QualityChecker (optional, avoids re-running checks)
|
|
59
|
+
#
|
|
60
|
+
# Returns a ConfidenceScore.
|
|
61
|
+
def score(raw_response, **options)
|
|
62
|
+
bands = resolve_bands(options[:confidence_bands])
|
|
63
|
+
|
|
64
|
+
if (caller_score = options[:confidence_score])
|
|
65
|
+
return ConfidenceScore.build(
|
|
66
|
+
score: caller_score.to_f,
|
|
67
|
+
bands: bands,
|
|
68
|
+
source: :caller_provided,
|
|
69
|
+
signals: { caller_provided: caller_score.to_f }
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
if (lp = extract_logprobs(raw_response))
|
|
74
|
+
return ConfidenceScore.build(
|
|
75
|
+
score: lp,
|
|
76
|
+
bands: bands,
|
|
77
|
+
source: :logprobs,
|
|
78
|
+
signals: { avg_logprob: lp }
|
|
79
|
+
)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
heuristic_score(raw_response, bands: bands, options: options)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Resolve band configuration. Per-call overrides win, then settings,
|
|
88
|
+
# then DEFAULT_BANDS.
|
|
89
|
+
def resolve_bands(per_call_override)
|
|
90
|
+
base = settings_bands
|
|
91
|
+
return base.merge(per_call_override) if per_call_override.is_a?(Hash)
|
|
92
|
+
|
|
93
|
+
base
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def settings_bands
|
|
97
|
+
return DEFAULT_BANDS unless defined?(Legion::Settings)
|
|
98
|
+
|
|
99
|
+
raw = Legion::Settings[:llm]
|
|
100
|
+
return DEFAULT_BANDS unless raw.is_a?(Hash)
|
|
101
|
+
|
|
102
|
+
conf = raw.dig(:confidence, :bands)
|
|
103
|
+
return DEFAULT_BANDS unless conf.is_a?(Hash)
|
|
104
|
+
|
|
105
|
+
DEFAULT_BANDS.merge(conf.transform_keys(&:to_sym))
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Attempt to derive a score from logprobs attached to the response.
|
|
109
|
+
# RubyLLM does not currently expose logprobs in its standard interface,
|
|
110
|
+
# but some providers return them in extra metadata. We probe the response
|
|
111
|
+
# object defensively to avoid unexpected-message errors from test doubles.
|
|
112
|
+
def extract_logprobs(raw_response)
|
|
113
|
+
lp = probe_logprobs(raw_response)
|
|
114
|
+
return nil unless lp.is_a?(Array) && !lp.empty?
|
|
115
|
+
|
|
116
|
+
# lp is expected to be an array of token log-probability floats (negative values).
|
|
117
|
+
avg_lp = lp.sum.to_f / lp.size
|
|
118
|
+
# Convert average log-probability to a probability-like score in [0, 1].
|
|
119
|
+
# avg_lp is in (-inf, 0]; e^0 = 1.0 (perfect), e^(-5) ≈ 0.007 (very uncertain).
|
|
120
|
+
# We clamp at -5 so very negative values still map to > 0.
|
|
121
|
+
Math.exp([avg_lp, -5.0].max)
|
|
122
|
+
rescue StandardError
|
|
123
|
+
nil
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Safely probe a response object for logprobs.
|
|
127
|
+
# Checks method_defined? on the concrete class first (not via stubs or method_missing)
|
|
128
|
+
# to avoid triggering MockExpectationError on RSpec test doubles.
|
|
129
|
+
def probe_logprobs(raw_response)
|
|
130
|
+
klass = raw_response.class
|
|
131
|
+
lp = raw_response.logprobs if klass.method_defined?(:logprobs)
|
|
132
|
+
lp ||= raw_response.metadata&.dig(:logprobs) if klass.method_defined?(:metadata)
|
|
133
|
+
lp
|
|
134
|
+
rescue StandardError
|
|
135
|
+
nil
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def heuristic_score(raw_response, bands:, options:)
|
|
139
|
+
signals = {}
|
|
140
|
+
penalty = 0.0
|
|
141
|
+
content = raw_response.respond_to?(:content) ? raw_response.content.to_s : ''
|
|
142
|
+
|
|
143
|
+
# Use pre-computed QualityResult when available to avoid duplicate work.
|
|
144
|
+
quality_result = options[:quality_result]
|
|
145
|
+
|
|
146
|
+
if content.strip.empty?
|
|
147
|
+
signals[:empty] = true
|
|
148
|
+
penalty += HEURISTIC_WEIGHTS[:empty].abs
|
|
149
|
+
else
|
|
150
|
+
failures = quality_result ? quality_result.failures : detect_failures(content, options)
|
|
151
|
+
|
|
152
|
+
failures.each do |failure|
|
|
153
|
+
weight = HEURISTIC_WEIGHTS[failure]
|
|
154
|
+
next unless weight
|
|
155
|
+
|
|
156
|
+
signals[failure] = true
|
|
157
|
+
penalty += weight.abs
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
hedges = count_hedges(content)
|
|
161
|
+
if hedges.positive?
|
|
162
|
+
hedge_penalty = [hedges * 0.05, 0.3].min
|
|
163
|
+
signals[:hedging] = hedges
|
|
164
|
+
penalty += hedge_penalty
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
if options[:json_expected] && !failures.include?(:json_parse_failure)
|
|
168
|
+
signals[:structured_output_valid] = true
|
|
169
|
+
penalty -= STRUCTURED_OUTPUT_BONUS
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
raw_score = [1.0 - penalty.clamp(0.0, 1.0), 0.0].max
|
|
174
|
+
ConfidenceScore.build(score: raw_score, bands: bands, source: :heuristic, signals: signals)
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def detect_failures(content, options)
|
|
178
|
+
return [] if content.strip.empty?
|
|
179
|
+
|
|
180
|
+
failures = []
|
|
181
|
+
threshold = options.fetch(:quality_threshold, QualityChecker::DEFAULT_QUALITY_THRESHOLD)
|
|
182
|
+
failures << :too_short if content.length < threshold
|
|
183
|
+
failures << :truncated if truncated?(content)
|
|
184
|
+
failures << :refusal if refusal?(content)
|
|
185
|
+
failures << :repetition if repetitive?(content)
|
|
186
|
+
failures << :json_parse_failure if options[:json_expected] && !valid_json?(content)
|
|
187
|
+
failures
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def truncated?(content)
|
|
191
|
+
return false if content.length < 100
|
|
192
|
+
|
|
193
|
+
last_chars = content[-3..]
|
|
194
|
+
last_chars&.match?(/\w{3}\z/) &&
|
|
195
|
+
!content.end_with?('.', '!', '?', '`', '"', "'", ')', ']', '}', "\n")
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def refusal?(content)
|
|
199
|
+
first_line = content.lines.first.to_s
|
|
200
|
+
QualityChecker::REFUSAL_PATTERNS.any? { |pat| first_line.match?(pat) }
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def repetitive?(content)
|
|
204
|
+
return false if content.length < QualityChecker::REPETITION_MIN_LENGTH * QualityChecker::REPETITION_THRESHOLD
|
|
205
|
+
|
|
206
|
+
seen = {}
|
|
207
|
+
step = QualityChecker::REPETITION_MIN_LENGTH
|
|
208
|
+
(0..(content.length - step)).step(step) do |i|
|
|
209
|
+
chunk = content[i, step]
|
|
210
|
+
seen[chunk] = (seen[chunk] || 0) + 1
|
|
211
|
+
return true if seen[chunk] >= QualityChecker::REPETITION_THRESHOLD
|
|
212
|
+
end
|
|
213
|
+
false
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def valid_json?(content)
|
|
217
|
+
::JSON.parse(content)
|
|
218
|
+
true
|
|
219
|
+
rescue ::JSON::ParserError
|
|
220
|
+
false
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def count_hedges(content)
|
|
224
|
+
HEDGING_PATTERNS.sum { |pat| content.scan(pat).size }
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
@@ -12,17 +12,18 @@ module Legion
|
|
|
12
12
|
include Steps::RagContext
|
|
13
13
|
|
|
14
14
|
attr_reader :request, :profile, :timeline, :tracing, :enrichments,
|
|
15
|
-
:audit, :warnings, :discovered_tools
|
|
15
|
+
:audit, :warnings, :discovered_tools, :confidence_score
|
|
16
16
|
|
|
17
17
|
include Steps::McpDiscovery
|
|
18
18
|
include Steps::ToolCalls
|
|
19
19
|
include Steps::KnowledgeCapture
|
|
20
|
+
include Steps::ConfidenceScoring
|
|
20
21
|
|
|
21
22
|
STEPS = %i[
|
|
22
23
|
tracing_init idempotency conversation_uuid context_load
|
|
23
24
|
rbac classification billing gaia_advisory rag_context mcp_discovery
|
|
24
25
|
routing request_normalization provider_call response_normalization
|
|
25
|
-
tool_calls context_store post_response knowledge_capture response_return
|
|
26
|
+
confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
26
27
|
].freeze
|
|
27
28
|
|
|
28
29
|
PRE_PROVIDER_STEPS = %i[
|
|
@@ -32,7 +33,7 @@ module Legion
|
|
|
32
33
|
].freeze
|
|
33
34
|
|
|
34
35
|
POST_PROVIDER_STEPS = %i[
|
|
35
|
-
response_normalization tool_calls context_store post_response knowledge_capture response_return
|
|
36
|
+
response_normalization confidence_scoring tool_calls context_store post_response knowledge_capture response_return
|
|
36
37
|
].freeze
|
|
37
38
|
|
|
38
39
|
def initialize(request)
|
|
@@ -46,9 +47,10 @@ module Legion
|
|
|
46
47
|
@timestamps = { received: Time.now }
|
|
47
48
|
@raw_response = nil
|
|
48
49
|
@exchange_id = nil
|
|
49
|
-
@discovered_tools
|
|
50
|
+
@discovered_tools = []
|
|
50
51
|
@resolved_provider = nil
|
|
51
52
|
@resolved_model = nil
|
|
53
|
+
@confidence_score = nil
|
|
52
54
|
end
|
|
53
55
|
|
|
54
56
|
def call
|
|
@@ -310,7 +312,8 @@ module Legion
|
|
|
310
312
|
caller: @request.caller,
|
|
311
313
|
classification: @request.classification,
|
|
312
314
|
billing: @request.billing,
|
|
313
|
-
test: @request.test
|
|
315
|
+
test: @request.test,
|
|
316
|
+
quality: @confidence_score&.to_h
|
|
314
317
|
)
|
|
315
318
|
end
|
|
316
319
|
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module LLM
|
|
5
|
+
module Pipeline
|
|
6
|
+
module Steps
|
|
7
|
+
module ConfidenceScoring
|
|
8
|
+
def step_confidence_scoring
|
|
9
|
+
return unless @raw_response
|
|
10
|
+
|
|
11
|
+
opts = {
|
|
12
|
+
json_expected: @request.response_format&.dig(:type) == :json,
|
|
13
|
+
quality_threshold: @request.extra&.dig(:quality_threshold),
|
|
14
|
+
confidence_score: @request.extra&.dig(:confidence_score),
|
|
15
|
+
confidence_bands: @request.extra&.dig(:confidence_bands)
|
|
16
|
+
}.compact
|
|
17
|
+
|
|
18
|
+
@confidence_score = ConfidenceScorer.score(@raw_response, **opts)
|
|
19
|
+
|
|
20
|
+
@timeline.record(
|
|
21
|
+
category: :internal, key: 'confidence:scored',
|
|
22
|
+
direction: :internal,
|
|
23
|
+
detail: "score=#{@confidence_score.score.round(3)} band=#{@confidence_score.band} source=#{@confidence_score.source}",
|
|
24
|
+
from: 'pipeline', to: 'pipeline'
|
|
25
|
+
)
|
|
26
|
+
rescue StandardError => e
|
|
27
|
+
@warnings << "confidence_scoring error: #{e.message}"
|
|
28
|
+
@confidence_score = nil
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
data/lib/legion/llm/settings.rb
CHANGED
|
@@ -13,6 +13,7 @@ module Legion
|
|
|
13
13
|
default_provider: nil,
|
|
14
14
|
providers: providers,
|
|
15
15
|
routing: routing_defaults,
|
|
16
|
+
confidence: confidence_defaults,
|
|
16
17
|
discovery: discovery_defaults,
|
|
17
18
|
gateway: gateway_defaults,
|
|
18
19
|
daemon: daemon_defaults,
|
|
@@ -25,6 +26,17 @@ module Legion
|
|
|
25
26
|
}
|
|
26
27
|
end
|
|
27
28
|
|
|
29
|
+
def self.confidence_defaults
|
|
30
|
+
{
|
|
31
|
+
bands: {
|
|
32
|
+
low: 0.3,
|
|
33
|
+
medium: 0.5,
|
|
34
|
+
high: 0.7,
|
|
35
|
+
very_high: 0.9
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
end
|
|
39
|
+
|
|
28
40
|
def self.daemon_defaults
|
|
29
41
|
{
|
|
30
42
|
url: nil,
|
data/lib/legion/llm/version.rb
CHANGED
data/lib/legion/llm.rb
CHANGED
|
@@ -9,6 +9,8 @@ require 'legion/llm/providers'
|
|
|
9
9
|
require 'legion/llm/router'
|
|
10
10
|
require 'legion/llm/compressor'
|
|
11
11
|
require 'legion/llm/quality_checker'
|
|
12
|
+
require 'legion/llm/confidence_score'
|
|
13
|
+
require 'legion/llm/confidence_scorer'
|
|
12
14
|
require 'legion/llm/escalation_history'
|
|
13
15
|
require 'legion/llm/hooks'
|
|
14
16
|
require 'legion/llm/cache'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: legion-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.16
|
|
4
|
+
version: 0.5.17
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -214,6 +214,8 @@ files:
|
|
|
214
214
|
- lib/legion/llm/cache.rb
|
|
215
215
|
- lib/legion/llm/claude_config_loader.rb
|
|
216
216
|
- lib/legion/llm/compressor.rb
|
|
217
|
+
- lib/legion/llm/confidence_score.rb
|
|
218
|
+
- lib/legion/llm/confidence_scorer.rb
|
|
217
219
|
- lib/legion/llm/conversation_store.rb
|
|
218
220
|
- lib/legion/llm/cost_estimator.rb
|
|
219
221
|
- lib/legion/llm/cost_tracker.rb
|
|
@@ -249,6 +251,7 @@ files:
|
|
|
249
251
|
- lib/legion/llm/pipeline/steps.rb
|
|
250
252
|
- lib/legion/llm/pipeline/steps/billing.rb
|
|
251
253
|
- lib/legion/llm/pipeline/steps/classification.rb
|
|
254
|
+
- lib/legion/llm/pipeline/steps/confidence_scoring.rb
|
|
252
255
|
- lib/legion/llm/pipeline/steps/gaia_advisory.rb
|
|
253
256
|
- lib/legion/llm/pipeline/steps/knowledge_capture.rb
|
|
254
257
|
- lib/legion/llm/pipeline/steps/mcp_discovery.rb
|