llm.rb 6.0.0 → 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +51 -0
- data/README.md +10 -4
- data/lib/llm/agent.rb +2 -0
- data/lib/llm/buffer.rb +8 -0
- data/lib/llm/compactor.rb +26 -7
- data/lib/llm/context/deserializer.rb +1 -0
- data/lib/llm/context.rb +48 -21
- data/lib/llm/provider/transport/http/execution.rb +1 -1
- data/lib/llm/provider/transport/http/interruptible.rb +99 -94
- data/lib/llm/provider/transport/http.rb +3 -2
- data/lib/llm/provider.rb +8 -0
- data/lib/llm/skill.rb +2 -0
- data/lib/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 57b39b3b4b79d1d9f8cfd10426ad233d698dd6e3ed84bfef887c8c63f543f40f
+  data.tar.gz: 443ed7e2a04259c69d41b1da7a42e7637efaa4ab1075548706ce349bced7ed51
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f8e53dc41eacf16cea35f64a6048aa77852fcf7a135676b2b9c02e37beff174b5a500948477c4f931ff0a71d20c4503ba3e9eef19358d3aaa204040e77fe14c5
+  data.tar.gz: 358ce7f33d2dca51365f6581867006970fd66079dcaa189268e2deff2f297c89b8332fd11b714bedfd89124413b7a9e12fc09d928c2c28f2e9cb2368f2bc3e24

data/CHANGELOG.md
CHANGED

@@ -1,5 +1,56 @@
 # Changelog
 
+## v6.1.0
+
+Changes since `v6.0.0`.
+
+This release tightens interrupt and compaction behavior for long-running
+contexts. It adds `LLM::Buffer#rindex`, supports percentage-based token
+thresholds in `LLM::Compactor`, tracks persisted compaction state through
+context serialization, reliably interrupts Async-backed requests, preserves
+valid tool-call history on cancellation, keeps concurrent skill tool loops
+running on streamed agents, and returns zero-valued usage objects when no
+provider usage has been recorded yet.
+
+### Change
+
+* **Add `LLM::Buffer#rindex`** <br>
+  Add `LLM::Buffer#rindex` as a direct forward to the underlying message
+  array so callers can find the last matching message index through the
+  buffer API.
+
+* **Support percentage compaction token thresholds** <br>
+  Let `LLM::Compactor` accept `token_threshold:` values like `"90%"` so
+  compaction can trigger at a percentage of the active model context
+  window.
+
+### Fix
+
+* **Interrupt Async-backed requests reliably** <br>
+  Track request ownership through the provider transport so contexts use
+  the active Async task when available, letting `ctx.interrupt!`
+  reliably cancel streamed requests under Async runtimes and surface
+  them as `LLM::Interrupt`.
+
+* **Preserve valid tool-call history on cancellation** <br>
+  Append cancelled tool-return messages for unresolved tool calls during
+  `ctx.interrupt!` so follow-up provider requests do not fail with
+  invalid tool-call history after pending tool work is cancelled.
+
+* **Preserve concurrent skill tool loops on streamed agents** <br>
+  Propagate the active agent concurrency through the effective request
+  stream so nested skill agents keep using queued `wait(...)` tool
+  execution instead of falling back to direct `:call` execution.
+
+* **Track persisted compaction state on contexts** <br>
+  Mark contexts as compacted after `LLM::Compactor#compact!`, persist and
+  restore that state through context serialization, and clear it after the
+  next successful model response.
+
+* **Return zero-valued usage objects from contexts** <br>
+  Make `LLM::Context#usage` consistently return an `LLM::Object`, using a
+  zero-valued usage object when no provider usage has been recorded yet.
+
 ## v6.0.0
 
 Changes since `v5.4.0`.

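Taken together, the interrupt changes above alter how a long-running request is cancelled. A minimal sketch, assuming a configured provider object `llm` and the `async` gem (the provider setup is hypothetical); the cancelled request surfaces as `LLM::Interrupt`:

```ruby
require "llm"
require "async"

llm = LLM.openai(key: ENV["KEY"]) # hypothetical provider setup
Async do |task|
  ctx = LLM::Context.new(llm)
  task.async do
    sleep 0.1
    ctx.interrupt! # closes the in-flight connection and stops the owner task
  end
  ctx.talk("Write a very long story.")
rescue LLM::Interrupt
  # Unresolved tool calls (if any) were appended as cancelled tool
  # returns, so the next ctx.talk sends a valid history.
end
```
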
data/README.md
CHANGED

@@ -4,7 +4,7 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-6.0.0-green.svg?" alt="Version"></a>
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-6.1.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About

@@ -163,12 +163,15 @@ and when a stream is present it emits `on_compaction` and
 `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
 The compactor can also use a different model from the main context, which is
 useful when you want summarization to run on a cheaper or faster model.
+`token_threshold:` accepts either a fixed token count or a percentage string
+like `"90%"`, which resolves against the active model context window and
+triggers compaction once total token usage goes over that percentage.
 
 ```ruby
 ctx = LLM::Context.new(
   llm,
   compactor: {
-
+    token_threshold: "90%",
     retention_window: 8,
     model: "gpt-5.4-mini"
   }

@@ -624,7 +627,10 @@ long-lived contexts can summarize older history and expose the lifecycle
 through stream hooks. This approach is inspired by General Intelligence
 Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
 compactor can also use its own `model:` if you want summarization to run on a
-different model from the main context.
+different model from the main context. `token_threshold:` accepts either a
+fixed token count or a percentage string like `"90%"`, which resolves
+against the active model context window and triggers compaction once total
+token usage goes over that percentage. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -644,7 +650,7 @@ ctx = LLM::Context.new(
   llm,
   stream: Stream.new,
   compactor: {
-
+    token_threshold: "90%",
     retention_window: 8,
     model: "gpt-5.4-mini"
   }

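The `Stream.new` in the example above implies a user-defined stream class. A minimal sketch of one, assuming `LLM::Stream` can be subclassed with a no-argument constructor; the hook names come from the README text above, and the `(ctx, compactor)` signature from the `stream.on_compaction_finish(ctx, self)` call in compactor.rb below:

```ruby
require "llm"

class Stream < LLM::Stream
  def on_compaction(ctx, compactor)
    warn "compaction started: #{ctx.messages.size} messages"
  end

  def on_compaction_finish(ctx, compactor)
    warn "compaction finished"
  end
end
```
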
data/lib/llm/agent.rb
CHANGED

@@ -394,6 +394,8 @@ module LLM
     def run_loop(method, prompt, params)
       loop = proc do
         max = Integer(params.delete(:tool_attempts) || 25)
+        stream = params[:stream] || @ctx.params[:stream]
+        stream.extra[:concurrency] = concurrency if LLM::Stream === stream
         res = @ctx.public_send(method, apply_instructions(prompt), params)
         max.times do
           break if @ctx.functions.empty?

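The pattern at work here: the shared stream object ferries the outer agent's concurrency setting into nested skill agents, which read it back when building their own params (see skill.rb below). A stripped-down sketch of that hand-off with stand-in classes, not the library's own:

```ruby
# Stand-in for LLM::Stream; only the extra hash matters for this sketch.
Stream = Struct.new(:extra)

stream = Stream.new({})
stream.extra[:concurrency] = 4 # outer agent (run_loop) records its setting

params = {stream: stream}
# Nested skill (call) reads the setting back for its own agent params.
concurrency = params[:stream].extra[:concurrency]
params[:concurrency] = concurrency if concurrency
```
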
data/lib/llm/buffer.rb
CHANGED

@@ -52,6 +52,14 @@ module LLM
       reverse_each.find(...)
     end
 
+    ##
+    # Returns the index of the last message matching the given block.
+    # @yield [LLM::Message]
+    # @return [Integer, nil]
+    def rindex(...)
+      @messages.rindex(...)
+    end
+
     ##
     # Returns the last message(s) in the buffer
     # @param [Integer, nil] n

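A small usage sketch, assuming a context `ctx` with accumulated history; since `rindex` forwards to `Array#rindex`, it takes a block (or a value) and returns the last matching index, or `nil`:

```ruby
idx = ctx.messages.rindex { _1.assistant? } # index of the last assistant message
last_assistant = idx && ctx.messages.to_a[idx]
```
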
data/lib/llm/compactor.rb
CHANGED

@@ -11,7 +11,9 @@
 # The compactor can also use a different model from the main context by
 # setting `model:` in the compactor config. Compaction thresholds are opt-in:
 # provide `message_threshold:` and/or `token_threshold:` to enable policy-
-# driven compaction.
+# driven compaction. `token_threshold:` accepts either an integer token count
+# or a percentage string like `"90%"`, which resolves against the current
+# model context window.
 class LLM::Compactor
   DEFAULTS = {
     retention_window: 8,

@@ -25,8 +27,11 @@ class LLM::Compactor
   ##
   # @param [LLM::Context] ctx
   # @param [Hash] config
-  # @option config [Integer, nil] :token_threshold
-  #  Enables token-based compaction.
+  # @option config [Integer, String, nil] :token_threshold
+  #  Enables token-based compaction. Integer values are treated as a fixed
+  #  token count. Percentage strings like `"90%"` are resolved against
+  #  {LLM::Context#context_window}; if the context window is unknown, the
+  #  percentage threshold is treated as disabled.
   # @option config [Integer, nil] :message_threshold
   #  Enables message-count-based compaction.
   # @option config [Integer] :retention_window

@@ -39,18 +44,22 @@ class LLM::Compactor
   end
 
   ##
-  # Returns true when the context should be compacted
+  # Returns true when the context should be compacted.
+  #
+  # When `token_threshold:` is a percentage string such as `"90%"`, the
+  # threshold is resolved against the current context window and compared to
+  # the current total token usage.
   # @param [Object] prompt
   #  The next prompt or turn input
   # @return [Boolean]
-  def compact?(prompt = nil)
+  def compactable?(prompt = nil)
     return false if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
     messages = ctx.messages.reject(&:system?)
     return true if config[:message_threshold] && messages.size > config[:message_threshold]
-
-    return true if config[:token_threshold] && usage && usage.total_tokens > config[:token_threshold]
+    return true if token_threshold and ctx.usage.total_tokens > token_threshold
     false
   end
+  alias_method :compact?, :compactable?
 
   ##
   # Summarize older messages and replace them with a compact summary.

@@ -68,6 +77,7 @@ class LLM::Compactor
     older = messages[0...(messages.size - recent.size)]
     summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}", {compaction: true})
     ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
+    ctx.compacted = true
     stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
     summary
   end

@@ -84,6 +94,15 @@ class LLM::Compactor
     messages[start..] || []
   end
 
+  def token_threshold
+    @token_threshold ||= begin
+      threshold = config[:token_threshold]
+      return threshold unless threshold.to_s.end_with?("%")
+      return if ctx.context_window <= 0
+      (ctx.context_window * threshold.delete_suffix("%").to_f / 100).floor
+    end
+  end
+
   def summarize(messages)
     model = config[:model] || ctx.params[:model] || ctx.llm.default_model
     ctx.llm.complete(summary_prompt(messages), model:).content

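The percentage resolution in `token_threshold` above, restated standalone; the 128,000-token context window is a made-up figure for illustration:

```ruby
threshold = "90%"
context_window = 128_000 # hypothetical model context window

# Mirrors the resolution in compactor.rb: strip the "%", scale, floor.
resolved = (context_window * threshold.delete_suffix("%").to_f / 100).floor
resolved # => 115200; compaction triggers once total usage exceeds this
```
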
data/lib/llm/context.rb
CHANGED

@@ -40,6 +40,14 @@ module LLM
     include Serializer
     include Deserializer
 
+    ZERO_USAGE = LLM::Object.from(
+      input_tokens: 0,
+      output_tokens: 0,
+      reasoning_tokens: 0,
+      total_tokens: 0
+    )
+    private_constant :ZERO_USAGE
+
     ##
     # Returns the accumulated message history for this context
     # @return [LLM::Buffer<LLM::Message>]

@@ -104,6 +112,14 @@
       @compactor = compactor
     end
 
+    ##
+    # Returns whether the context has been compacted and no later model
+    # response has cleared that state.
+    # @return [Boolean]
+    # @api private
+    attr_accessor :compacted
+    alias_method :compacted?, :compacted
+
     ##
     # Returns a guard, if configured.
     #

@@ -172,13 +188,14 @@
     # puts res.messages[0].content
     def talk(prompt, params = {})
       return respond(prompt, params) if mode == :responses
-      @owner =
+      @owner = @llm.request_owner
       compactor.compact!(prompt) if compactor.compact?(prompt)
       params = params.merge(messages: @messages.to_a)
       params = @params.merge(params)
       prompt, params = transform(prompt, params)
       bind!(params[:stream], params[:model], params[:tools])
       res = @llm.complete(prompt, params)
+      self.compacted = false
       role = params[:role] || @llm.user_role
       role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
       @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]

@@ -201,7 +218,7 @@
     # res = ctx.respond("What is the capital of France?")
     # puts res.output_text
     def respond(prompt, params = {})
-      @owner =
+      @owner = @llm.request_owner
       compactor.compact!(prompt) if compactor.compact?(prompt)
       params = @params.merge(params)
       prompt, params = transform(prompt, params)

@@ -209,6 +226,7 @@
       res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
       params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
       res = @llm.responses.create(prompt, params)
+      self.compacted = false
       role = params[:role] || @llm.user_role
       @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
       @messages.concat [res.choices[-1]]

@@ -313,27 +331,31 @@
     # This is inspired by Go's context cancellation model.
     # @return [nil]
     def interrupt!
+      pending = functions.to_a
       llm.interrupt!(@owner)
       queue&.interrupt!
+      return if pending.empty?
+      pending.each(&:interrupt!)
+      returns = pending.map { _1.cancel(reason: "function call cancelled") }
+      @messages << LLM::Message.new(@llm.tool_role, returns)
+      nil
     end
     alias_method :cancel!, :interrupt!
 
     ##
     # Returns token usage accumulated in this context
-    # @note
-    #  This method returns token usage for the latest
-    #  assistant message, and it returns nil for non-assistant
-    #  messages.
-    # @return [LLM::Object, nil]
+    # @return [LLM::Object]
     def usage
-      usage = @messages.find(&:assistant?)&.usage
-
-
-
-
-
-
-
+      if usage = @messages.find(&:assistant?)&.usage
+        LLM::Object.from(
+          input_tokens: usage.input_tokens || 0,
+          output_tokens: usage.output_tokens || 0,
+          reasoning_tokens: usage.reasoning_tokens || 0,
+          total_tokens: usage.total_tokens || 0
+        )
+      else
+        ZERO_USAGE
+      end
     end

@@ -403,7 +425,12 @@
     ##
     # @return [Hash]
     def to_h
-      {
+      {
+        schema_version: 1,
+        model:,
+        compacted:,
+        messages: @messages.map { serialize_message(_1) }
+      }
     end
 
     ##

@@ -432,12 +459,12 @@
     # Returns an _approximate_ cost for a given context
     # based on both the provider, and model
     def cost
-      return LLM::Cost.new(0, 0) unless usage
       cost = LLM.registry_for(llm).cost(model:)
-
-
-
-
+      input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
+      output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
+      LLM::Cost.new(input_cost, output_cost)
+    rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
+      LLM::Cost.new(0, 0)
     end
 
     ##

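A usage sketch of the new `usage` behavior, assuming a configured provider object `llm` (hypothetical here): before any assistant response, `ctx.usage` is the zero-valued object rather than `nil`, so downstream arithmetic such as `cost` needs no nil guard:

```ruby
ctx = LLM::Context.new(llm)
ctx.usage               # => LLM::Object (ZERO_USAGE), never nil
ctx.usage.total_tokens  # => 0
ctx.cost                # => an LLM::Cost with zero input/output cost
```
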
data/lib/llm/provider/transport/http/execution.rb
CHANGED

@@ -38,7 +38,7 @@ module LLM::Provider::Transport
       perform_request(http, request, stream, stream_parser, &b)
     end
     [handle_response(res, tracer, span), span, tracer]
-  rescue *
+  rescue *transport.interrupt_errors
     raise LLM::Interrupt, "request interrupted" if transport.interrupted?(owner)
     raise
   end

data/lib/llm/provider/transport/http/interruptible.rb
CHANGED

@@ -1,109 +1,114 @@
 # frozen_string_literal: true
 
 class LLM::Provider
-
-
-
-
-
-
-
-
-
-
-
-
-  INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
-  Request = Struct.new(:http, :connection, keyword_init: true)
+  ##
+  # Internal request interruption methods for
+  # {LLM::Provider::Transport::HTTP}.
+  #
+  # This module tracks active requests by execution owner and provides
+  # the logic used to interrupt an in-flight request by closing the
+  # active HTTP connection.
+  #
+  # @api private
+  module Transport::HTTP::Interruptible
+    INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
+    Request = Struct.new(:http, :connection, keyword_init: true)
 
-
-
-
-    # The execution owner whose request should be interrupted
-    # @return [nil]
-    def interrupt!(owner)
-      req = request_for(owner) or return
-      lock { (@interrupts ||= {})[owner] = true }
-      if persistent_http?(req.http)
-        close_socket(req.connection&.http)
-        req.http.finish(req.connection)
-      elsif transient_http?(req.http)
-        close_socket(req.http)
-        req.http.finish if req.http.active?
-      end
-    rescue *INTERRUPT_ERRORS
-      nil
-    end
-
-    private
+    def interrupt_errors
+      [*INTERRUPT_ERRORS, *optional_interrupt_errors]
+    end
 
-
-
-
-
-
-
-
-
-
-
+    ##
+    # Interrupt an active request, if any.
+    # @param [Fiber] owner
+    #  The execution owner whose request should be interrupted
+    # @return [nil]
+    def interrupt!(owner)
+      req = request_for(owner) or return
+      lock { (@interrupts ||= {})[owner] = true }
+      if persistent_http?(req.http)
+        close_socket(req.connection&.http)
+        req.http.finish(req.connection)
+      elsif transient_http?(req.http)
+        close_socket(req.http)
+        req.http.finish if req.http.active?
+      end
+      owner.stop if owner.respond_to?(:stop)
+    rescue *interrupt_errors
+      nil
+    end
 
-
-    # Returns whether the active request is using a transient HTTP client.
-    # @param [Object, nil] http
-    # @return [Boolean]
-    def transient_http?(http)
-      Net::HTTP === http
-    end
+    private
 
-
-
-
-
-
-
+    ##
+    # Closes the active socket for a request, if present.
+    # @param [Net::HTTP, nil] http
+    # @return [nil]
+    def close_socket(http)
+      socket = http&.instance_variable_get(:@socket) or return
+      socket = socket.io if socket.respond_to?(:io)
+      socket.close
+    rescue *interrupt_errors
+      nil
+    end
 
-
-
-
-
-
-
-
-      @requests[owner]
-    end
-  end
+    ##
+    # Returns whether the active request is using a transient HTTP client.
+    # @param [Object, nil] http
+    # @return [Boolean]
+    def transient_http?(http)
+      Net::HTTP === http
+    end
 
-
-
-
-
-
-
-
-      @requests ||= {}
-      @requests[owner] = req
-    end
-  end
+    ##
+    # Returns whether the active request is using a persistent HTTP client.
+    # @param [Object, nil] http
+    # @return [Boolean]
+    def persistent_http?(http)
+      defined?(Net::HTTP::Persistent) && Net::HTTP::Persistent === http
+    end
 
-
-
-
-
-
-
-
+    ##
+    # Returns the active request for an execution owner.
+    # @param [Fiber] owner
+    # @return [Request, nil]
+    def request_for(owner)
+      lock do
+        @requests ||= {}
+        @requests[owner]
+      end
+    end
 
-
-
-
-
-
-
-
+    ##
+    # Records an active request for an execution owner.
+    # @param [Request] req
+    # @param [Fiber] owner
+    # @return [Request]
+    def set_request(req, owner)
+      lock do
+        @requests ||= {}
+        @requests[owner] = req
       end
     end
+
+    ##
+    # Clears the active request for an execution owner.
+    # @param [Fiber] owner
+    # @return [Request, nil]
+    def clear_request(owner)
+      lock { @requests&.delete(owner) }
+    end
+
+    ##
+    # Returns whether an execution owner was interrupted.
+    # @param [Fiber] owner
+    # @return [Boolean, nil]
+    def interrupted?(owner)
+      lock { @interrupts&.delete(owner) }
+    end
+
+    def optional_interrupt_errors
+      defined?(::Async::Stop) ? [Async::Stop] : []
+    end
   end
 end

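The interruption mechanism above hinges on one trick: closing the socket underneath a blocked `Net::HTTP` read makes that read raise `IOError`/`EOFError` in the requesting fiber, which execution.rb then converts to `LLM::Interrupt`. A standalone sketch of the same trick (the host is illustrative):

```ruby
require "net/http"

http = Net::HTTP.new("example.com", 443)
http.use_ssl = true
http.start
# Net::HTTP keeps a Net::BufferedIO in @socket; closing its underlying
# IO forces any in-flight read on another fiber or thread to fail fast.
socket = http.instance_variable_get(:@socket)
socket.io.close
```
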
data/lib/llm/provider/transport/http.rb
CHANGED

@@ -50,9 +50,10 @@ class LLM::Provider
 
   ##
   # Returns the current request owner.
-  # @return [Fiber]
+  # @return [Object]
   def request_owner
-    Fiber.current
+    return Fiber.current unless defined?(::Async)
+    Async::Task.current || Fiber.current
   end
 
   ##

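A quick sketch of what that owner resolution yields in each environment, assuming the `async` gem (`Async::Task.current?` is the nil-safe lookup):

```ruby
require "async"

# Outside a reactor there is no running task, so the owner is the fiber.
owner = Async::Task.current? || Fiber.current
owner.equal?(Fiber.current) # => true

Async do |task|
  # Inside a reactor the running task becomes the owner; unlike a bare
  # Fiber it responds to #stop, which interrupt! uses to halt the request.
  (Async::Task.current? || Fiber.current) == task # => true
  task.respond_to?(:stop)                         # => true
end
```
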
data/lib/llm/provider.rb
CHANGED

@@ -338,6 +338,14 @@ class LLM::Provider
   end
   alias_method :cancel!, :interrupt!
 
+  ##
+  # Returns the current request owner used by the transport.
+  # @return [Object]
+  # @api private
+  def request_owner
+    transport.request_owner
+  end
+
   ##
   # @param [Object] stream
   # @return [Boolean]

data/lib/llm/skill.rb
CHANGED

@@ -76,6 +76,8 @@ module LLM
     def call(ctx)
       instructions, tools, tracer = self.instructions, self.tools, ctx.llm.tracer
       params = ctx.params.merge(mode: ctx.mode).reject { [:tools, :schema].include?(_1) }
+      concurrency = params[:stream].extra[:concurrency] if LLM::Stream === params[:stream]
+      params[:concurrency] = concurrency if concurrency
       agent = Class.new(LLM::Agent) do
         instructions(instructions)
         tools(*tools)

data/lib/llm/version.rb
CHANGED