llm.rb 6.0.0 → 7.0.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8cc16b548be77e0a2bb78c0e130b14f0ac8fdabb2c2f082dd2824282e5a5733b
- data.tar.gz: 668655ba6a7d65d44b53cc1b8b33afddf3563dc216df83181fb2b7394a2847ff
+ metadata.gz: 6c923952039095a2234eb1bd5c058a951b0d797d27577cdf7f679df59b49060b
+ data.tar.gz: 3667e0d79e44634f769dfced198dd07c1039f173cb43b72aab7d3204aa3638f8
  SHA512:
- metadata.gz: 2257aeec49a43c56bfc974e3fc190a850ea27c98c437c7c242efe6c645c000eebdbe2e731fcc0b287303690c1055768f2de32be6ac900391c5156260da9c2ce5
- data.tar.gz: 8f4f8f3475ac1bd2d9ffbd8c0b413b46224936459eb0ed6bb395876b994c1ab67ec02cf670176f90b8d9b892b59ab4b67271f4b69c80fa7c094aa89310ab5c58
+ metadata.gz: 655d450b2ffeb71ed9564b7c5c23a2a86e9e385de9dc1abdac18588e460cffdecd1b2da1d5ef9fc162dc3f3286b7d2c979baec3953cd1ddbdab74d1ef5b87112
+ data.tar.gz: a044fedb675c4d92eff55c210d588b68b80c7e3967188674c2de4d8f6bc69d76e8f15c18f49fb54e09a8c93dff89074304d231609337bfa3bc79c96e1f3f576b
data/CHANGELOG.md CHANGED
@@ -1,5 +1,87 @@
  # Changelog
 
+ ## Unreleased
+
+ ## v7.0.0
+
+ Changes since `v6.1.0`.
+
+ This release turns agent tool-loop limit errors into in-band advisory
+ returns so the LLM can react to rate limits and continue the loop. It
+ adds `tool_attempts: nil` as a way to opt out of advisory tool-limit
+ returns entirely, and fixes the default provider HTTP path to keep
+ `net-http-persistent` optional when not explicitly enabled.
+
+ ### Breaking
+
+ * **Return in-band tool-loop limit errors from agents** <br>
+ Stop raising `LLM::ToolLoopError` when an agent exhausts its tool loop
+ attempt budget, and instead send advisory `LLM::Function::Return`
+ errors back through the model so the LLM can react to the rate limit
+ in-band and continue the loop.
+
+ * **Allow `tool_attempts: nil` to disable advisory tool-limit returns** <br>
+ Keep the default `tool_attempts` budget at `25`, but treat an explicit
+ `tool_attempts: nil` as an opt-out that disables advisory tool-limit
+ returns entirely.
+
+ ### Fix
+
+ * **Keep `net-http-persistent` optional on normal HTTP requests** <br>
+ Stop the default provider HTTP path from loading `net/http/persistent`
+ unless persistent transport support is explicitly enabled.
+
+ ## v6.1.0
+
+ Changes since `v6.0.0`.
+
+ This release tightens interrupt and compaction behavior for long-running
+ contexts. It adds `LLM::Buffer#rindex`, supports percentage-based token
+ thresholds in `LLM::Compactor`, tracks persisted compaction state through
+ context serialization, reliably interrupts Async-backed requests, preserves
+ valid tool-call history on cancellation, keeps concurrent skill tool loops
+ running on streamed agents, and returns zero-valued usage objects when no
+ provider usage has been recorded yet.
+
+ ### Change
+
+ * **Add `LLM::Buffer#rindex`** <br>
+ Add `LLM::Buffer#rindex` as a direct forward to the underlying message
+ array so callers can find the last matching message index through the
+ buffer API.
+
+ * **Support percentage compaction token thresholds** <br>
+ Let `LLM::Compactor` accept `token_threshold:` values like `"90%"` so
+ compaction can trigger at a percentage of the active model context
+ window.
+
+ ### Fix
+
+ * **Interrupt Async-backed requests reliably** <br>
+ Track request ownership through the provider transport so contexts use
+ the active Async task when available, letting `ctx.interrupt!`
+ reliably cancel streamed requests under Async runtimes and surface
+ them as `LLM::Interrupt`.
+
+ * **Preserve valid tool-call history on cancellation** <br>
+ Append cancelled tool-return messages for unresolved tool calls during
+ `ctx.interrupt!` so follow-up provider requests do not fail with
+ invalid tool-call history after pending tool work is cancelled.
+
+ * **Preserve concurrent skill tool loops on streamed agents** <br>
+ Propagate the active agent concurrency through the effective request
+ stream so nested skill agents keep using queued `wait(...)` tool
+ execution instead of falling back to direct `:call` execution.
+
+ * **Track persisted compaction state on contexts** <br>
+ Mark contexts as compacted after `LLM::Compactor#compact!`, persist and
+ restore that state through context serialization, and clear it after the
+ next successful model response.
+
+ * **Return zero-valued usage objects from contexts** <br>
+ Make `LLM::Context#usage` consistently return an `LLM::Object`, using a
+ zero-valued usage object when no provider usage has been recorded yet.
+
  ## v6.0.0
 
  Changes since `v5.4.0`.
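
As a quick illustration of the `tool_attempts` change above, a minimal sketch follows. Only the `tool_attempts:` semantics come from this release; the agent constructor shape and the `chat` method name are assumptions for illustration:

```ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
agent = LLM::Agent.new(llm) # constructor shape assumed for illustration

# Default: after 25 tool-call iterations the agent stops executing tools
# and sends advisory LLM::Function::Return errors back through the model,
# so the loop continues in-band instead of raising LLM::ToolLoopError.
agent.chat("Index every file in the repository")

# Explicit opt-out: no advisory tool-limit returns; the loop runs until
# the model stops requesting tool calls.
agent.chat("Index every file in the repository", tool_attempts: nil)
```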
data/README.md CHANGED
@@ -4,7 +4,7 @@
  <p align="center">
  <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
  <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
- <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-6.0.0-green.svg?" alt="Version"></a>
+ <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-7.0.0-green.svg?" alt="Version"></a>
  </p>
 
  ## About
@@ -163,12 +163,15 @@ and when a stream is present it emits `on_compaction` and
  `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
  The compactor can also use a different model from the main context, which is
  useful when you want summarization to run on a cheaper or faster model.
+ `token_threshold:` accepts either a fixed token count or a percentage string
+ like `"90%"`, which resolves against the active model context window and
+ triggers compaction once total token usage goes over that percentage.
 
  ```ruby
  ctx = LLM::Context.new(
  llm,
  compactor: {
- message_threshold: 200,
+ token_threshold: "90%",
  retention_window: 8,
  model: "gpt-5.4-mini"
  }
@@ -367,6 +370,10 @@ worker.join
  or experimental `:ractor` support for class-based tools. MCP tools are not
  supported by the current `:ractor` mode, but mixed tool sets can still
  route MCP tools and local tools through different strategies at runtime.
+ By default, the tool attempt budget is `25`. When an agent exhausts that
+ budget, it sends advisory tool errors back through the model instead of
+ raising out of the runtime. Set `tool_attempts: nil` to disable that
+ advisory behavior.
  - **Tool calls have an explicit lifecycle** <br>
  A tool call can be executed, cancelled through
  [`LLM::Function#cancel`](https://0x1eef.github.io/x/llm.rb/LLM/Function.html#cancel-instance_method),
@@ -622,9 +629,12 @@ This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context
  [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
  long-lived contexts can summarize older history and expose the lifecycle
  through stream hooks. This approach is inspired by General Intelligence
- Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
+ Systems. The
  compactor can also use its own `model:` if you want summarization to run on a
- different model from the main context. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+ different model from the main context. `token_threshold:` accepts either a
+ fixed token count or a percentage string like `"90%"`, which resolves
+ against the active model context window and triggers compaction once total
+ token usage goes over that percentage. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
  ```ruby
  require "llm"
@@ -644,7 +654,7 @@ ctx = LLM::Context.new(
  llm,
  stream: Stream.new,
  compactor: {
- message_threshold: 200,
+ token_threshold: "90%",
  retention_window: 8,
  model: "gpt-5.4-mini"
  }
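
The two thresholds can also be combined, since the compactor docs describe them as "and/or" options; compaction triggers when either limit is crossed. A sketch in the same config shape as the README examples above:

```ruby
ctx = LLM::Context.new(
  llm,
  compactor: {
    message_threshold: 200, # compact after 200 non-system messages
    token_threshold: "90%", # or at 90% of the model context window
    retention_window: 8
  }
)
```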
data/lib/llm/agent.rb CHANGED
@@ -19,6 +19,9 @@ module LLM
  # * The automatic tool loop enables the wrapped context's `guard` by default.
  # The built-in {LLM::LoopGuard LLM::LoopGuard} detects repeated tool-call
  # patterns and blocks stuck execution before more tool work is queued.
+ # * The default tool attempt budget is `25`. After that, the agent sends
+ #   advisory tool errors back through the model and keeps the loop in-band.
+ #   Set `tool_attempts: nil` to disable that advisory behavior.
  # * Tool loop execution can be configured with `concurrency :call`,
  # `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
  # types such as `[:thread, :ractor]`.
@@ -161,7 +164,10 @@ module LLM
  #
  # @param prompt (see LLM::Provider#complete)
  # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
- # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default 25)
+ # @option params [Integer] :tool_attempts
+ # The maximum number of tool call iterations before the agent sends
+ # in-band advisory tool errors back through the model (default 25).
+ # Set to `nil` to disable advisory tool-limit returns.
  # @return [LLM::Response] Returns the LLM's response for this turn.
  # @example
  # llm = LLM.openai(key: ENV["KEY"])
@@ -180,7 +186,10 @@ module LLM
  # @note Not all LLM providers support this API
  # @param prompt (see LLM::Provider#complete)
  # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
- # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default 25)
+ # @option params [Integer] :tool_attempts
+ # The maximum number of tool call iterations before the agent sends
+ # in-band advisory tool errors back through the model (default 25).
+ # Set to `nil` to disable advisory tool-limit returns.
  # @return [LLM::Response] Returns the LLM's response for this turn.
  # @example
  # llm = LLM.openai(key: ENV["KEY"])
@@ -393,18 +402,37 @@ module LLM
 
  def run_loop(method, prompt, params)
  loop = proc do
- max = Integer(params.delete(:tool_attempts) || 25)
+ max = params.key?(:tool_attempts) ? params.delete(:tool_attempts) : 25
+ max = Integer(max) if max
+ stream = params[:stream] || @ctx.params[:stream]
+ stream.extra[:concurrency] = concurrency if LLM::Stream === stream
  res = @ctx.public_send(method, apply_instructions(prompt), params)
- max.times do
+ loop do
  break if @ctx.functions.empty?
- res = @ctx.public_send(method, call_functions, params)
+ if max
+ max.times do
+ break if @ctx.functions.empty?
+ res = @ctx.public_send(method, call_functions, params)
+ end
+ break if @ctx.functions.empty?
+ res = @ctx.public_send(method, @ctx.functions.map { rate_limit(_1) }, params)
+ else
+ res = @ctx.public_send(method, call_functions, params)
+ end
  end
- raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
  res
  end
  @tracer ? @llm.with_tracer(@tracer, &loop) : loop.call
  end
 
+ def rate_limit(function)
+ LLM::Function::Return.new(function.id, function.name, {
+ error: true,
+ type: LLM::ToolLoopError.name,
+ message: "tool loop rate limit reached"
+ })
+ end
+
 
  def resolve_option(option)
  Proc === option ? instance_exec(&option) : option
data/lib/llm/buffer.rb CHANGED
@@ -52,6 +52,14 @@ module LLM
  reverse_each.find(...)
  end
 
+ ##
+ # Returns the index of the last message matching the given block.
+ # @yield [LLM::Message]
+ # @return [Integer, nil]
+ def rindex(...)
+ @messages.rindex(...)
+ end
+
  ##
  # Returns the last message(s) in the buffer
  # @param [Integer, nil] n
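
Because `rindex` forwards straight to `Array#rindex`, it behaves like the core method. A small usage sketch, given a context `ctx` (the `assistant?` predicate appears elsewhere in this diff):

```ruby
# Index of the most recent assistant message, or nil if none exists.
idx = ctx.messages.rindex { |message| message.assistant? }
last_reply = ctx.messages.to_a[idx] if idx
```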
data/lib/llm/compactor.rb CHANGED
@@ -5,13 +5,14 @@
  # smaller replacement message when a context grows too large.
  #
  # This work is directly inspired by the compaction approach developed by
- # General Intelligence Systems in
- # [Brute](https://github.com/general-intelligence-systems/brute).
+ # General Intelligence Systems.
  #
  # The compactor can also use a different model from the main context by
  # setting `model:` in the compactor config. Compaction thresholds are opt-in:
  # provide `message_threshold:` and/or `token_threshold:` to enable policy-
- # driven compaction.
+ # driven compaction. `token_threshold:` accepts either an integer token count
+ # or a percentage string like `"90%"`, which resolves against the current
+ # model context window.
  class LLM::Compactor
  DEFAULTS = {
  retention_window: 8,
@@ -25,8 +26,11 @@ class LLM::Compactor
  ##
  # @param [LLM::Context] ctx
  # @param [Hash] config
- # @option config [Integer, nil] :token_threshold
- # Enables token-based compaction.
+ # @option config [Integer, String, nil] :token_threshold
+ # Enables token-based compaction. Integer values are treated as a fixed
+ # token count. Percentage strings like `"90%"` are resolved against
+ # {LLM::Context#context_window}; if the context window is unknown, the
+ # percentage threshold is treated as disabled.
  # @option config [Integer, nil] :message_threshold
  # Enables message-count-based compaction.
  # @option config [Integer] :retention_window
@@ -39,18 +43,22 @@ class LLM::Compactor
  end
 
  ##
- # Returns true when the context should be compacted
+ # Returns true when the context should be compacted.
+ #
+ # When `token_threshold:` is a percentage string such as `"90%"`, the
+ # threshold is resolved against the current context window and compared to
+ # the current total token usage.
  # @param [Object] prompt
  # The next prompt or turn input
  # @return [Boolean]
- def compact?(prompt = nil)
+ def compactable?(prompt = nil)
  return false if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
  messages = ctx.messages.reject(&:system?)
  return true if config[:message_threshold] && messages.size > config[:message_threshold]
- usage = ctx.usage
- return true if config[:token_threshold] && usage && usage.total_tokens > config[:token_threshold]
+ return true if token_threshold and ctx.usage.total_tokens > token_threshold
  false
  end
+ alias_method :compact?, :compactable?
 
  ##
  # Summarize older messages and replace them with a compact summary.
@@ -68,6 +76,7 @@ class LLM::Compactor
  older = messages[0...(messages.size - recent.size)]
  summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}", {compaction: true})
  ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
+ ctx.compacted = true
  stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
  summary
  end
@@ -84,6 +93,15 @@ class LLM::Compactor
  messages[start..] || []
  end
 
+ def token_threshold
+ @token_threshold ||= begin
+ threshold = config[:token_threshold]
+ return threshold unless threshold.to_s.end_with?("%")
+ return if ctx.context_window <= 0
+ (ctx.context_window * threshold.delete_suffix("%").to_f / 100).floor
+ end
+ end
+
  def summarize(messages)
  model = config[:model] || ctx.params[:model] || ctx.llm.default_model
  ctx.llm.complete(summary_prompt(messages), model:).content
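
A short sketch of how the new threshold resolution feeds `compactable?` above; the window size is hypothetical, and the arithmetic mirrors the `token_threshold` helper in this hunk:

```ruby
# Assuming ctx.context_window is 100_000, token_threshold: "90%" resolves
# to (100_000 * 90.0 / 100).floor, i.e. 90_000 tokens.
compactor = LLM::Compactor.new(ctx, token_threshold: "90%", retention_window: 8)
compactor.compactable? # => true once ctx.usage.total_tokens > 90_000
compactor.compact?     # same check via the backwards-compatible alias
```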
@@ -26,6 +26,7 @@ class LLM::Context
  LLM.json.load(string)
  end
  @messages.concat [*ctx["messages"]].map { deserialize_message(_1) }
+ @compacted = !!ctx["compacted"]
  self
  end
  alias_method :restore, :deserialize
data/lib/llm/context.rb CHANGED
@@ -40,6 +40,14 @@ module LLM
  include Serializer
  include Deserializer
 
+ ZERO_USAGE = LLM::Object.from(
+ input_tokens: 0,
+ output_tokens: 0,
+ reasoning_tokens: 0,
+ total_tokens: 0
+ )
+ private_constant :ZERO_USAGE
+
  ##
  # Returns the accumulated message history for this context
  # @return [LLM::Buffer<LLM::Message>]
@@ -88,8 +96,7 @@ module LLM
  ##
  # Returns a context compactor
  # This feature is inspired by the compaction approach developed by
- # General Intelligence Systems in
- # [Brute](https://github.com/general-intelligence-systems/brute).
+ # General Intelligence Systems.
  # @return [LLM::Compactor]
  def compactor
  @compactor = LLM::Compactor.new(self, @compactor || {}) unless LLM::Compactor === @compactor
@@ -104,6 +111,14 @@ module LLM
  @compactor = compactor
  end
 
+ ##
+ # Returns whether the context has been compacted and no later model
+ # response has cleared that state.
+ # @return [Boolean]
+ # @api private
+ attr_accessor :compacted
+ alias_method :compacted?, :compacted
+
  ##
  # Returns a guard, if configured.
  #
@@ -172,13 +187,14 @@ module LLM
  # puts res.messages[0].content
  def talk(prompt, params = {})
  return respond(prompt, params) if mode == :responses
- @owner = Fiber.current
+ @owner = @llm.request_owner
  compactor.compact!(prompt) if compactor.compact?(prompt)
  params = params.merge(messages: @messages.to_a)
  params = @params.merge(params)
  prompt, params = transform(prompt, params)
  bind!(params[:stream], params[:model], params[:tools])
  res = @llm.complete(prompt, params)
+ self.compacted = false
  role = params[:role] || @llm.user_role
  role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
  @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
@@ -201,7 +217,7 @@ module LLM
  # res = ctx.respond("What is the capital of France?")
  # puts res.output_text
  def respond(prompt, params = {})
- @owner = Fiber.current
+ @owner = @llm.request_owner
  compactor.compact!(prompt) if compactor.compact?(prompt)
  params = @params.merge(params)
  prompt, params = transform(prompt, params)
@@ -209,6 +225,7 @@ module LLM
  res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
  params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
  res = @llm.responses.create(prompt, params)
+ self.compacted = false
  role = params[:role] || @llm.user_role
  @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
  @messages.concat [res.choices[-1]]
@@ -313,27 +330,31 @@ module LLM
  # This is inspired by Go's context cancellation model.
  # @return [nil]
  def interrupt!
+ pending = functions.to_a
  llm.interrupt!(@owner)
  queue&.interrupt!
+ return if pending.empty?
+ pending.each(&:interrupt!)
+ returns = pending.map { _1.cancel(reason: "function call cancelled") }
+ @messages << LLM::Message.new(@llm.tool_role, returns)
+ nil
  end
  alias_method :cancel!, :interrupt!
 
  ##
  # Returns token usage accumulated in this context
- # @note
- # This method returns token usage for the latest
- # assistant message, and it returns nil for non-assistant
- # messages.
- # @return [LLM::Object, nil]
+ # @return [LLM::Object]
  def usage
- usage = @messages.find(&:assistant?)&.usage
- return unless usage
- LLM::Object.from(
- input_tokens: usage.input_tokens || 0,
- output_tokens: usage.output_tokens || 0,
- reasoning_tokens: usage.reasoning_tokens || 0,
- total_tokens: usage.total_tokens || 0
- )
+ if usage = @messages.find(&:assistant?)&.usage
+ LLM::Object.from(
+ input_tokens: usage.input_tokens || 0,
+ output_tokens: usage.output_tokens || 0,
+ reasoning_tokens: usage.reasoning_tokens || 0,
+ total_tokens: usage.total_tokens || 0
+ )
+ else
+ ZERO_USAGE
+ end
  end
 
  ##
@@ -403,7 +424,12 @@ module LLM
  ##
  # @return [Hash]
  def to_h
- {schema_version: 1, model:, messages: @messages.map { serialize_message(_1) }}
+ {
+ schema_version: 1,
+ model:,
+ compacted:,
+ messages: @messages.map { serialize_message(_1) }
+ }
  end
 
  ##
@@ -432,12 +458,12 @@ module LLM
  # Returns an _approximate_ cost for a given context
  # based on both the provider, and model
  def cost
- return LLM::Cost.new(0, 0) unless usage
  cost = LLM.registry_for(llm).cost(model:)
- LLM::Cost.new(
- (cost.input.to_f / 1_000_000.0) * usage.input_tokens,
- (cost.output.to_f / 1_000_000.0) * usage.output_tokens
- )
+ input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
+ output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
+ LLM::Cost.new(input_cost, output_cost)
+ rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
+ LLM::Cost.new(0, 0)
  end
 
  ##
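
The practical effect of the `usage` change above, sketched: a fresh context now reports zeros rather than `nil`, so arithmetic on usage needs no guard:

```ruby
ctx = LLM::Context.new(llm)
# Before any assistant response has been recorded, usage is a
# zero-valued LLM::Object (it returned nil in v6.0.0).
ctx.usage.total_tokens # => 0
```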
@@ -10,8 +10,7 @@
  #
  # {LLM::LoopGuard LLM::LoopGuard} detects when a context is repeating the same
  # tool-call pattern instead of making progress. It is directly inspired by
- # General Intelligence Systems' Brute runtime and its doom-loop detection
- # approach.
+ # General Intelligence Systems and its doom-loop detection approach.
  #
  # The public interface is intentionally small:
  # - `call(ctx)` returns `nil` when no intervention is needed
@@ -22,14 +21,6 @@
  # {LLM::Agent LLM::Agent} enables this guard by default through its wrapped
  # context.
  #
- # Brute is MIT licensed. The relevant license grant is:
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so.
  class LLM::LoopGuard
  ##
  # The default number of repeated tool-call patterns required before
@@ -38,7 +38,7 @@ module LLM::Provider::Transport
  perform_request(http, request, stream, stream_parser, &b)
  end
  [handle_response(res, tracer, span), span, tracer]
- rescue *LLM::Provider::Transport::HTTP::Interruptible::INTERRUPT_ERRORS
+ rescue *transport.interrupt_errors
  raise LLM::Interrupt, "request interrupted" if transport.interrupted?(owner)
  raise
  end
@@ -1,109 +1,114 @@
  # frozen_string_literal: true
 
  class LLM::Provider
- module Transport
- class HTTP
- ##
- # Internal request interruption methods for
- # {LLM::Provider::Transport::HTTP}.
- #
- # This module tracks active requests by execution owner and provides
- # the logic used to interrupt an in-flight request by closing the
- # active HTTP connection.
- #
- # @api private
- module Interruptible
- INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
- Request = Struct.new(:http, :connection, keyword_init: true)
+ ##
+ # Internal request interruption methods for
+ # {LLM::Provider::Transport::HTTP}.
+ #
+ # This module tracks active requests by execution owner and provides
+ # the logic used to interrupt an in-flight request by closing the
+ # active HTTP connection.
+ #
+ # @api private
+ module Transport::HTTP::Interruptible
+ INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
+ Request = Struct.new(:http, :connection, keyword_init: true)
 
- ##
- # Interrupt an active request, if any.
- # @param [Fiber] owner
- # The execution owner whose request should be interrupted
- # @return [nil]
- def interrupt!(owner)
- req = request_for(owner) or return
- lock { (@interrupts ||= {})[owner] = true }
- if persistent_http?(req.http)
- close_socket(req.connection&.http)
- req.http.finish(req.connection)
- elsif transient_http?(req.http)
- close_socket(req.http)
- req.http.finish if req.http.active?
- end
- rescue *INTERRUPT_ERRORS
- nil
- end
-
- private
+ def interrupt_errors
+ [*INTERRUPT_ERRORS, *optional_interrupt_errors]
+ end
 
- ##
- # Closes the active socket for a request, if present.
- # @param [Net::HTTP, nil] http
- # @return [nil]
- def close_socket(http)
- socket = http&.instance_variable_get(:@socket) or return
- socket = socket.io if socket.respond_to?(:io)
- socket.close
- rescue *INTERRUPT_ERRORS
- nil
- end
+ ##
+ # Interrupt an active request, if any.
+ # @param [Fiber] owner
+ # The execution owner whose request should be interrupted
+ # @return [nil]
+ def interrupt!(owner)
+ req = request_for(owner) or return
+ lock { (@interrupts ||= {})[owner] = true }
+ if persistent_http?(req.http)
+ close_socket(req.connection&.http)
+ req.http.finish(req.connection)
+ elsif transient_http?(req.http)
+ close_socket(req.http)
+ req.http.finish if req.http.active?
+ end
+ owner.stop if owner.respond_to?(:stop)
+ rescue *interrupt_errors
+ nil
+ end
 
- ##
- # Returns whether the active request is using a transient HTTP client.
- # @param [Object, nil] http
- # @return [Boolean]
- def transient_http?(http)
- Net::HTTP === http
- end
+ private
 
- ##
- # Returns whether the active request is using a persistent HTTP client.
- # @param [Object, nil] http
- # @return [Boolean]
- def persistent_http?(http)
- defined?(Net::HTTP::Persistent) && Net::HTTP::Persistent === http
- end
+ ##
+ # Closes the active socket for a request, if present.
+ # @param [Net::HTTP, nil] http
+ # @return [nil]
+ def close_socket(http)
+ socket = http&.instance_variable_get(:@socket) or return
+ socket = socket.io if socket.respond_to?(:io)
+ socket.close
+ rescue *interrupt_errors
+ nil
+ end
 
- ##
- # Returns the active request for an execution owner.
- # @param [Fiber] owner
- # @return [Request, nil]
- def request_for(owner)
- lock do
- @requests ||= {}
- @requests[owner]
- end
- end
+ ##
+ # Returns whether the active request is using a transient HTTP client.
+ # @param [Object, nil] http
+ # @return [Boolean]
+ def transient_http?(http)
+ Net::HTTP === http
+ end
 
- ##
- # Records an active request for an execution owner.
- # @param [Request] req
- # @param [Fiber] owner
- # @return [Request]
- def set_request(req, owner)
- lock do
- @requests ||= {}
- @requests[owner] = req
- end
- end
+ ##
+ # Returns whether the active request is using a persistent HTTP client.
+ # @param [Object, nil] http
+ # @return [Boolean]
+ def persistent_http?(http)
+ defined?(Net::HTTP::Persistent) && Net::HTTP::Persistent === http
+ end
 
- ##
- # Clears the active request for an execution owner.
- # @param [Fiber] owner
- # @return [Request, nil]
- def clear_request(owner)
- lock { @requests&.delete(owner) }
- end
+ ##
+ # Returns the active request for an execution owner.
+ # @param [Fiber] owner
+ # @return [Request, nil]
+ def request_for(owner)
+ lock do
+ @requests ||= {}
+ @requests[owner]
+ end
+ end
 
- ##
- # Returns whether an execution owner was interrupted.
- # @param [Fiber] owner
- # @return [Boolean, nil]
- def interrupted?(owner)
- lock { @interrupts&.delete(owner) }
- end
+ ##
+ # Records an active request for an execution owner.
+ # @param [Request] req
+ # @param [Fiber] owner
+ # @return [Request]
+ def set_request(req, owner)
+ lock do
+ @requests ||= {}
+ @requests[owner] = req
  end
  end
+
+ ##
+ # Clears the active request for an execution owner.
+ # @param [Fiber] owner
+ # @return [Request, nil]
+ def clear_request(owner)
+ lock { @requests&.delete(owner) }
+ end
+
+ ##
+ # Returns whether an execution owner was interrupted.
+ # @param [Fiber] owner
+ # @return [Boolean, nil]
+ def interrupted?(owner)
+ lock { @interrupts&.delete(owner) }
+ end
+
+ def optional_interrupt_errors
+ defined?(::Async::Stop) ? [Async::Stop] : []
+ end
  end
  end
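
A minimal sketch of how these transport changes surface to callers under an Async runtime. The `async` gem is assumed to be installed, and the prompt and timing are illustrative only:

```ruby
require "llm"
require "async"

llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)

Async do |task|
  request = task.async do
    ctx.talk("Summarize this very long document ...")
  rescue LLM::Interrupt
    # the closed connection surfaces in-band as LLM::Interrupt
  end
  sleep 1
  ctx.interrupt! # now cancels the Async-backed request reliably
  request.wait
end
```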
@@ -50,9 +50,10 @@ class LLM::Provider
 
  ##
  # Returns the current request owner.
- # @return [Fiber]
+ # @return [Object]
  def request_owner
- Fiber.current
+ return Fiber.current unless defined?(::Async)
+ Async::Task.current || Fiber.current
  end
 
  ##
@@ -70,7 +71,7 @@ class LLM::Provider
  ##
  # @return [Boolean]
  def persistent?
- !persistent_client.nil?
+ !@persistent_client.nil?
  end
 
  ##
data/lib/llm/provider.rb CHANGED
@@ -338,6 +338,14 @@ class LLM::Provider
  end
  alias_method :cancel!, :interrupt!
 
+ ##
+ # Returns the current request owner used by the transport.
+ # @return [Object]
+ # @api private
+ def request_owner
+ transport.request_owner
+ end
+
  ##
  # @param [Object] stream
  # @return [Boolean]
data/lib/llm/skill.rb CHANGED
@@ -76,6 +76,8 @@ module LLM
  def call(ctx)
  instructions, tools, tracer = self.instructions, self.tools, ctx.llm.tracer
  params = ctx.params.merge(mode: ctx.mode).reject { [:tools, :schema].include?(_1) }
+ concurrency = params[:stream].extra[:concurrency] if LLM::Stream === params[:stream]
+ params[:concurrency] = concurrency if concurrency
  agent = Class.new(LLM::Agent) do
  instructions(instructions)
  tools(*tools)
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module LLM
- VERSION = "6.0.0"
+ VERSION = "7.0.0"
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: llm.rb
  version: !ruby/object:Gem::Version
- version: 6.0.0
+ version: 7.0.0
  platform: ruby
  authors:
  - Antar Azri