llm.rb 4.23.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 49ed8077a6283802d4141dcb9ec037c7fc46920ebd3273b30c55624b575f3156
-  data.tar.gz: e2289baf740ba9603ed1c308414e632ddda296356659c8714bf3a1744c216104
+  metadata.gz: 56ddedb75f6c791cc42bca736bc62360ba4850a3a204f9a82288e8c6ea977eeb
+  data.tar.gz: 3881b731dacd921e258eac954c4468d052e673e48ad53c63ae1a246973c84d33
 SHA512:
-  metadata.gz: b6b0d72baa785a6bf25cbfd3f2581d7f6a5850a0fa61dea29668596e19eb8a1142330f8acfea7f04a1bc76461c02c0af681588332d955aae2b5c6808f2fc0610
-  data.tar.gz: 836fc45489b9d86c7bde3ed2b94d2813be5bdaea1ebf7697f7e7eca5962f5374343e371188e40ced180ed50e053cd74ec8fcec8dea08c164291ee8577301f195
+  metadata.gz: a8838f57a1232afc42448d28a0f3f7b8907c2a527be284579b3af56e398edda50d7cc02a8dda2794c65096699831058494cccae3f8a116b538f76bc42127eba8
+  data.tar.gz: ef49e8046b4aab4e59b252ffdbf16135673d227b4124bf45bf5c31856c49822168182928b56fdc7428d2056dbae7c211fac0d6d8ef3eba48a7aca47420bb96e7
data/CHANGELOG.md CHANGED
@@ -2,8 +2,73 @@
 
 ## Unreleased
 
+Changes since `v5.1.0`.
+
+## v5.1.0
+
+Changes since `v5.0.0`.
+
+This release tightens streamed tool execution around the actual request-local
+runtime state. It fixes streamed resolution of per-request tools and makes
+that streamed path work cleanly with `LLM.function(...)`, MCP tools, bound
+tool instances, and normal tool classes.
+
+### Fix
+
+* **Resolve request-local tools during streaming** <br>
+  Resolve streamed tool calls through `LLM::Stream` request-local tools
+  before falling back to the global registry, so per-request tools and bound
+  tool instances work correctly during streaming.
+
+* **Support `LLM.function(...)` and MCP tools in streamed tool resolution** <br>
+  Let streamed tool resolution use the current request tool set, so
+  `LLM.function(...)`, MCP tools, bound tool instances, and normal
+  `LLM::Tool` classes all work through the same streamed tool path.
+
+## v5.0.0
+
 Changes since `v4.23.0`.
 
+This release expands llm.rb from an execution runtime into a more explicit
+supervision and transformation runtime. It adds context-level guards,
+transformers, and loop supervision through `LLM::LoopGuard`, while deepening
+long-lived context behavior through compaction, interruption hooks, and
+streamed `ctx.spawn(...)` tool execution.
+
+### Change
+
+* **Make compactor thresholds explicit** <br>
+  Require `message_threshold:` and `token_threshold:` to be opted into
+  explicitly, so `LLM::Compactor` only compacts automatically when one of
+  those thresholds is configured. Context-window-derived token limits can be
+  computed by the caller when needed.
+
+* **Allow assigning a compactor through `LLM::Context`** <br>
+  Let `LLM::Context` accept `ctx.compactor = ...` in addition to the
+  constructor `compactor:` option, so compactor config can be assigned or
+  replaced after context initialization.
+
+* **Mark compaction summaries in message metadata** <br>
+  Mark compaction summaries with `extra[:compaction]` and
+  `LLM::Message#compaction?`, so applications can detect or hide synthetic
+  summary messages in conversation history.
+
+* **Add cooperative tool interruption hooks** <br>
+  Let `ctx.interrupt!` notify queued tool work through `on_interrupt`, so
+  running tools can clean up cooperatively when a context is cancelled.
+
+* **Add `LLM::Context` guards** <br>
+  Add a new `guard` capability to `LLM::Context` so execution can be
+  supervised at the runtime level. The built-in `LLM::LoopGuard` detects
+  repeated tool-call patterns and stops stuck agentic loops through in-band
+  `LLM::GuardError` returns. `LLM::Agent` enables this guard by default.
+
+* **Add `LLM::Context` transformers** <br>
+  Add a new `transformer` capability to `LLM::Context` so prompts and params
+  can be rewritten before provider requests are sent. This makes it possible
+  to apply context-wide behaviors such as PII scrubbing or request-level
+  param injection without rewriting every `talk` and `respond` call site.
+
 ## v4.23.0
 
 Changes since `v4.22.0`.
@@ -18,8 +83,9 @@ OpenAI-compatible no-arg tool schemas for stricter providers such as xAI.
 * **Add `LLM::Compactor` for long-lived contexts** <br>
   Add built-in context compaction through `LLM::Compactor`, so older history
   can be summarized, retained windows can stay bounded, compaction can run on
-  its own `model:`, and `LLM::Stream` can observe the lifecycle through
-  `on_compaction` and `on_compaction_finish`.
+  its own `model:`, thresholds can be configured explicitly, and
+  `LLM::Stream` can observe the lifecycle through `on_compaction` and
+  `on_compaction_finish`.
 
 * **Allow bound tool instances in explicit tool lists** <br>
   Let explicit `tools:` arrays accept `LLM::Tool` instances such as
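For reference, the threshold change in v5.0.0 means compaction must now be opted into. A minimal sketch, where the threshold values are illustrative and only the option names come from this diff:

```ruby
llm = LLM.openai(key: ENV["KEY"])
# Without message_threshold: or token_threshold:, LLM::Compactor no
# longer compacts automatically in v5.x.
ctx = LLM::Context.new(llm, compactor: {
  message_threshold: 200,   # compact once history exceeds 200 messages
  token_threshold: 100_000, # or once usage exceeds 100k tokens
  retention_window: 8       # keep the 8 most recent messages verbatim
})
```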
data/README.md CHANGED
@@ -4,7 +4,7 @@
 <p align="center">
   <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
   <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.23.0-green.svg?" alt="Version"></a>
+  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-5.1.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About
@@ -25,6 +25,7 @@ schemas, files, and persisted state, so real systems can be built out of one coh
 execution model instead of a pile of adapters.
 
 Want to see some code? Jump to [the examples](#examples) section. <br>
+Want to see an agentic framework built on top of llm.rb? Check out [general-intelligence-systems/brute](https://github.com/general-intelligence-systems/brute). <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 
 ## Architecture
@@ -168,6 +169,58 @@ ctx = LLM::Context.new(
 )
 ```
 
+#### Guards
+
+Guards let llm.rb supervise agentic execution, not just run it.
+They live on [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
+can inspect the current runtime state, and can step in when a context is no
+longer making progress.
+
+[`LLM::LoopGuard`](https://0x1eef.github.io/x/llm.rb/LLM/LoopGuard.html) is
+the built-in implementation. It detects repeated tool-call patterns and
+blocks pending tool execution with in-band guarded tool errors instead of
+letting the loop keep spinning. [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
+enables that guard by default through its wrapped context.
+
+```ruby
+ctx = LLM::Context.new(llm)
+ctx.guard = MyGuard.new
+```
+
+#### Transformers
+
+Transformers let llm.rb rewrite outgoing prompts and params before a request
+is sent to the provider. They also live on
+[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
+they solve a different problem from guards: instead of blocking execution,
+they can normalize or scrub what gets sent.
+
+That makes them a good fit for things like PII scrubbing, prompt
+normalization, or request-level param injection. A transformer just needs to
+implement `call(ctx, prompt, params)` and return `[prompt, params]`.
+
+```ruby
+class ScrubPII
+  EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
+
+  def call(ctx, prompt, params)
+    [scrub(prompt), params]
+  end
+
+  private
+
+  def scrub(prompt)
+    case prompt
+    when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
+    else prompt
+    end
+  end
+end
+
+ctx = LLM::Context.new(llm)
+ctx.transformer = ScrubPII.new
+```
+
 #### LLM::Stream
 
 `LLM::Stream` is not just for printing tokens. It supports `on_content`,
@@ -179,7 +232,7 @@ execution path.
 ```ruby
 class Stream < LLM::Stream
   def on_tool_call(tool, error)
-    queue << tool.spawn(:thread)
+    queue << (error || ctx.spawn(tool, :thread))
   end
 
   def on_tool_return(tool, result)
@@ -468,7 +521,7 @@ class Stream < LLM::Stream
   def on_tool_call(tool, error)
     return queue << error if error
    $stdout << "\nRunning tool #{tool.name}...\n"
-    queue << tool.spawn(:thread)
+    queue << ctx.spawn(tool, :thread)
  end

  def on_tool_return(tool, result)
@@ -481,7 +534,8 @@ class Stream < LLM::Stream
 end
 
 llm = LLM.openai(key: ENV["KEY"])
-ctx = LLM::Context.new(llm, stream: Stream.new, tools: [System])
+stream = Stream.new
+ctx = LLM::Context.new(llm, stream:, tools: [System])
 
 ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
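The guard contract documented in this README is small enough to sketch end to end. The guard below is hypothetical (not part of the gem) and assumes only the documented protocol, `call(ctx)` returning `nil` or a warning string, plus the `ctx.functions` accessor used in the examples above:

```ruby
llm = LLM.openai(key: ENV["KEY"])

class ToolBudgetGuard
  def initialize(budget)
    @budget = budget
    @spent = 0
  end

  # Returning a string tells the context to block pending tool work
  # with in-band LLM::GuardError returns; returning nil continues.
  def call(ctx)
    @spent += ctx.functions.size
    return if @spent <= @budget
    "SYSTEM NOTICE: tool-call budget of #{@budget} exhausted; summarize and stop."
  end
end

ctx = LLM::Context.new(llm)
ctx.guard = ToolBudgetGuard.new(25)
```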
data/lib/llm/agent.rb CHANGED
@@ -16,6 +16,9 @@ module LLM
   # **Notes:**
   # * Instructions are injected once unless a system message is already present.
   # * An agent automatically executes tool loops (unlike {LLM::Context LLM::Context}).
+  # * The automatic tool loop enables the wrapped context's `guard` by default.
+  #   The built-in {LLM::LoopGuard LLM::LoopGuard} detects repeated tool-call
+  #   patterns and blocks stuck execution before more tool work is queued.
   # * Tool loop execution can be configured with `concurrency :call`,
   #   `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
   #   types such as `[:thread, :ractor]`.
@@ -128,7 +131,7 @@ module LLM
       defaults = {model: self.class.model, tools: self.class.tools, skills: self.class.skills, schema: self.class.schema}.compact
       @concurrency = params.delete(:concurrency) || self.class.concurrency
       @llm = llm
-      @ctx = LLM::Context.new(llm, defaults.merge(params))
+      @ctx = LLM::Context.new(llm, defaults.merge({guard: true}).merge(params))
     end
 
     ##
@@ -137,7 +140,7 @@ module LLM
     #
     # @param prompt (see LLM::Provider#complete)
     # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
-    # @option params [Integer] :tool_attempts The maximum number of tool call iterations (default 10)
+    # @option params [Integer] :tool_attempts The maximum number of tool call iterations (default 25)
     # @return [LLM::Response] Returns the LLM's response for this turn.
     # @example
     #   llm = LLM.openai(key: ENV["KEY"])
@@ -145,14 +148,7 @@ module LLM
     #   response = agent.talk("Hello, what is your name?")
     #   puts response.choices[0].content
     def talk(prompt, params = {})
-      max = Integer(params.delete(:tool_attempts) || 10)
-      res = @ctx.talk(apply_instructions(prompt), params)
-      max.times do
-        break if @ctx.functions.empty?
-        res = @ctx.talk(call_functions, params)
-      end
-      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
-      res
+      run_loop(:talk, prompt, params)
     end
     alias_method :chat, :talk
 
@@ -163,7 +159,7 @@ module LLM
     # @note Not all LLM providers support this API
     # @param prompt (see LLM::Provider#complete)
     # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
-    # @option params [Integer] :tool_attempts The maximum number of tool call iterations (default 10)
+    # @option params [Integer] :tool_attempts The maximum number of tool call iterations (default 25)
     # @return [LLM::Response] Returns the LLM's response for this turn.
     # @example
     #   llm = LLM.openai(key: ENV["KEY"])
@@ -171,14 +167,7 @@ module LLM
     #   res = agent.respond("What is the capital of France?")
     #   puts res.output_text
     def respond(prompt, params = {})
-      max = Integer(params.delete(:tool_attempts) || 10)
-      res = @ctx.respond(apply_instructions(prompt), params)
-      max.times do
-        break if @ctx.functions.empty?
-        res = @ctx.respond(call_functions, params)
-      end
-      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
-      res
+      run_loop(:respond, prompt, params)
     end
 
     ##
@@ -380,5 +369,16 @@ module LLM
       else raise ArgumentError, "Unknown concurrency: #{concurrency.inspect}. Expected :call, :thread, :task, :fiber, :ractor, or an array of queued task types"
       end
     end
+
+    def run_loop(method, prompt, params)
+      max = Integer(params.delete(:tool_attempts) || 25)
+      res = @ctx.public_send(method, apply_instructions(prompt), params)
+      max.times do
+        break if @ctx.functions.empty?
+        res = @ctx.public_send(method, call_functions, params)
+      end
+      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
+      res
+    end
   end
 end
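Since the constructor now builds the context with `defaults.merge({guard: true}).merge(params)`, caller params win over the default guard. A short sketch of what that merge order implies; the constructor shape is assumed from the diff, while the `Hash` config form is confirmed by `LLM::Context#guard`:

```ruby
agent = LLM::Agent.new(llm)                         # LoopGuard on by default
agent = LLM::Agent.new(llm, guard: false)           # opt out of the default guard
agent = LLM::Agent.new(llm, guard: {threshold: 5})  # tune the built-in LoopGuard
```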
data/lib/llm/compactor.rb CHANGED
@@ -9,14 +9,11 @@
 # [Brute](https://github.com/general-intelligence-systems/brute).
 #
 # The compactor can also use a different model from the main context by
-# setting `model:` in the compactor config. By default, `token_threshold` is
-# 10% less than the current context window, or `100_000` when the context
-# window is unknown. Set `message_threshold:` or `token_threshold:` to `nil`
-# to disable that constraint.
+# setting `model:` in the compactor config. Compaction thresholds are opt-in:
+# provide `message_threshold:` and/or `token_threshold:` to enable policy-
+# driven compaction.
 class LLM::Compactor
-  DEFAULT_TOKEN_THRESHOLD = 100_000
   DEFAULTS = {
-    message_threshold: 200,
     retention_window: 8,
     model: nil
   }.freeze
@@ -28,19 +25,17 @@
   ##
   # @param [LLM::Context] ctx
   # @param [Hash] config
-  # @option config [Integer] :token_threshold
-  #   Defaults to 10% less than the current context window, or `100_000` when
-  #   the context window is unknown. Set to `nil` to disable token-based
-  #   compaction.
-  # @option config [Integer] :message_threshold
-  #   Set to `nil` to disable message-count-based compaction.
+  # @option config [Integer, nil] :token_threshold
+  #   Enables token-based compaction.
+  # @option config [Integer, nil] :message_threshold
+  #   Enables message-count-based compaction.
   # @option config [Integer] :retention_window
   # @option config [String, nil] :model
   #   The model to use for the summarization request. Defaults to the current
   #   context model.
-  def initialize(ctx, **config)
+  def initialize(ctx, config = {})
     @ctx = ctx
-    @config = DEFAULTS.merge(token_threshold: default_token_threshold).merge(config)
+    @config = DEFAULTS.merge(config)
   end
 
   ##
@@ -71,7 +66,7 @@
     stream.on_compaction(ctx, self) if LLM::Stream === stream
     recent = retained_messages
     older = messages[0...(messages.size - recent.size)]
-    summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}")
+    summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}", {compaction: true})
     ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
     stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
     summary
@@ -81,12 +76,6 @@
 
   attr_reader :ctx
 
-  def default_token_threshold
-    window = ctx.context_window
-    return DEFAULT_TOKEN_THRESHOLD if window.zero?
-    window - (window / 10)
-  end
-
   def retained_messages
     messages = ctx.messages.reject(&:system?)
     retention_window = [config[:retention_window], messages.size].min
@@ -39,7 +39,8 @@ class LLM::Context
     original_tool_calls = payload["original_tool_calls"]
     usage = payload["usage"]
     reasoning_content = payload["reasoning_content"]
-    extra = {tool_calls:, original_tool_calls:, tools: @params[:tools], usage:, reasoning_content:}.compact
+    compaction = payload["compaction"]
+    extra = {tool_calls:, original_tool_calls:, tools: @params[:tools], usage:, reasoning_content:, compaction:}.compact
     content = returns.nil? ? deserialize_content(payload["content"]) : returns
     LLM::Message.new(payload["role"], content, extra)
   end
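The removed `default_token_threshold` logic can be reproduced at the call site when a context-window-derived limit is still wanted. A sketch, assuming `ctx.context_window` remains available (only the compactor's private helper was deleted):

```ruby
# Mirror the old default: 10% below the context window, or 100_000
# when the window is unknown (reported as zero).
window = ctx.context_window
ctx.compactor = {
  token_threshold: window.zero? ? 100_000 : window - (window / 10)
}
```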
data/lib/llm/context.rb CHANGED
@@ -77,6 +77,8 @@ module LLM
       @llm = llm
       @mode = params.delete(:mode) || :completions
       @compactor = params.delete(:compactor)
+      @guard = params.delete(:guard)
+      @transformer = params.delete(:transformer)
       tools = [*params.delete(:tools), *load_skills(params.delete(:skills))]
       @params = {model: llm.default_model, schema: nil}.compact.merge!(params)
       @params[:tools] = tools unless tools.empty?
@@ -90,11 +92,73 @@ module LLM
     # [Brute](https://github.com/general-intelligence-systems/brute).
     # @return [LLM::Compactor]
     def compactor
-      @compactor = LLM::Compactor.new(self, **(@compactor || {})) unless LLM::Compactor === @compactor
+      @compactor = LLM::Compactor.new(self, @compactor || {}) unless LLM::Compactor === @compactor
       @compactor
     end
 
     ##
+    # Sets a context compactor or compactor config
+    # @param [LLM::Compactor, Hash, nil] compactor
+    # @return [LLM::Compactor, Hash, nil]
+    def compactor=(compactor)
+      @compactor = compactor
+    end
+
+    ##
+    # Returns a guard, if configured.
+    #
+    # Guards are context-level supervisors for agentic execution. A guard can
+    # inspect the runtime state and decide whether pending tool work should be
+    # blocked before the context keeps looping.
+    #
+    # The built-in implementation is {LLM::LoopGuard LLM::LoopGuard}, which
+    # detects repeated tool-call patterns and turns them into in-band
+    # {LLM::GuardError LLM::GuardError} tool returns.
+    #
+    # @return [#call, nil]
+    def guard
+      return if @guard.nil? || @guard == false
+      @guard = LLM::LoopGuard.new if @guard == true
+      @guard = LLM::LoopGuard.new(@guard) if Hash === @guard
+      @guard
+    end
+
+    ##
+    # Sets a guard or guard config.
+    #
+    # Guards must implement `call(ctx)` and return either `nil` or a warning
+    # string. Returning a warning tells the context to block pending tool work
+    # with guarded tool errors instead of continuing the loop.
+    #
+    # @param [#call, Hash, Boolean, nil] guard
+    # @return [#call, Hash, Boolean, nil]
+    def guard=(guard)
+      @guard = guard
+    end
+
+    ##
+    # Returns a transformer, if configured.
+    #
+    # Transformers can rewrite outgoing prompts and params before a request is
+    # sent to the provider.
+    #
+    # @return [#call, nil]
+    def transformer
+      @transformer
+    end
+
+    ##
+    # Sets a transformer.
+    #
+    # Transformers must implement `call(ctx, prompt, params)` and return a
+    # two-element array of `[prompt, params]`.
+    #
+    # @param [#call, nil] transformer
+    # @return [#call, nil]
+    def transformer=(transformer)
+      @transformer = transformer
+    end
+
     # Interact with the context via the chat completions API.
     # This method immediately sends a request to the LLM and returns the response.
     #
@@ -112,7 +176,8 @@ module LLM
       compactor.compact!(prompt) if compactor.compact?(prompt)
       params = params.merge(messages: @messages.to_a)
       params = @params.merge(params)
-      bind!(params[:stream], params[:model])
+      prompt, params = transform(prompt, params)
+      bind!(params[:stream], params[:model], params[:tools])
       res = @llm.complete(prompt, params)
       role = params[:role] || @llm.user_role
       role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
@@ -139,7 +204,8 @@ module LLM
       @owner = Fiber.current
       compactor.compact!(prompt) if compactor.compact?(prompt)
       params = @params.merge(params)
-      bind!(params[:stream], params[:model])
+      prompt, params = transform(prompt, params)
+      bind!(params[:stream], params[:model], params[:tools])
       res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
       params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
       res = @llm.responses.create(prompt, params)
@@ -183,11 +249,26 @@ module LLM
     # @return [Array<LLM::Function::Return>]
     def call(target)
       case target
-      when :functions then functions.call
+      when :functions then guarded_returns || functions.call
       else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
       end
     end
 
+    ##
+    # Spawns a function through the context.
+    #
+    # When a guard is configured, this method can return an in-band guarded
+    # tool error instead of spawning work.
+    #
+    # @param [LLM::Function] function
+    # @param [Symbol] strategy
+    # @return [LLM::Function::Return, LLM::Function::Task]
+    def spawn(function, strategy)
+      warning = guard&.call(self)
+      return guarded_return_for(function, warning) if warning
+      function.spawn(strategy)
+    end
+
     ##
     # Returns tool returns accumulated in this context
     # @return [Array<LLM::Function::Return>]
@@ -216,10 +297,15 @@ module LLM
     def wait(strategy)
       stream = @params[:stream]
       if LLM::Stream === stream && !stream.queue.empty?
-        stream.wait(strategy)
+        @queue = stream.queue
+        @queue.wait(strategy)
       else
-        functions.wait(strategy)
+        return guarded_returns if guarded_returns
+        @queue = functions.spawn(strategy)
+        @queue.wait
       end
+    ensure
+      @queue = nil
     end
 
     ##
@@ -228,6 +314,7 @@ module LLM
     # @return [nil]
     def interrupt!
       llm.interrupt!(@owner)
+      queue&.interrupt!
     end
     alias_method :cancel!, :interrupt!
 
@@ -372,15 +459,42 @@ module LLM
 
     private
 
-    def bind!(stream, model)
+    def bind!(stream, model, tools)
       return unless LLM::Stream === stream
+      stream.extra[:ctx] = self
       stream.extra[:tracer] = tracer
       stream.extra[:model] = model
+      stream.extra[:tools] = tools
+    end
+
+    def queue
+      return @queue if @queue
+      stream = @params[:stream]
+      stream.queue if LLM::Stream === stream
     end
 
     def load_skills(skills)
       [*skills].map { LLM::Skill.load(_1).to_tool(self) }
     end
+
+    def guarded_returns
+      warning = guard&.call(self)
+      return unless warning
+      functions.map { guarded_return_for(_1, warning) }
+    end
+
+    def transform(prompt, params)
+      return [prompt, params] unless transformer
+      transformer.call(self, prompt, params)
+    end
+
+    def guarded_return_for(function, warning)
+      LLM::Function::Return.new(function.id, function.name, {
+        error: true,
+        type: LLM::GuardError.name,
+        message: warning
+      })
+    end
   end
 
   # Backward-compatible alias
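Beyond the PII example in the README, the transformer contract documented above also covers request-level param injection. A hypothetical transformer as a sketch; the `temperature` param is an assumption about the provider, while the `call(ctx, prompt, params)` shape comes from the docs above:

```ruby
class DefaultTemperature
  # Must return [prompt, params]; here only params are touched, and
  # explicit caller params still win over the injected default.
  def call(ctx, prompt, params)
    [prompt, {temperature: 0.2}.merge(params)]
  end
end

ctx.transformer = DefaultTemperature.new
```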
data/lib/llm/error.rb CHANGED
@@ -55,6 +55,10 @@ module LLM
   # When stuck in a tool call loop
   ToolLoopError = Class.new(Error)
 
+  ##
+  # When a guard blocks pending tool execution
+  GuardError = Class.new(Error)
+
   ##
   # When a request is interrupted
   Interrupt = Class.new(Error)
@@ -59,6 +59,14 @@ class LLM::Function
     @fibers.any?(&:alive?)
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    @fibers.each(&:interrupt!)
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Waits for all fibers in the group to finish and returns
   # their {LLM::Function::Return} values.
@@ -26,6 +26,13 @@ class LLM::Function
     mailbox.alive?
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # @return [LLM::Function::Return]
   def wait
@@ -19,6 +19,14 @@ class LLM::Function
     @tasks.any?(&:alive?)
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    @tasks.each(&:interrupt!)
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # @return [Array<LLM::Function::Return>]
   def wait
@@ -29,6 +29,14 @@ class LLM::Function
     false
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    function&.interrupt!
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # @return [LLM::Function::Return]
   def wait
@@ -60,6 +60,14 @@ class LLM::Function
     @tasks.any?(&:alive?)
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    @tasks.each(&:interrupt!)
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Waits for all tasks in the group to finish and returns
   # their {LLM::Function::Return} values.
@@ -65,6 +65,14 @@ class LLM::Function
     @threads.any?(&:alive?)
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    @threads.each(&:interrupt!)
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Waits for all threads in the group to finish and returns
   # their {LLM::Function::Return} values.
data/lib/llm/function.rb CHANGED
@@ -62,6 +62,13 @@
   def to_json(...)
     LLM.json.dump(to_h, ...)
   end
+
+  ##
+  # @return [nil]
+  def interrupt!
+    nil
+  end
+  alias_method :cancel!, :interrupt!
 end
 
 ##
@@ -218,6 +225,18 @@
     @cancelled = true
   end
 
+  ##
+  # Notifies the function runner that the call was interrupted.
+  # This is cooperative and only applies to runners that implement
+  # `on_interrupt`.
+  # @return [nil]
+  def interrupt!
+    hook = %i[on_cancel on_interrupt].find { @runner.respond_to?(_1) }
+    @runner.public_send(hook) if hook
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Returns true when a function has been called
   # @return [Boolean]
data/lib/llm/loop_guard.rb ADDED
@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+##
+# {LLM::LoopGuard LLM::LoopGuard} is the built-in implementation of
+# llm.rb's `guard` capability.
+#
+# A guard is a context-level supervisor for agentic execution. It can inspect
+# the current runtime state and return a warning string when pending tool work
+# should be blocked before the loop keeps going.
+#
+# {LLM::LoopGuard LLM::LoopGuard} detects when a context is repeating the same
+# tool-call pattern instead of making progress. It is directly inspired by
+# General Intelligence Systems' Brute runtime and its doom-loop detection
+# approach.
+#
+# The public interface is intentionally small:
+# - `call(ctx)` returns `nil` when no intervention is needed
+# - `call(ctx)` returns a warning string when pending tool execution should be blocked
+#
+# {LLM::Context LLM::Context} can use that warning to return in-band
+# {LLM::GuardError LLM::GuardError} tool errors, and
+# {LLM::Agent LLM::Agent} enables this guard by default through its wrapped
+# context.
+#
+# Brute is MIT licensed. The relevant license grant is:
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a copy
+#   of this software and associated documentation files (the "Software"), to deal
+#   in the Software without restriction, including without limitation the rights
+#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#   copies of the Software, and to permit persons to whom the Software is
+#   furnished to do so.
+class LLM::LoopGuard
+  ##
+  # The default number of repeated tool-call patterns required before
+  # the guard intervenes.
+  # @return [Integer]
+  DEFAULT_THRESHOLD = 3
+
+  ##
+  # Returns the repetition threshold.
+  # @return [Integer]
+  attr_reader :threshold
+
+  ##
+  # @param [Hash] config
+  # @option config [Integer] :threshold
+  #   How many repeated tool-call patterns must appear at the tail of the
+  #   sequence before the guard returns a warning.
+  def initialize(config = {})
+    @threshold = config.fetch(:threshold, DEFAULT_THRESHOLD)
+  end
+
+  ##
+  # Checks the current context for repeated tool-call patterns.
+  #
+  # This method inspects assistant tool calls only. It reduces each call to a
+  # `[tool_name, arguments]` signature and checks whether the tail of the
+  # sequence is repeating.
+  #
+  # @param [LLM::Context] ctx
+  # @return [String, nil]
+  #   Returns a warning string when pending tool execution should be blocked,
+  #   or `nil` when execution should continue.
+  def call(ctx)
+    repetitions = detect(ctx.messages.to_a)
+    repetitions ? warning(repetitions) : nil
+  end
+
+  private
+
+  def detect(messages)
+    signatures = extract_signatures(messages)
+    return if signatures.size < threshold
+    check_repeating_pattern(signatures)
+  end
+
+  def warning(repetitions)
+    <<~MSG
+      SYSTEM NOTICE: Repeated tool-call pattern detected - the same pattern has repeated #{repetitions} times.
+      You are stuck in a loop and not making progress. Stop and try a fundamentally different approach:
+      - Re-read the relevant context before retrying
+      - Try a different tool or strategy
+      - Break the problem into smaller steps
+      - If a tool keeps failing, investigate why before retrying
    MSG
+  end
+
+  def extract_signatures(messages)
+    messages
+      .select { _1.respond_to?(:functions) && _1.assistant? }
+      .flat_map { |message| message.functions.map { [_1.name.to_s, _1.arguments.to_s] } }
+  end
+
+  def check_repeating_pattern(sequence)
+    max_pattern_len = sequence.size / threshold
+    (1..max_pattern_len).each do |pattern_len|
+      count = count_tail_repetitions(sequence, pattern_len)
+      return count if count >= threshold
+    end
+    nil
+  end
+
+  def count_tail_repetitions(sequence, length)
+    return 0 if sequence.size < length
+    pattern = sequence.last(length)
+    count = 1
+    pos = sequence.size - length
+    while pos >= length
+      candidate = sequence[(pos - length)...pos]
+      break unless candidate == pattern
+      count += 1
+      pos -= length
+    end
+    count
+  end
+end
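A brief usage sketch of the class above. With the default threshold of 3, the guard fires once the tail of the tool-call signature sequence repeats three times; the `threshold:` value here is illustrative:

```ruby
llm = LLM.openai(key: ENV["KEY"])
ctx = LLM::Context.new(llm)
ctx.guard = LLM::LoopGuard.new(threshold: 5)

# Conceptually: with [["a", "{}"], ["b", "{}"]] * 3 as the signature
# sequence, count_tail_repetitions(seq, 2) counts the two-call tail
# three times, so call(ctx) returns the "SYSTEM NOTICE" warning string;
# otherwise it returns nil and execution continues.
warning = ctx.guard.call(ctx)
```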
data/lib/llm/message.rb CHANGED
@@ -34,6 +34,7 @@ module LLM
     # @return [Hash]
     def to_h
       {role:, content:, reasoning_content:,
+       compaction: extra.compaction,
        tools: extra.tool_calls,
        usage:,
        original_tool_calls: extra.original_tool_calls}.compact
@@ -74,6 +75,13 @@ module LLM
       extra.reasoning_content
     end
 
+    ##
+    # Returns true when a message was created by context compaction
+    # @return [Boolean]
+    def compaction?
+      !!extra.compaction
+    end
+
     ##
     # Returns true when a message contains an image URL
     # @return [Boolean]
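The new `compaction` metadata makes synthetic summaries easy to filter when rendering history. A one-line sketch using the predicate added above, together with the enumerable `ctx.messages` collection the compactor already relies on:

```ruby
# Hide compaction summaries from a rendered transcript.
visible = ctx.messages.reject(&:compaction?)
```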
@@ -105,7 +105,7 @@ class LLM::Anthropic
     end
 
     def resolve_tool(tool)
-      registered = LLM::Function.find_by_name(tool["name"])
+      registered = @stream.find_tool(tool["name"])
       fn = (registered || LLM::Function.new(tool["name"])).dup.tap do |fn|
         fn.id = tool["id"]
         fn.arguments = LLM::Anthropic.parse_tool_input(tool["input"])
@@ -153,7 +153,7 @@ class LLM::Google
 
     def resolve_tool(part, cindex, pindex)
       call = part["functionCall"]
-      registered = LLM::Function.find_by_name(call["name"])
+      registered = @stream.find_tool(call["name"])
       fn = (registered || LLM::Function.new(call["name"])).dup.tap do |fn|
         fn.id = LLM::Google.tool_id(part:, cindex:, pindex:)
         fn.arguments = call["args"]
@@ -269,7 +269,7 @@ class LLM::OpenAI
     # @group Resolvers
 
     def resolve_tool(tool, arguments)
-      registered = LLM::Function.find_by_name(tool["name"])
+      registered = @stream.find_tool(tool["name"])
       fn = (registered || LLM::Function.new(tool["name"])).dup.tap do |fn|
         fn.id = tool["call_id"]
         fn.arguments = arguments
@@ -185,7 +185,7 @@ class LLM::OpenAI
     end
 
     def resolve_tool(tool, function, arguments)
-      registered = LLM::Function.find_by_name(function["name"])
+      registered = @stream.find_tool(function["name"])
       fn = (registered || LLM::Function.new(function["name"])).dup.tap do |fn|
         fn.id = tool["id"]
         fn.arguments = arguments
@@ -31,6 +31,14 @@ class LLM::Stream
     @items.empty?
   end
 
+  ##
+  # @return [nil]
+  def interrupt!
+    @items.each(&:interrupt!)
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Waits for queued work to finish and returns function results.
   # @param [Symbol, Array<Symbol>] strategy
data/lib/llm/stream.rb CHANGED
@@ -30,6 +30,13 @@ module LLM
       @extra ||= LLM::Object.from({})
     end
 
+    ##
+    # Returns the current context, if one was attached to the stream.
+    # @return [LLM::Context, nil]
+    def ctx
+      extra[:ctx]
+    end
+
     ##
     # Returns a lazily-initialized queue for tool results or spawned work.
     # @return [LLM::Stream::Queue]
@@ -70,17 +77,18 @@ module LLM
     ##
     # Called when a streamed tool call has been fully constructed.
     # @note A stream implementation may start tool execution here, for
-    #   example by pushing `tool.spawn(:thread)`, `tool.spawn(:fiber)`, or
-    #   `tool.spawn(:task)` onto {#queue}. Mixed strategies can also be
-    #   selected per tool, such as `tool.mcp? ? tool.spawn(:task) :
-    #   tool.spawn(:ractor)`. When a streamed tool cannot be resolved, `error`
-    #   is passed as an {LLM::Function::Return}. It can be sent back to the
-    #   model, allowing the tool-call path to recover and the session to
-    #   continue. Tool resolution depends on
-    #   {LLM::Function.registry}, which includes {LLM::Tool LLM::Tool}
-    #   subclasses, including MCP tools, but not functions defined with
-    #   {LLM.function}. The current `:ractor` mode is for class-based tools
-    #   and does not support MCP tools.
+    #   example by pushing `ctx.spawn(tool, :thread)`,
+    #   `ctx.spawn(tool, :fiber)`, or `ctx.spawn(tool, :task)` onto {#queue}.
+    #   Mixed strategies can also be selected per tool, such as
+    #   `tool.mcp? ? ctx.spawn(tool, :task) : ctx.spawn(tool, :ractor)`.
+    #   When a streamed tool cannot be resolved, `error` is passed as an
+    #   {LLM::Function::Return}. It can be sent back to the model, allowing
+    #   the tool-call path to recover and the session to continue. Streamed
+    #   tool resolution now prefers the current request tools, so
+    #   {LLM.function}, MCP tools, bound tool instances, and normal
+    #   {LLM::Tool LLM::Tool} classes can all resolve through the same
+    #   request-local path. The current `:ractor` mode is for class-based
+    #   tools and does not support MCP tools.
     # @param [LLM::Function] tool
     #   The parsed tool call.
     # @param [LLM::Function::Return, nil] error
@@ -93,8 +101,8 @@ module LLM
     ##
     # Called when queued streamed tool work returns.
     # @note This callback runs when {#wait} resolves work that was queued from
-    #   {#on_tool_call}, such as values returned by `tool.spawn(:thread)`,
-    #   `tool.spawn(:fiber)`, or `tool.spawn(:task)`.
+    #   {#on_tool_call}, such as values returned by `ctx.spawn(tool, :thread)`,
+    #   `ctx.spawn(tool, :fiber)`, or `ctx.spawn(tool, :task)`.
     # @param [LLM::Function] tool
     #   The tool that returned.
     # @param [LLM::Function::Return] result
@@ -140,6 +148,34 @@ module LLM
       })
     end
 
+    ##
+    # Returns the tool definitions available for the current streamed request.
+    # This prefers request-local tools attached to the stream and falls back
+    # to the current context defaults when present.
+    # @return [Array<LLM::Function, LLM::Tool>]
+    def tools
+      extra[:tools] || ctx&.params&.dig(:tools) || []
+    end
+
+    ##
+    # Resolves a streamed tool call against the current request tools first,
+    # then falls back to the global function registry.
+    # @param [String] name
+    # @return [LLM::Function, nil]
+    def find_tool(name)
+      tool = tools.find do |candidate|
+        candidate_name =
+          if candidate.respond_to?(:function)
+            candidate.function.name
+          else
+            candidate.name
+          end
+        candidate_name.to_s == name.to_s
+      end
+      tool&.then { _1.respond_to?(:function) ? _1.function : _1 } ||
+        LLM::Function.find_by_name(name)
+    end
+
     # @endgroup
   end
 end
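To make the fallback chain in `find_tool` concrete, a short sketch; `System` is the tool class from the README example, and mapping the class to the string name "system" is an assumption for illustration:

```ruby
llm = LLM.openai(key: ENV["KEY"])
stream = LLM::Stream.new # or a subclass, as in the README examples
ctx = LLM::Context.new(llm, stream:, tools: [System])

# Resolution order inside find_tool("system"):
#   1. stream.extra[:tools]             -- request-local tools set by bind!
#   2. stream.ctx&.params&.dig(:tools)  -- the context's default tools
#   3. LLM::Function.find_by_name(name) -- global registry fallback
tool = stream.find_tool("system")
```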
data/lib/llm/tool.rb CHANGED
@@ -185,4 +185,18 @@
   def mcp?
     self.class.mcp?
   end
+
+  ##
+  # Called when an in-flight tool run is interrupted.
+  # Tools can override this to implement cooperative cleanup.
+  # @return [nil]
+  def on_interrupt
+  end
+
+  ##
+  # Called when an in-flight tool run is cancelled.
+  # @return [nil]
+  def on_cancel
+    on_interrupt
+  end
 end
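A hypothetical tool using the new hook; the `call` signature and the tool body are assumptions for illustration, and only `on_interrupt`/`on_cancel` come from the diff above:

```ruby
require "open-uri"

class Downloader < LLM::Tool
  def call(url:)
    @io = URI.open(url)
    @io.read
  ensure
    @io&.close
  end

  # Reached (via on_cancel) when ctx.interrupt! notifies this tool run;
  # cooperative: the tool decides how to unwind. IO#close is idempotent,
  # so double-closing here is safe.
  def on_interrupt
    @io&.close
  end
end
```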
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module LLM
-  VERSION = "4.23.0"
+  VERSION = "5.1.0"
 end
data/lib/llm.rb CHANGED
@@ -23,6 +23,7 @@ module LLM
   require_relative "llm/stream"
   require_relative "llm/provider"
   require_relative "llm/context"
+  require_relative "llm/loop_guard"
   require_relative "llm/agent"
   require_relative "llm/buffer"
   require_relative "llm/function"
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llm.rb
 version: !ruby/object:Gem::Version
-  version: 4.23.0
+  version: 5.1.0
 platform: ruby
 authors:
 - Antar Azri
@@ -298,6 +298,7 @@ files:
 - lib/llm/function/thread_group.rb
 - lib/llm/function/tracing.rb
 - lib/llm/json_adapter.rb
+- lib/llm/loop_guard.rb
 - lib/llm/mcp.rb
 - lib/llm/mcp/command.rb
 - lib/llm/mcp/error.rb