llm.rb 4.22.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +74 -0
- data/README.md +120 -7
- data/lib/llm/agent.rb +19 -19
- data/lib/llm/buffer.rb +10 -0
- data/lib/llm/compactor.rb +117 -0
- data/lib/llm/context/deserializer.rb +2 -1
- data/lib/llm/context.rb +140 -4
- data/lib/llm/error.rb +4 -0
- data/lib/llm/function/fiber_group.rb +8 -0
- data/lib/llm/function/ractor/task.rb +7 -0
- data/lib/llm/function/ractor_group.rb +8 -0
- data/lib/llm/function/task.rb +8 -0
- data/lib/llm/function/task_group.rb +8 -0
- data/lib/llm/function/thread_group.rb +8 -0
- data/lib/llm/function.rb +21 -1
- data/lib/llm/loop_guard.rb +117 -0
- data/lib/llm/message.rb +8 -0
- data/lib/llm/stream/queue.rb +8 -0
- data/lib/llm/stream.rb +37 -10
- data/lib/llm/tool.rb +28 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +1 -0
- metadata +3 -1
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 482fe176a5e48457ba806d4ca3ae46c2e39f0e6a8037b8b39f4aaeea399ea33c
+  data.tar.gz: 71088a3ae2878ad20ed021324ab3da60df42c99753d062c3063bf9ba45cfc079
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aef5fa2469606524a5c00cd582c12035b513c284743a2f86650cb8e0b828952be06935c173bf6b20d0230c31c91dbb20475fca5ca1c25b04e08b02069d399a49
+  data.tar.gz: f937bb7f2d381e131f6c4d87b5c095cab5c4a2c3af9a0dc0557b263c26d637fcc463fd876442faa128cb31edb7833a67a34da5bd9e527c86d4030349d388b000

data/CHANGELOG.md
CHANGED

@@ -2,8 +2,82 @@
 
 ## Unreleased
 
+Changes since `v5.0.0`.
+
+## v5.0.0
+
+Changes since `v4.23.0`.
+
+This release expands llm.rb from an execution runtime into a more explicit
+supervision and transformation runtime. It adds context-level guards,
+transformers, and loop supervision through `LLM::LoopGuard`, while deepening
+long-lived context behavior through compaction, interruption hooks, and
+streamed `ctx.spawn(...)` tool execution.
+
+### Change
+
+* **Make compactor thresholds explicit** <br>
+  Require `message_threshold:` and `token_threshold:` to be opted into
+  explicitly, so `LLM::Compactor` only compacts automatically when one of
+  those thresholds is configured. Context-window-derived token limits can be
+  computed by the caller when needed.
+
+* **Allow assigning a compactor through `LLM::Context`** <br>
+  Let `LLM::Context` accept `ctx.compactor = ...` in addition to the
+  constructor `compactor:` option, so compactor config can be assigned or
+  replaced after context initialization.
+
+* **Mark compaction summaries in message metadata** <br>
+  Mark compaction summaries with `extra[:compaction]` and
+  `LLM::Message#compaction?`, so applications can detect or hide synthetic
+  summary messages in conversation history.
+
+* **Add cooperative tool interruption hooks** <br>
+  Let `ctx.interrupt!` notify queued tool work through `on_interrupt`, so
+  running tools can clean up cooperatively when a context is cancelled.
+
+* **Add `LLM::Context` guards** <br>
+  Add a new `guard` capability to `LLM::Context` so execution can be
+  supervised at the runtime level. The built-in `LLM::LoopGuard` detects
+  repeated tool-call patterns and stops stuck agentic loops through in-band
+  `LLM::GuardError` returns. `LLM::Agent` enables this guard by default.
+
+* **Add `LLM::Context` transformers** <br>
+  Add a new `transformer` capability to `LLM::Context` so prompts and params
+  can be rewritten before provider requests are sent. This makes it possible
+  to apply context-wide behaviors such as PII scrubbing or request-level
+  param injection without rewriting every `talk` and `respond` call site.
+
+## v4.23.0
+
 Changes since `v4.22.0`.
 
+This release expands llm.rb's runtime surface for long-lived contexts and
+stateful tools. It adds built-in context compaction through `LLM::Compactor`,
+lets explicit `tools:` arrays accept bound `LLM::Tool` instances, and fixes
+OpenAI-compatible no-arg tool schemas for stricter providers such as xAI.
+
+### Change
+
+* **Add `LLM::Compactor` for long-lived contexts** <br>
+  Add built-in context compaction through `LLM::Compactor`, so older history
+  can be summarized, retained windows can stay bounded, compaction can run on
+  its own `model:`, thresholds can be configured explicitly, and
+  `LLM::Stream` can observe the lifecycle through `on_compaction` and
+  `on_compaction_finish`.
+
+* **Allow bound tool instances in explicit tool lists** <br>
+  Let explicit `tools:` arrays accept `LLM::Tool` instances such as
+  `MyTool.new(foo: 1)`, so tools can carry bound state without changing the
+  global tool registry model.
+
+### Fix
+
+* **Fix xAI/OpenAI-compatible no-arg tool schemas** <br>
+  Send an empty object schema for tools without declared parameters instead
+  of `null`, so stricter providers such as xAI accept mixed tool sets that
+  include no-arg tools.
+
 ## v4.22.0
 
 Changes since `v4.21.0`.

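For orientation, here is a minimal sketch of how the v4.23.0 and v5.0.0 entries above compose; the `MyTool` class and the prompt are hypothetical stand-ins, not part of the packaged files:

```ruby
require "llm"

llm = LLM.openai(key: ENV["KEY"])
# Explicit tool lists accept bound instances (v4.23.0); MyTool is hypothetical
ctx = LLM::Context.new(llm, tools: [MyTool.new(foo: 1)])
# Compactor config can be assigned after initialization (v5.0.0)
ctx.compactor = {message_threshold: 200, retention_window: 8}

ctx.talk("Summarize the project status.")
# Compaction summaries are marked, so they can be detected or hidden (v5.0.0)
visible = ctx.messages.reject(&:compaction?)
```
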
data/README.md
CHANGED

@@ -4,7 +4,7 @@
 <p align="center">
 <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
 <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-4.22.0-green.svg?" alt="Version"></a>
+<a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-5.0.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About

@@ -25,6 +25,7 @@ schemas, files, and persisted state, so real systems can be built out of one coh
 execution model instead of a pile of adapters.
 
 Want to see some code? Jump to [the examples](#examples) section. <br>
+Want to see an agentic framework built on top of llm.rb? Check out [general-intelligence-systems/brute](https://github.com/general-intelligence-systems/brute). <br>
 Want a taste of what llm.rb can build? See [the screencast](#screencast).
 
 ## Architecture

@@ -147,17 +148,91 @@ ctx.talk("Remember that my favorite language is Ruby.")
 ctx.save(path: "context.json")
 ```
 
+#### Context Compaction
+
+Long-lived contexts can compact older history into a summary instead of
+growing forever. Compaction is built into [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html)
+through [`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html),
+and when a stream is present it emits `on_compaction` and
+`on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
+The compactor can also use a different model from the main context, which is
+useful when you want summarization to run on a cheaper or faster model.
+
+```ruby
+ctx = LLM::Context.new(
+  llm,
+  compactor: {
+    message_threshold: 200,
+    retention_window: 8,
+    model: "gpt-5.4-mini"
+  }
+)
+```
+
+#### Guards
+
+Guards let llm.rb supervise agentic execution, not just run it.
+They live on [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
+can inspect the current runtime state, and can step in when a context is no
+longer making progress.
+
+[`LLM::LoopGuard`](https://0x1eef.github.io/x/llm.rb/LLM/LoopGuard.html) is
+the built-in implementation. It detects repeated tool-call patterns and
+blocks pending tool execution with in-band guarded tool errors instead of
+letting the loop keep spinning. [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html)
+enables that guard by default through its wrapped context.
+
+```ruby
+ctx = LLM::Context.new(llm)
+ctx.guard = MyGuard.new
+```
+
+#### Transformers
+
+Transformers let llm.rb rewrite outgoing prompts and params before a request
+is sent to the provider. They also live on
+[`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html), but
+they solve a different problem from guards: instead of blocking execution,
+they can normalize or scrub what gets sent.
+
+That makes them a good fit for things like PII scrubbing, prompt
+normalization, or request-level param injection. A transformer just needs to
+implement `call(ctx, prompt, params)` and return `[prompt, params]`.
+
+```ruby
+class ScrubPII
+  EMAIL = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/i
+
+  def call(ctx, prompt, params)
+    [scrub(prompt), params]
+  end
+
+  private
+
+  def scrub(prompt)
+    case prompt
+    when String then prompt.gsub(EMAIL, "[REDACTED_EMAIL]")
+    else prompt
+    end
+  end
+end
+
+ctx = LLM::Context.new(llm)
+ctx.transformer = ScrubPII.new
+```
+
 #### LLM::Stream
 
 `LLM::Stream` is not just for printing tokens. It supports `on_content`,
-`on_reasoning_content`, `on_tool_call`,
-visible output, reasoning output,
-the same
+`on_reasoning_content`, `on_tool_call`, `on_tool_return`, `on_compaction`,
+and `on_compaction_finish`, which means visible output, reasoning output, tool
+execution, and context compaction can all be driven through the same
+execution path.
 
 ```ruby
 class Stream < LLM::Stream
   def on_tool_call(tool, error)
-    queue <<
+    queue << (error || ctx.spawn(tool, :thread))
   end
 
   def on_tool_return(tool, result)

@@ -350,6 +425,7 @@ Runtime Building Blocks:
 - **Agents** — reusable assistants with tool auto-execution
 - **Skills** — directory-backed capabilities loaded from `SKILL.md`
 - **MCP Support** — stdio and HTTP MCP clients with prompt and tool support
+- **Context Compaction** — summarize older history in long-lived contexts
 
 Data and Structure:
 - **Structured Outputs** — JSON Schema-based responses

@@ -445,7 +521,7 @@ class Stream < LLM::Stream
   def on_tool_call(tool, error)
     return queue << error if error
     $stdout << "\nRunning tool #{tool.name}...\n"
-    queue <<
+    queue << ctx.spawn(tool, :thread)
   end
 
   def on_tool_return(tool, result)

@@ -458,12 +534,49 @@ class Stream < LLM::Stream
 end
 
 llm = LLM.openai(key: ENV["KEY"])
-
+stream = Stream.new
+ctx = LLM::Context.new(llm, stream:, tools: [System])
 
 ctx.talk("Run `date` and `uname -a`.")
 ctx.talk(ctx.wait(:thread)) while ctx.functions.any?
 ```
 
+#### Context Compaction
+
+This example uses [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html),
+[`LLM::Compactor`](https://0x1eef.github.io/x/llm.rb/LLM/Compactor.html), and
+[`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) together so
+long-lived contexts can summarize older history and expose the lifecycle
+through stream hooks. This approach is inspired by General Intelligence
+Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
+compactor can also use its own `model:` if you want summarization to run on a
+different model from the main context. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+
+```ruby
+require "llm"
+
+class Stream < LLM::Stream
+  def on_compaction(ctx, compactor)
+    puts "Compacting #{ctx.messages.size} messages..."
+  end
+
+  def on_compaction_finish(ctx, compactor)
+    puts "Compacted to #{ctx.messages.size} messages."
+  end
+end
+
+llm = LLM.openai(key: ENV["KEY"])
+ctx = LLM::Context.new(
+  llm,
+  stream: Stream.new,
+  compactor: {
+    message_threshold: 200,
+    retention_window: 8,
+    model: "gpt-5.4-mini"
+  }
+)
+```
+
 #### Reasoning
 
 This example uses [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html) with the OpenAI Responses API so reasoning output is streamed separately from visible assistant output. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.

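The guards snippet above assigns `MyGuard.new` without defining it. Based on the guard contract documented in `LLM::Context` later in this diff, where `call(ctx)` returns `nil` to continue or a warning string to block pending tool work, a minimal custom guard might look like this; the 50-message cutoff is an arbitrary illustration:

```ruby
class MyGuard
  MAX_MESSAGES = 50 # arbitrary cutoff, for illustration only

  # Return nil to let execution continue, or a warning string to have the
  # context turn pending tool work into in-band LLM::GuardError returns.
  def call(ctx)
    return nil if ctx.messages.count <= MAX_MESSAGES
    "SYSTEM NOTICE: #{ctx.messages.count} messages without resolution. " \
      "Stop calling tools and summarize what you have so far."
  end
end
```
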
data/lib/llm/agent.rb
CHANGED

@@ -16,6 +16,9 @@ module LLM
   # **Notes:**
   # * Instructions are injected once unless a system message is already present.
   # * An agent automatically executes tool loops (unlike {LLM::Context LLM::Context}).
+  # * The automatic tool loop enables the wrapped context's `guard` by default.
+  #   The built-in {LLM::LoopGuard LLM::LoopGuard} detects repeated tool-call
+  #   patterns and blocks stuck execution before more tool work is queued.
   # * Tool loop execution can be configured with `concurrency :call`,
   #   `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
   #   types such as `[:thread, :ractor]`.

@@ -128,7 +131,7 @@ module LLM
       defaults = {model: self.class.model, tools: self.class.tools, skills: self.class.skills, schema: self.class.schema}.compact
       @concurrency = params.delete(:concurrency) || self.class.concurrency
       @llm = llm
-      @ctx = LLM::Context.new(llm, defaults.merge(params))
+      @ctx = LLM::Context.new(llm, defaults.merge({guard: true}).merge(params))
     end
 
     ##

@@ -137,7 +140,7 @@ module LLM
     #
     # @param prompt (see LLM::Provider#complete)
     # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
-    # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default
+    # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default 25)
     # @return [LLM::Response] Returns the LLM's response for this turn.
     # @example
     #   llm = LLM.openai(key: ENV["KEY"])

@@ -145,14 +148,7 @@ module LLM
     #   response = agent.talk("Hello, what is your name?")
     #   puts response.choices[0].content
     def talk(prompt, params = {})
-
-      res = @ctx.talk(apply_instructions(prompt), params)
-      max.times do
-        break if @ctx.functions.empty?
-        res = @ctx.talk(call_functions, params)
-      end
-      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
-      res
+      run_loop(:talk, prompt, params)
     end
     alias_method :chat, :talk
 

@@ -163,7 +159,7 @@ module LLM
     # @note Not all LLM providers support this API
     # @param prompt (see LLM::Provider#complete)
     # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
-    # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default
+    # @option params [Integer] :tool_attempts The maxinum number of tool call iterations (default 25)
     # @return [LLM::Response] Returns the LLM's response for this turn.
     # @example
     #   llm = LLM.openai(key: ENV["KEY"])

@@ -171,14 +167,7 @@ module LLM
     #   res = agent.respond("What is the capital of France?")
     #   puts res.output_text
     def respond(prompt, params = {})
-
-      res = @ctx.respond(apply_instructions(prompt), params)
-      max.times do
-        break if @ctx.functions.empty?
-        res = @ctx.respond(call_functions, params)
-      end
-      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
-      res
+      run_loop(:respond, prompt, params)
     end
 
     ##

@@ -380,5 +369,16 @@ module LLM
       else raise ArgumentError, "Unknown concurrency: #{concurrency.inspect}. Expected :call, :thread, :task, :fiber, :ractor, or an array of queued task types"
       end
     end
+
+    def run_loop(method, prompt, params)
+      max = Integer(params.delete(:tool_attempts) || 25)
+      res = @ctx.public_send(method, apply_instructions(prompt), params)
+      max.times do
+        break if @ctx.functions.empty?
+        res = @ctx.public_send(method, call_functions, params)
+      end
+      raise LLM::ToolLoopError, "pending tool calls remain" unless @ctx.functions.empty?
+      res
+    end
   end
 end

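Since `talk` and `respond` now funnel through `run_loop`, the `:tool_attempts` cap documented above applies to both uniformly. A usage sketch, where the agent class and prompt are hypothetical:

```ruby
agent = MyAgent.new(llm) # hypothetical LLM::Agent subclass
# Allow at most five tool-call iterations for this turn; if tool calls
# still remain afterwards, LLM::ToolLoopError is raised.
res = agent.talk("Audit the open issues.", tool_attempts: 5)
puts res.choices[0].content
```
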
data/lib/llm/buffer.rb
CHANGED

@@ -23,6 +23,16 @@ module LLM
       @messages.concat(ary)
     end
 
+    ##
+    # Replace the tracked messages
+    # @param [Array<LLM::Message>] messages
+    #  The replacement messages
+    # @return [LLM::Buffer]
+    def replace(messages)
+      @messages.replace(messages)
+      self
+    end
+
     ##
     # @yield [LLM::Message]
     #  Yields each message in the conversation thread

data/lib/llm/compactor.rb
ADDED

@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+##
+# {LLM::Compactor LLM::Compactor} summarizes older context messages into a
+# smaller replacement message when a context grows too large.
+#
+# This work is directly inspired by the compaction approach developed by
+# General Intelligence Systems in
+# [Brute](https://github.com/general-intelligence-systems/brute).
+#
+# The compactor can also use a different model from the main context by
+# setting `model:` in the compactor config. Compaction thresholds are opt-in:
+# provide `message_threshold:` and/or `token_threshold:` to enable policy-
+# driven compaction.
+class LLM::Compactor
+  DEFAULTS = {
+    retention_window: 8,
+    model: nil
+  }.freeze
+
+  ##
+  # @return [Hash]
+  attr_reader :config
+
+  ##
+  # @param [LLM::Context] ctx
+  # @param [Hash] config
+  # @option config [Integer, nil] :token_threshold
+  #  Enables token-based compaction.
+  # @option config [Integer, nil] :message_threshold
+  #  Enables message-count-based compaction.
+  # @option config [Integer] :retention_window
+  # @option config [String, nil] :model
+  #  The model to use for the summarization request. Defaults to the current
+  #  context model.
+  def initialize(ctx, config = {})
+    @ctx = ctx
+    @config = DEFAULTS.merge(config)
+  end
+
+  ##
+  # Returns true when the context should be compacted
+  # @param [Object] prompt
+  #  The next prompt or turn input
+  # @return [Boolean]
+  def compact?(prompt = nil)
+    return false if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
+    messages = ctx.messages.reject(&:system?)
+    return true if config[:message_threshold] && messages.size > config[:message_threshold]
+    usage = ctx.usage
+    return true if config[:token_threshold] && usage && usage.total_tokens > config[:token_threshold]
+    false
+  end
+
+  ##
+  # Summarize older messages and replace them with a compact summary.
+  # @param [Object] prompt
+  #  The next prompt or turn input
+  # @return [LLM::Message, nil]
+  def compact!(prompt = nil)
+    return nil if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
+    messages = ctx.messages.reject(&:system?)
+    retention_window = [config[:retention_window], messages.size].min
+    return nil unless messages.size > retention_window
+    stream = ctx.params[:stream]
+    stream.on_compaction(ctx, self) if LLM::Stream === stream
+    recent = retained_messages
+    older = messages[0...(messages.size - recent.size)]
+    summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}", {compaction: true})
+    ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
+    stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
+    summary
+  end
+
+  private
+
+  attr_reader :ctx
+
+  def retained_messages
+    messages = ctx.messages.reject(&:system?)
+    retention_window = [config[:retention_window], messages.size].min
+    start = [messages.size - retention_window, 0].max
+    start -= 1 while start > 0 && messages[start].tool_return?
+    messages[start..] || []
+  end
+
+  def summarize(messages)
+    model = config[:model] || ctx.params[:model] || ctx.llm.default_model
+    ctx.llm.complete(summary_prompt(messages), model:).content
+  end
+
+  def summary_prompt(messages)
+    <<~PROMPT
+      Summarize this conversation history for context continuity.
+      The summary will replace these messages in the context window.
+
+      Focus on:
+      - What the user asked for
+      - Important facts and decisions
+      - Tool calls and outcomes that still matter
+      - What should happen next
+
+      Conversation:
+      #{serialize(messages)}
+    PROMPT
+  end
+
+  def serialize(messages)
+    messages.map do |message|
+      content = case message.content
+                when Array then message.content.map(&:inspect).join(", ")
+                else message.content.to_s
+                end
+      "#{message.role}: #{content.empty? ? "(empty)" : content}"
+    end.join("\n---\n")
+  end
+end

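Because `compact?` only returns true when a threshold is configured, compaction can also be driven manually through the lazily built accessor on `LLM::Context`. A sketch, reusing the model name from the README example above:

```ruby
ctx = LLM::Context.new(llm, compactor: {model: "gpt-5.4-mini"})
# No thresholds configured, so nothing compacts automatically; force a
# pass instead. compact! returns the summary message, or nil while the
# history still fits inside the retention window.
summary = ctx.compactor.compact!
puts summary.content if summary
```
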
data/lib/llm/context/deserializer.rb
CHANGED

@@ -39,7 +39,8 @@ class LLM::Context
     original_tool_calls = payload["original_tool_calls"]
     usage = payload["usage"]
     reasoning_content = payload["reasoning_content"]
-
+    compaction = payload["compaction"]
+    extra = {tool_calls:, original_tool_calls:, tools: @params[:tools], usage:, reasoning_content:, compaction:}.compact
     content = returns.nil? ? deserialize_content(payload["content"]) : returns
     LLM::Message.new(payload["role"], content, extra)
   end

data/lib/llm/context.rb
CHANGED

@@ -34,6 +34,7 @@ module LLM
   #   ctx.talk(prompt)
   #   ctx.messages.each { |m| puts "[#{m.role}] #{m.content}" }
   class Context
+    require_relative "compactor"
     require_relative "context/serializer"
     require_relative "context/deserializer"
     include Serializer

@@ -75,6 +76,9 @@ module LLM
     def initialize(llm, params = {})
       @llm = llm
       @mode = params.delete(:mode) || :completions
+      @compactor = params.delete(:compactor)
+      @guard = params.delete(:guard)
+      @transformer = params.delete(:transformer)
       tools = [*params.delete(:tools), *load_skills(params.delete(:skills))]
       @params = {model: llm.default_model, schema: nil}.compact.merge!(params)
       @params[:tools] = tools unless tools.empty?

@@ -82,6 +86,79 @@ module LLM
     end
 
     ##
+    # Returns a context compactor
+    # This feature is inspired by the compaction approach developed by
+    # General Intelligence Systems in
+    # [Brute](https://github.com/general-intelligence-systems/brute).
+    # @return [LLM::Compactor]
+    def compactor
+      @compactor = LLM::Compactor.new(self, @compactor || {}) unless LLM::Compactor === @compactor
+      @compactor
+    end
+
+    ##
+    # Sets a context compactor or compactor config
+    # @param [LLM::Compactor, Hash, nil] compactor
+    # @return [LLM::Compactor, Hash, nil]
+    def compactor=(compactor)
+      @compactor = compactor
+    end
+
+    ##
+    # Returns a guard, if configured.
+    #
+    # Guards are context-level supervisors for agentic execution. A guard can
+    # inspect the runtime state and decide whether pending tool work should be
+    # blocked before the context keeps looping.
+    #
+    # The built-in implementation is {LLM::LoopGuard LLM::LoopGuard}, which
+    # detects repeated tool-call patterns and turns them into in-band
+    # {LLM::GuardError LLM::GuardError} tool returns.
+    #
+    # @return [#call, nil]
+    def guard
+      return if @guard.nil? || @guard == false
+      @guard = LLM::LoopGuard.new if @guard == true
+      @guard = LLM::LoopGuard.new(@guard) if Hash === @guard
+      @guard
+    end
+
+    ##
+    # Sets a guard or guard config.
+    #
+    # Guards must implement `call(ctx)` and return either `nil` or a warning
+    # string. Returning a warning tells the context to block pending tool work
+    # with guarded tool errors instead of continuing the loop.
+    #
+    # @param [#call, Hash, Boolean, nil] guard
+    # @return [#call, Hash, Boolean, nil]
+    def guard=(guard)
+      @guard = guard
+    end
+
+    ##
+    # Returns a transformer, if configured.
+    #
+    # Transformers can rewrite outgoing prompts and params before a request is
+    # sent to the provider.
+    #
+    # @return [#call, nil]
+    def transformer
+      @transformer
+    end
+
+    ##
+    # Sets a transformer.
+    #
+    # Transformers must implement `call(ctx, prompt, params)` and return a
+    # two-element array of `[prompt, params]`.
+    #
+    # @param [#call, nil] transformer
+    # @return [#call, nil]
+    def transformer=(transformer)
+      @transformer = transformer
+    end
+
     # Interact with the context via the chat completions API.
     # This method immediately sends a request to the LLM and returns the response.
     #

@@ -96,8 +173,10 @@ module LLM
     def talk(prompt, params = {})
       return respond(prompt, params) if mode == :responses
       @owner = Fiber.current
+      compactor.compact!(prompt) if compactor.compact?(prompt)
       params = params.merge(messages: @messages.to_a)
       params = @params.merge(params)
+      prompt, params = transform(prompt, params)
       bind!(params[:stream], params[:model])
       res = @llm.complete(prompt, params)
       role = params[:role] || @llm.user_role

@@ -123,7 +202,9 @@ module LLM
     #   puts res.output_text
     def respond(prompt, params = {})
       @owner = Fiber.current
+      compactor.compact!(prompt) if compactor.compact?(prompt)
       params = @params.merge(params)
+      prompt, params = transform(prompt, params)
       bind!(params[:stream], params[:model])
       res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
       params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact

@@ -168,11 +249,26 @@ module LLM
     # @return [Array<LLM::Function::Return>]
     def call(target)
       case target
-      when :functions then functions.call
+      when :functions then guarded_returns || functions.call
       else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
       end
     end
 
+    ##
+    # Spawns a function through the context.
+    #
+    # When a guard is configured, this method can return an in-band guarded
+    # tool error instead of spawning work.
+    #
+    # @param [LLM::Function] function
+    # @param [Symbol] strategy
+    # @return [LLM::Function::Return, LLM::Function::Task]
+    def spawn(function, strategy)
+      warning = guard&.call(self)
+      return guarded_return_for(function, warning) if warning
+      function.spawn(strategy)
+    end
+
     ##
     # Returns tool returns accumulated in this context
     # @return [Array<LLM::Function::Return>]

@@ -201,10 +297,15 @@ module LLM
     def wait(strategy)
       stream = @params[:stream]
       if LLM::Stream === stream && !stream.queue.empty?
-        stream.
+        @queue = stream.queue
+        @queue.wait(strategy)
       else
-
+        return guarded_returns if guarded_returns
+        @queue = functions.spawn(strategy)
+        @queue.wait
       end
+    ensure
+      @queue = nil
     end
 
     ##

@@ -213,6 +314,7 @@ module LLM
     # @return [nil]
     def interrupt!
       llm.interrupt!(@owner)
+      queue&.interrupt!
     end
     alias_method :cancel!, :interrupt!
 

@@ -224,7 +326,14 @@ module LLM
     #  messages.
     # @return [LLM::Object, nil]
     def usage
-      @messages.find(&:assistant?)&.usage
+      usage = @messages.find(&:assistant?)&.usage
+      return unless usage
+      LLM::Object.from(
+        input_tokens: usage.input_tokens || 0,
+        output_tokens: usage.output_tokens || 0,
+        reasoning_tokens: usage.reasoning_tokens || 0,
+        total_tokens: usage.total_tokens || 0
+      )
     end
 
     ##

@@ -352,13 +461,40 @@ module LLM
 
     def bind!(stream, model)
       return unless LLM::Stream === stream
+      stream.extra[:ctx] = self
       stream.extra[:tracer] = tracer
       stream.extra[:model] = model
     end
 
+    def queue
+      return @queue if @queue
+      stream = @params[:stream]
+      stream.queue if LLM::Stream === stream
+    end
+
     def load_skills(skills)
       [*skills].map { LLM::Skill.load(_1).to_tool(self) }
     end
+
+    def guarded_returns
+      warning = guard&.call(self)
+      return unless warning
+      functions.map { guarded_return_for(_1, warning) }
+    end
+
+    def transform(prompt, params)
+      return [prompt, params] unless transformer
+      transformer.call(self, prompt, params)
+    end
+
+    def guarded_return_for(function, warning)
+      LLM::Function::Return.new(function.id, function.name, {
+        error: true,
+        type: LLM::GuardError.name,
+        message: warning
+      })
+    end
+
   end
 
   # Backward-compatible alias

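The changelog names request-level param injection as a transformer use case alongside PII scrubbing. A minimal sketch against the `call(ctx, prompt, params)` contract above, with an arbitrary temperature as the injected param:

```ruby
class InjectDefaults
  # A transformer must return a two-element [prompt, params] array;
  # caller-supplied params win over the injected default here.
  def call(ctx, prompt, params)
    [prompt, {temperature: 0.2}.merge(params)]
  end
end

ctx = LLM::Context.new(llm, transformer: InjectDefaults.new)
```
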
data/lib/llm/error.rb
CHANGED

data/lib/llm/function/fiber_group.rb
CHANGED

@@ -59,6 +59,14 @@ class LLM::Function
      @fibers.any?(&:alive?)
    end
 
+    ##
+    # @return [nil]
+    def interrupt!
+      @fibers.each(&:interrupt!)
+      nil
+    end
+    alias_method :cancel!, :interrupt!
+
    ##
    # Waits for all fibers in the group to finish and returns
    # their {LLM::Function::Return} values.

data/lib/llm/function/task.rb
CHANGED

data/lib/llm/function/task_group.rb
CHANGED

@@ -60,6 +60,14 @@ class LLM::Function
      @tasks.any?(&:alive?)
    end
 
+    ##
+    # @return [nil]
+    def interrupt!
+      @tasks.each(&:interrupt!)
+      nil
+    end
+    alias_method :cancel!, :interrupt!
+
    ##
    # Waits for all tasks in the group to finish and returns
    # their {LLM::Function::Return} values.

data/lib/llm/function/thread_group.rb
CHANGED

@@ -65,6 +65,14 @@ class LLM::Function
      @threads.any?(&:alive?)
    end
 
+    ##
+    # @return [nil]
+    def interrupt!
+      @threads.each(&:interrupt!)
+      nil
+    end
+    alias_method :cancel!, :interrupt!
+
    ##
    # Waits for all threads in the group to finish and returns
    # their {LLM::Function::Return} values.

data/lib/llm/function.rb
CHANGED

@@ -62,6 +62,13 @@ class LLM::Function
     def to_json(...)
       LLM.json.dump(to_h, ...)
     end
+
+    ##
+    # @return [nil]
+    def interrupt!
+      nil
+    end
+    alias_method :cancel!, :interrupt!
   end
 
   ##

@@ -218,6 +225,18 @@ class LLM::Function
     @cancelled = true
   end
 
+  ##
+  # Notifies the function runner that the call was interrupted.
+  # This is cooperative and only applies to runners that implement
+  # `on_interrupt`.
+  # @return [nil]
+  def interrupt!
+    hook = %i[on_cancel on_interrupt].find { @runner.respond_to?(_1) }
+    @runner.public_send(hook) if hook
+    nil
+  end
+  alias_method :cancel!, :interrupt!
+
   ##
   # Returns true when a function has been called
   # @return [Boolean]

@@ -266,9 +285,10 @@ class LLM::Function
         parameters: (@params || {type: "object", properties: {}}).to_h.merge(additionalProperties: false), strict: false
       }.compact
     else
+      params = @params || {type: "object", properties: {}}
       {
         type: "function", name: @name,
-        function: {name: @name, description: @description, parameters:
+        function: {name: @name, description: @description, parameters: params}
       }.compact
     end
   end

data/lib/llm/loop_guard.rb
ADDED

@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+##
+# {LLM::LoopGuard LLM::LoopGuard} is the built-in implementation of
+# llm.rb's `guard` capability.
+#
+# A guard is a context-level supervisor for agentic execution. It can inspect
+# the current runtime state and return a warning string when pending tool work
+# should be blocked before the loop keeps going.
+#
+# {LLM::LoopGuard LLM::LoopGuard} detects when a context is repeating the same
+# tool-call pattern instead of making progress. It is directly inspired by
+# General Intelligence Systems' Brute runtime and its doom-loop detection
+# approach.
+#
+# The public interface is intentionally small:
+# - `call(ctx)` returns `nil` when no intervention is needed
+# - `call(ctx)` returns a warning string when pending tool execution should be blocked
+#
+# {LLM::Context LLM::Context} can use that warning to return in-band
+# {LLM::GuardError LLM::GuardError} tool errors, and
+# {LLM::Agent LLM::Agent} enables this guard by default through its wrapped
+# context.
+#
+# Brute is MIT licensed. The relevant license grant is:
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so.
+class LLM::LoopGuard
+  ##
+  # The default number of repeated tool-call patterns required before
+  # the guard intervenes.
+  # @return [Integer]
+  DEFAULT_THRESHOLD = 3
+
+  ##
+  # Returns the repetition threshold.
+  # @return [Integer]
+  attr_reader :threshold
+
+  ##
+  # @param [Hash] config
+  # @option config [Integer] :threshold
+  #  How many repeated tool-call patterns must appear at the tail of the
+  #  sequence before the guard returns a warning.
+  def initialize(config = {})
+    @threshold = config.fetch(:threshold, DEFAULT_THRESHOLD)
+  end
+
+  ##
+  # Checks the current context for repeated tool-call patterns.
+  #
+  # This method inspects assistant tool calls only. It reduces each call to a
+  # `[tool_name, arguments]` signature and checks whether the tail of the
+  # sequence is repeating.
+  #
+  # @param [LLM::Context] ctx
+  # @return [String, nil]
+  #  Returns a warning string when pending tool execution should be blocked,
+  #  or `nil` when execution should continue.
+  def call(ctx)
+    repetitions = detect(ctx.messages.to_a)
+    repetitions ? warning(repetitions) : nil
+  end
+
+  private
+
+  def detect(messages)
+    signatures = extract_signatures(messages)
+    return if signatures.size < threshold
+    check_repeating_pattern(signatures)
+  end
+
+  def warning(repetitions)
+    <<~MSG
+      SYSTEM NOTICE: Repeated tool-call pattern detected - the same pattern has repeated #{repetitions} times.
+      You are stuck in a loop and not making progress. Stop and try a fundamentally different approach:
+      - Re-read the relevant context before retrying
+      - Try a different tool or strategy
+      - Break the problem into smaller steps
+      - If a tool keeps failing, investigate why before retrying
+    MSG
+  end
+
+  def extract_signatures(messages)
+    messages
+      .select { _1.respond_to?(:functions) && _1.assistant? }
+      .flat_map { |message| message.functions.map { [_1.name.to_s, _1.arguments.to_s] } }
+  end
+
+  def check_repeating_pattern(sequence)
+    max_pattern_len = sequence.size / threshold
+    (1..max_pattern_len).each do |pattern_len|
+      count = count_tail_repetitions(sequence, pattern_len)
+      return count if count >= threshold
+    end
+    nil
+  end
+
+  def count_tail_repetitions(sequence, length)
+    return 0 if sequence.size < length
+    pattern = sequence.last(length)
+    count = 1
+    pos = sequence.size - length
+    while pos >= length
+      candidate = sequence[(pos - length)...pos]
+      break unless candidate == pattern
+      count += 1
+      pos -= length
+    end
+    count
+  end
+end

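To make the tail-repetition check concrete: with the default threshold of 3, six assistant tool-call signatures that alternate between the same two calls trip the guard at a pattern length of 2, because the two-call pattern repeats three times at the tail. The guard can be tuned through the context; the tool names below are illustrative:

```ruby
# Signatures, oldest first: [["grep", "q=foo"], ["read", "f=a.rb"]] * 3
# pattern_len 1: tail repeats once    -> below threshold, keep scanning
# pattern_len 2: tail repeats 3 times -> warning returned, tools blocked
ctx.guard = LLM::LoopGuard.new(threshold: 3)
# Equivalent shorthands accepted by LLM::Context#guard=:
#   ctx.guard = true            # built-in guard, default threshold
#   ctx.guard = {threshold: 3}  # built-in guard, custom threshold
```
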
data/lib/llm/message.rb
CHANGED

@@ -34,6 +34,7 @@ module LLM
     # @return [Hash]
     def to_h
       {role:, content:, reasoning_content:,
+       compaction: extra.compaction,
        tools: extra.tool_calls,
        usage:,
        original_tool_calls: extra.original_tool_calls}.compact

@@ -74,6 +75,13 @@ module LLM
       extra.reasoning_content
     end
 
+    ##
+    # Returns true when a message was created by context compaction
+    # @return [Boolean]
+    def compaction?
+      !!extra.compaction
+    end
+
     ##
     # Returns true when a message contains an image URL
     # @return [Boolean]

data/lib/llm/stream/queue.rb
CHANGED

@@ -31,6 +31,14 @@ class LLM::Stream
      @items.empty?
    end
 
+    ##
+    # @return [nil]
+    def interrupt!
+      @items.each(&:interrupt!)
+      nil
+    end
+    alias_method :cancel!, :interrupt!
+
    ##
    # Waits for queued work to finish and returns function results.
    # @param [Symbol, Array<Symbol>] strategy

data/lib/llm/stream.rb
CHANGED

@@ -18,7 +18,8 @@ module LLM
   #
   # The most common callback is {#on_content}, which also maps to {#<<}.
   # Providers may also call {#on_reasoning_content} and {#on_tool_call} when
-  # that data is available.
+  # that data is available. Runtime features such as context compaction may
+  # also emit lifecycle callbacks like {#on_compaction}.
   class Stream
     require_relative "stream/queue"
 

@@ -29,6 +30,13 @@ module LLM
       @extra ||= LLM::Object.from({})
     end
 
+    ##
+    # Returns the current context, if one was attached to the stream.
+    # @return [LLM::Context, nil]
+    def ctx
+      extra[:ctx]
+    end
+
     ##
     # Returns a lazily-initialized queue for tool results or spawned work.
     # @return [LLM::Stream::Queue]

@@ -69,13 +77,14 @@ module LLM
     ##
     # Called when a streamed tool call has been fully constructed.
     # @note A stream implementation may start tool execution here, for
-    #   example by pushing `
-    #   `tool.spawn(:task)` onto {#queue}.
-    #   selected per tool, such as
-    #   tool.spawn(:
-    #
-    #
-    #   continue. Tool
+    #   example by pushing `ctx.spawn(tool, :thread)`,
+    #   `ctx.spawn(tool, :fiber)`, or `ctx.spawn(tool, :task)` onto {#queue}.
+    #   Mixed strategies can also be selected per tool, such as
+    #   `tool.mcp? ? ctx.spawn(tool, :task) : ctx.spawn(tool, :ractor)`.
+    #   When a streamed tool cannot be resolved, `error` is passed as an
+    #   {LLM::Function::Return}. It can be sent back to the model, allowing
+    #   the tool-call path to recover and the session to continue. Tool
+    #   resolution depends on
     #   {LLM::Function.registry}, which includes {LLM::Tool LLM::Tool}
     #   subclasses, including MCP tools, but not functions defined with
     #   {LLM.function}. The current `:ractor` mode is for class-based tools

@@ -92,8 +101,8 @@ module LLM
     ##
     # Called when queued streamed tool work returns.
     # @note This callback runs when {#wait} resolves work that was queued from
-    #   {#on_tool_call}, such as values returned by `
-    #   `
+    #   {#on_tool_call}, such as values returned by `ctx.spawn(tool, :thread)`,
+    #   `ctx.spawn(tool, :fiber)`, or `ctx.spawn(tool, :task)`.
     # @param [LLM::Function] tool
     #   The tool that returned.
     # @param [LLM::Function::Return] result

@@ -103,6 +112,24 @@ module LLM
       nil
     end
 
+    ##
+    # Called before a context compaction starts.
+    # @param [LLM::Context] ctx
+    # @param [LLM::Compactor] compactor
+    # @return [nil]
+    def on_compaction(ctx, compactor)
+      nil
+    end
+
+    ##
+    # Called after a context compaction finishes.
+    # @param [LLM::Context] ctx
+    # @param [LLM::Compactor] compactor
+    # @return [nil]
+    def on_compaction_finish(ctx, compactor)
+      nil
+    end
+
     # @endgroup
 
     # @group Error handlers

data/lib/llm/tool.rb
CHANGED

@@ -171,4 +171,32 @@ class LLM::Tool
   def self.mcp?
     false
   end
+
+  ##
+  # Returns a function bound to this tool instance.
+  # @return [LLM::Function]
+  def function
+    @function ||= self.class.function.dup.tap { _1.register(self) }
+  end
+
+  ##
+  # Returns true if the tool is an MCP tool
+  # @return [Boolean]
+  def mcp?
+    self.class.mcp?
+  end
+
+  ##
+  # Called when an in-flight tool run is interrupted.
+  # Tools can override this to implement cooperative cleanup.
+  # @return [nil]
+  def on_interrupt
+  end
+
+  ##
+  # Called when an in-flight tool run is cancelled.
+  # @return [nil]
+  def on_cancel
+    on_interrupt
+  end
 end

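A sketch of the cooperative cleanup these hooks enable; the tool body and its `call` signature are illustrative, and polling a flag is one possible approach rather than a pattern prescribed by the library:

```ruby
class SlowTool < LLM::Tool
  def call(*)
    until @abort
      # ... perform one unit of work per iteration ...
    end
    "stopped early"
  end

  # Reached by ctx.interrupt! through the queue's interrupt! chain
  # shown earlier in this diff; the in-flight call polls the flag.
  def on_interrupt
    @abort = true
  end
end
```
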
data/lib/llm/version.rb
CHANGED
data/lib/llm.rb
CHANGED
metadata
CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llm.rb
 version: !ruby/object:Gem::Version
-  version: 4.22.0
+  version: 5.0.0
 platform: ruby
 authors:
 - Antar Azri

@@ -271,6 +271,7 @@ files:
 - lib/llm/agent.rb
 - lib/llm/bot.rb
 - lib/llm/buffer.rb
+- lib/llm/compactor.rb
 - lib/llm/context.rb
 - lib/llm/context/deserializer.rb
 - lib/llm/context/serializer.rb

@@ -297,6 +298,7 @@ files:
 - lib/llm/function/thread_group.rb
 - lib/llm/function/tracing.rb
 - lib/llm/json_adapter.rb
+- lib/llm/loop_guard.rb
 - lib/llm/mcp.rb
 - lib/llm/mcp/command.rb
 - lib/llm/mcp/error.rb