llm.rb 5.4.0 → 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +87 -1
- data/README.md +113 -25
- data/lib/llm/active_record/acts_as_agent.rb +5 -11
- data/lib/llm/active_record/acts_as_llm.rb +17 -37
- data/lib/llm/agent.rb +2 -0
- data/lib/llm/buffer.rb +8 -0
- data/lib/llm/compactor.rb +26 -7
- data/lib/llm/context/deserializer.rb +1 -0
- data/lib/llm/context.rb +48 -21
- data/lib/llm/error.rb +4 -0
- data/lib/llm/function/ractor/task.rb +8 -2
- data/lib/llm/function.rb +6 -2
- data/lib/llm/provider/transport/http/execution.rb +1 -1
- data/lib/llm/provider/transport/http/interruptible.rb +99 -94
- data/lib/llm/provider/transport/http.rb +3 -2
- data/lib/llm/provider.rb +8 -0
- data/lib/llm/sequel/agent.rb +2 -7
- data/lib/llm/sequel/plugin.rb +31 -38
- data/lib/llm/skill.rb +6 -0
- data/lib/llm/version.rb +1 -1
- data/llm.gemspec +1 -0
- metadata +15 -1
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 57b39b3b4b79d1d9f8cfd10426ad233d698dd6e3ed84bfef887c8c63f543f40f
+  data.tar.gz: 443ed7e2a04259c69d41b1da7a42e7637efaa4ab1075548706ce349bced7ed51
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f8e53dc41eacf16cea35f64a6048aa77852fcf7a135676b2b9c02e37beff174b5a500948477c4f931ff0a71d20c4503ba3e9eef19358d3aaa204040e77fe14c5
+  data.tar.gz: 358ce7f33d2dca51365f6581867006970fd66079dcaa189268e2deff2f297c89b8332fd11b714bedfd89124413b7a9e12fc09d928c2c28f2e9cb2368f2bc3e24
data/CHANGELOG.md
CHANGED

@@ -1,9 +1,95 @@
 # Changelog
 
-##
+## v6.1.0
+
+Changes since `v6.0.0`.
+
+This release tightens interrupt and compaction behavior for long-running
+contexts. It adds `LLM::Buffer#rindex`, supports percentage-based token
+thresholds in `LLM::Compactor`, tracks persisted compaction state through
+context serialization, reliably interrupts Async-backed requests, preserves
+valid tool-call history on cancellation, keeps concurrent skill tool loops
+running on streamed agents, and returns zero-valued usage objects when no
+provider usage has been recorded yet.
+
+### Change
+
+* **Add `LLM::Buffer#rindex`** <br>
+  Add `LLM::Buffer#rindex` as a direct forward to the underlying message
+  array so callers can find the last matching message index through the
+  buffer API.
+
+* **Support percentage compaction token thresholds** <br>
+  Let `LLM::Compactor` accept `token_threshold:` values like `"90%"` so
+  compaction can trigger at a percentage of the active model context
+  window.
+
+### Fix
+
+* **Interrupt Async-backed requests reliably** <br>
+  Track request ownership through the provider transport so contexts use
+  the active Async task when available, letting `ctx.interrupt!`
+  reliably cancel streamed requests under Async runtimes and surface
+  them as `LLM::Interrupt`.
+
+* **Preserve valid tool-call history on cancellation** <br>
+  Append cancelled tool-return messages for unresolved tool calls during
+  `ctx.interrupt!` so follow-up provider requests do not fail with
+  invalid tool-call history after pending tool work is cancelled.
+
+* **Preserve concurrent skill tool loops on streamed agents** <br>
+  Propagate the active agent concurrency through the effective request
+  stream so nested skill agents keep using queued `wait(...)` tool
+  execution instead of falling back to direct `:call` execution.
+
+* **Track persisted compaction state on contexts** <br>
+  Mark contexts as compacted after `LLM::Compactor#compact!`, persist and
+  restore that state through context serialization, and clear it after the
+  next successful model response.
+
+* **Return zero-valued usage objects from contexts** <br>
+  Make `LLM::Context#usage` consistently return an `LLM::Object`, using a
+  zero-valued usage object when no provider usage has been recorded yet.
+
+## v6.0.0
 
 Changes since `v5.4.0`.
 
+This release simplifies the ORM persistence contract around serialized
+`data` state, removing the assumption of reserved `provider`, `model`, and
+usage columns. Provider selection must now come from `provider:` hooks,
+model defaults come from `context:` or agent DSL, and usage is read from the
+serialized runtime state. Alongside this breaking change, Sequel JSON and
+JSONB persistence is fixed, ractor-backed tools now fire tracer callbacks,
+and `LLM::RactorError` is raised for unsupported ractor tool work.
+
+### Change
+
+* **Simplify ORM persistence to serialized `data` state** <br>
+  Change the built-in ActiveRecord and Sequel wrappers to treat serialized
+  `data` as the persistence contract, instead of assuming reserved
+  `provider`, `model`, and usage columns. Provider selection must now come
+  from `provider:` hooks that resolve a real `LLM::Provider` instance, model
+  defaults come from `context:` or agent DSL, and `usage` is read from the
+  serialized runtime state.
+
+### Fix
+
+* **Fix Sequel JSON and JSONB persistence** <br>
+  Load Sequel PostgreSQL JSON support when `plugin :llm` is configured with
+  `format: :json` or `:jsonb`, and wrap structured payloads correctly so
+  persisted context state can be stored in PostgreSQL JSON columns.
+
+* **Trace ractor-backed tool callbacks** <br>
+  Make tool tracers fire `on_tool_start` and `on_tool_finish` for
+  class-based `:ractor` execution too, so ractor-backed tool calls show up
+  in tracer callbacks like the other concurrent tool paths.
+
+* **Raise `LLM::RactorError` for unsupported ractor tool work** <br>
+  Add `LLM::RactorError` and fail fast when `:ractor` execution is requested
+  for unsupported tool types such as skill-backed tools, instead of letting
+  deeper Ruby isolation errors leak out later in execution.
+
 ## v5.4.0
 
 Changes since `v5.3.0`.
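To make the v6.1.0 compaction change concrete, here is a minimal sketch of
the new percentage threshold, assuming `llm` is an already-configured
provider instance; the option names mirror the README snippets further down
in this diff:

```ruby
require "llm"

llm = LLM.openai(key: ENV["OPENAI_SECRET"])
ctx = LLM::Context.new(
  llm,
  compactor: {
    # "90%" is resolved against the active model's context window,
    # so compaction triggers once total token usage crosses 90% of it.
    token_threshold: "90%",
    retention_window: 8
  }
)
```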
data/README.md
CHANGED

@@ -4,7 +4,7 @@
 <p align="center">
   <a href="https://0x1eef.github.io/x/llm.rb?rebuild=1"><img src="https://img.shields.io/badge/docs-0x1eef.github.io-blue.svg" alt="RubyDoc"></a>
   <a href="https://opensource.org/license/0bsd"><img src="https://img.shields.io/badge/License-0BSD-orange.svg?" alt="License"></a>
-  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-
+  <a href="https://github.com/llmrb/llm.rb/tags"><img src="https://img.shields.io/badge/version-6.1.0-green.svg?" alt="Version"></a>
 </p>
 
 ## About

@@ -25,7 +25,6 @@ schemas, files, and persisted state, so real systems can be built out of one coh
 execution model instead of a pile of adapters.
 
 Want to see some code? Jump to [the examples](#examples) section. <br>
-Want to see an agentic framework built on top of llm.rb? Check out [general-intelligence-systems/brute](https://github.com/general-intelligence-systems/brute). <br>
 Want to see a self-hosted LLM environment built on llm.rb? Check out [Relay](https://github.com/llmrb/relay).
 
 ## Architecture

@@ -102,20 +101,26 @@ separate agent table or a second persistence layer.
 
 `acts_as_agent` extends a model with agent capabilities: the same runtime
 surface as [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html),
-because it actually wraps an `LLM::Agent`, plus persistence through
-JSON, or JSONB-backed column on the same table.
+because it actually wraps an `LLM::Agent`, plus persistence through one text,
+JSON, or JSONB-backed `data` column on the same table. If your app also has
+provider or model columns, provide them to llm.rb through `set_provider` and
+`set_context`.
 
 
 ```ruby
 class Ticket < ApplicationRecord
-  acts_as_agent provider: :set_provider
+  acts_as_agent provider: :set_provider, context: :set_context
   model "gpt-5.4-mini"
   instructions "You are a support assistant."
 
   private
 
   def set_provider
-
+    LLM.openai(key: ENV["OPENAI_SECRET"])
+  end
+
+  def set_context
+    { mode: :responses, store: false }
   end
 end
 ```

@@ -158,12 +163,15 @@ and when a stream is present it emits `on_compaction` and
 `on_compaction_finish` through [`LLM::Stream`](https://0x1eef.github.io/x/llm.rb/LLM/Stream.html).
 The compactor can also use a different model from the main context, which is
 useful when you want summarization to run on a cheaper or faster model.
+`token_threshold:` accepts either a fixed token count or a percentage string
+like `"90%"`, which resolves against the active model context window and
+triggers compaction once total token usage goes over that percentage.
 
 ```ruby
 ctx = LLM::Context.new(
   llm,
   compactor: {
-
+    token_threshold: "90%",
     retention_window: 8,
     model: "gpt-5.4-mini"
   }

@@ -303,7 +311,7 @@ finer sequential control across several steps before shutting the client down.
 ```ruby
 mcp = LLM::MCP.http(
   url: "https://api.githubcopilot.com/mcp/",
-  headers: {"Authorization" => "Bearer #{ENV
+  headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"}
 ).persistent
 mcp.run do
   ctx = LLM::Context.new(llm, tools: mcp.tools)

@@ -376,7 +384,8 @@ worker.join
 Use threads, fibers, async tasks, or experimental ractors without
 rewriting your tool layer. The current `:ractor` mode is for class-based
 tools and does not support MCP tools, but mixed workloads can branch on
-`tool.mcp?` and choose a supported strategy per tool.
+`tool.mcp?` and choose a supported strategy per tool. Class-based
+`:ractor` tools still emit normal tool tracer callbacks. `:ractor` is
 especially useful for CPU-bound tools, while `:task`, `:fiber`, or
 `:thread` may be a better fit for I/O-bound work.
 - **Advanced workloads are built in, not bolted on** <br>

@@ -618,7 +627,10 @@ long-lived contexts can summarize older history and expose the lifecycle
 through stream hooks. This approach is inspired by General Intelligence
 Systems' [Brute](https://github.com/general-intelligence-systems/brute). The
 compactor can also use its own `model:` if you want summarization to run on a
-different model from the main context.
+different model from the main context. `token_threshold:` accepts either a
+fixed token count or a percentage string like `"90%"`, which resolves
+against the active model context window and triggers compaction once total
+token usage goes over that percentage. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -638,7 +650,7 @@ ctx = LLM::Context.new(
   llm,
   stream: Stream.new,
   compactor: {
-
+    token_threshold: "90%",
     retention_window: 8,
     model: "gpt-5.4-mini"
   }

@@ -696,7 +708,7 @@ worker.join
 
 #### Sequel (ORM)
 
-The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
+The `plugin :llm` integration wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) on a `Sequel::Model` and keeps tool execution explicit. Like the ActiveRecord wrappers, its built-in persistence contract is the serialized `data` column, while `provider:` resolves a real `LLM::Provider` instance and `context:` injects defaults such as `model:`. <br> See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"

@@ -705,10 +717,20 @@ require "sequel"
 require "sequel/plugins/llm"
 
 class Context < Sequel::Model
-  plugin :llm, provider:
+  plugin :llm, provider: :set_provider, context: :set_context
+
+  private
+
+  def set_provider
+    LLM.openai(key: ENV["OPENAI_SECRET"])
+  end
+
+  def set_context
+    {model: "gpt-5.4-mini", mode: :responses, store: false}
+  end
 end
 
-ctx = Context.create
+ctx = Context.create
 ctx.talk("Remember that my favorite language is Ruby")
 puts ctx.talk("What is my favorite language?").content
 ```

@@ -716,36 +738,76 @@ puts ctx.talk("What is my favorite language?").content
 
 #### ActiveRecord (ORM): acts_as_llm
 
 The `acts_as_llm` method wraps [`LLM::Context`](https://0x1eef.github.io/x/llm.rb/LLM/Context.html) and
-provides full control over tool execution.
+provides full control over tool execution. Its built-in persistence contract is
+one serialized `data` column. If your app has provider, model, or usage
+columns, provide them to llm.rb through `provider:` and `context:` instead of
+relying on reserved wrapper columns.
+
+See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
-require "net/http/persistent"
 require "active_record"
 require "llm/active_record"
 
 class Context < ApplicationRecord
-  acts_as_llm provider:
+  acts_as_llm provider: :set_provider, context: :set_context
+
+  private
+
+  def set_provider
+    LLM.openai(key: ENV["OPENAI_SECRET"])
+  end
+
+  def set_context
+    {model: "gpt-5.4-mini", mode: :responses, store: false}
+  end
 end
 
-ctx = Context.create!
+ctx = Context.create!
 ctx.talk("Remember that my favorite language is Ruby")
 puts ctx.talk("What is my favorite language?").content
 ```
 
+```ruby
+require "llm"
+require "active_record"
+require "llm/active_record"
+
+class Context < ApplicationRecord
+  acts_as_llm provider: :set_provider, context: :set_context
+
+  # Optional application columns can still provide the provider and context.
+  # For example, `provider_name` and `model_name` can be normal columns.
+
+  private
+
+  def set_provider
+    LLM.public_send(provider_name, key: provider_key)
+  end
+
+  def set_context
+    {model: model_name, mode: :responses, store: false}
+  end
+end
+```
+
 #### ActiveRecord (ORM): acts_as_agent
 
 The `acts_as_agent` method wraps [`LLM::Agent`](https://0x1eef.github.io/x/llm.rb/LLM/Agent.html) and
-manages tool execution for you.
+manages tool execution for you. Like `acts_as_llm`, its built-in persistence
+contract is one serialized `data` column. If your app has provider or model
+columns, provide them to llm.rb through your hooks and agent DSL.
+
+See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.
 
 ```ruby
 require "llm"
-require "net/http/persistent"
 require "active_record"
 require "llm/active_record"
 
 class Ticket < ApplicationRecord
-  acts_as_agent provider: :set_provider
+  acts_as_agent provider: :set_provider, context: :set_context
   model "gpt-5.4-mini"
   instructions "You are a concise support assistant."
   tools SearchDocs, Escalate

@@ -754,14 +816,40 @@ class Ticket < ApplicationRecord
   private
 
   def set_provider
-
+    LLM.openai(key: ENV["OPENAI_SECRET"])
+  end
+
+  def set_context
+    {mode: :responses, store: false}
   end
 end
 
-ticket = Ticket.create!
+ticket = Ticket.create!
 puts ticket.talk("How do I rotate my API key?").content
 ```
 
+```ruby
+require "llm"
+require "active_record"
+require "llm/active_record"
+
+class Ticket < ApplicationRecord
+  acts_as_agent provider: :set_provider, context: :set_context
+  model "gpt-5.4-mini"
+  instructions "You are a concise support assistant."
+
+  private
+
+  def set_provider
+    LLM.public_send(provider_name, key: provider_key)
+  end
+
+  def set_context
+    {mode: :responses, store: false}
+  end
+end
+```
+
 #### MCP
 
 This example uses [`LLM::MCP`](https://0x1eef.github.io/x/llm.rb/LLM/MCP.html) over HTTP so remote GitHub MCP tools run through the same `LLM::Context` tool path as local tools. It expects a GitHub token in `ENV["GITHUB_PAT"]`. See the [deepdive (web)](https://0x1eef.github.io/x/llm.rb/file.deepdive.html) or [deepdive (markdown)](resources/deepdive.md) for more examples.

@@ -773,7 +861,7 @@ require "net/http/persistent"
 llm = LLM.openai(key: ENV["KEY"])
 mcp = LLM::MCP.http(
   url: "https://api.githubcopilot.com/mcp/",
-  headers: {"Authorization" => "Bearer #{ENV
+  headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"}
 ).persistent
 
 mcp.start

@@ -788,7 +876,7 @@ For scoped work, `mcp.run do ... end` is shorter and handles cleanup for you:
 ```ruby
 mcp = LLM::MCP.http(
   url: "https://api.githubcopilot.com/mcp/",
-  headers: {"Authorization" => "Bearer #{ENV
+  headers: {"Authorization" => "Bearer #{ENV["GITHUB_PAT"]}"}
 ).persistent
 mcp.run do
   ctx = LLM::Context.new(llm, stream: $stdout, tools: mcp.tools)
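Under the persistence contract described above, the only column the wrappers
touch is the serialized `data` column. As a hypothetical sketch for the
`Context` model in the README examples (the table name and Rails version are
illustrative, not part of the gem):

```ruby
# Hypothetical ActiveRecord migration: the wrappers only need a single
# serializable column named `data` (text for the default format: :string;
# use json/jsonb columns with format: :json or :jsonb).
class CreateContexts < ActiveRecord::Migration[7.1]
  def change
    create_table :contexts do |t|
      t.text :data # holds the serialized LLM::Context state
      t.timestamps
    end
  end
end
```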
data/lib/llm/active_record/acts_as_agent.rb
CHANGED

@@ -11,7 +11,6 @@ module LLM::ActiveRecord
   # class and forwarded to an internal agent subclass.
   module ActsAsAgent
     EMPTY_HASH = LLM::ActiveRecord::ActsAsLLM::EMPTY_HASH
-    DEFAULT_USAGE_COLUMNS = LLM::ActiveRecord::ActsAsLLM::DEFAULT_USAGE_COLUMNS
     DEFAULTS = LLM::ActiveRecord::ActsAsLLM::DEFAULTS
     Utils = LLM::ActiveRecord::ActsAsLLM::Utils
 

@@ -58,8 +57,6 @@ module LLM::ActiveRecord
     # @param [Class] model
     # @return [void]
     def self.extended(model)
-      options = model.llm_plugin_options
-      model.validates options[:provider_column], options[:model_column], presence: true
       model.include LLM::ActiveRecord::ActsAsLLM::InstanceMethods unless model.ancestors.include?(LLM::ActiveRecord::ActsAsLLM::InstanceMethods)
       model.include InstanceMethods unless model.ancestors.include?(InstanceMethods)
       model.extend ClassMethods unless model.singleton_class.ancestors.include?(ClassMethods)

@@ -77,6 +74,8 @@ module LLM::ActiveRecord
     # @option options [Proc, Symbol, LLM::Tracer, nil] :tracer
     #   Optional tracer, method name, or proc that resolves to one and is
     #   assigned through `llm.tracer = ...` on the resolved provider.
+    # @option options [Proc, Symbol, LLM::Provider] :provider
+    #   Must resolve to an `LLM::Provider` instance for the current record.
     # @yield
     #   Evaluated in the model class after the wrapper is installed, so agent
     #   DSL methods such as `model`, `tools`, `schema`, `instructions`, and

@@ -84,9 +83,8 @@ module LLM::ActiveRecord
     # @return [void]
     def acts_as_agent(options = EMPTY_HASH, &block)
       options = DEFAULTS.merge(options)
-      usage_columns = DEFAULT_USAGE_COLUMNS.merge(options[:usage_columns] || EMPTY_HASH)
       class_attribute :llm_plugin_options, instance_accessor: false, default: DEFAULTS unless respond_to?(:llm_plugin_options)
-      self.llm_plugin_options = options.
+      self.llm_plugin_options = options.freeze
       extend Hooks
       class_exec(&block) if block
     end

@@ -97,11 +95,8 @@ module LLM::ActiveRecord
     # @return [LLM::Provider]
     def llm
       options = self.class.llm_plugin_options
-      columns = Utils.columns(options)
-      provider = self[columns[:provider_column]]
-      kwargs = Utils.resolve_options(self, options[:provider], ActsAsAgent::EMPTY_HASH)
       return @llm if @llm
-      @llm =
+      @llm = Utils.resolve_provider(self, options, ActsAsAgent::EMPTY_HASH)
       @llm.tracer = Utils.resolve_option(self, options[:tracer]) if options[:tracer]
       @llm
     end

@@ -113,10 +108,9 @@ module LLM::ActiveRecord
     def ctx
       @ctx ||= begin
         options = self.class.llm_plugin_options
-        columns = Utils.columns(options)
         params = Utils.resolve_options(self, options[:context], ActsAsAgent::EMPTY_HASH).dup
-        params[:model] ||= self[columns[:model_column]]
         ctx = self.class.agent.new(llm, params.compact)
+        columns = Utils.columns(options)
         data = self[columns[:data_column]]
         if data.nil? || data == ""
           ctx
data/lib/llm/active_record/acts_as_llm.rb
CHANGED

@@ -17,19 +17,11 @@ module LLM::ActiveRecord
   # `tracer:` can also be configured as symbols that are called on the model.
   module ActsAsLLM
     EMPTY_HASH = {}.freeze
-    DEFAULT_USAGE_COLUMNS = {
-      input_tokens: :input_tokens,
-      output_tokens: :output_tokens,
-      total_tokens: :total_tokens
-    }.freeze
     DEFAULTS = {
-      provider_column: :provider,
-      model_column: :model,
       data_column: :data,
       format: :string,
-      usage_columns: DEFAULT_USAGE_COLUMNS,
       tracer: nil,
-      provider:
+      provider: nil,
       context: EMPTY_HASH
     }.freeze
 

@@ -78,28 +70,26 @@ module LLM::ActiveRecord
     # Maps wrapper options onto the record's storage columns.
     # @return [Hash]
     def self.columns(options)
-      usage_columns = options[:usage_columns]
       {
-
-        model_column: options[:model_column],
-        data_column: options[:data_column],
-        input_tokens: usage_columns[:input_tokens],
-        output_tokens: usage_columns[:output_tokens],
-        total_tokens: usage_columns[:total_tokens]
+        data_column: options[:data_column]
       }.freeze
     end
 
+    ##
+    # Resolves the provider runtime for a record.
+    # @return [LLM::Provider]
+    def self.resolve_provider(obj, options, empty_hash)
+      provider = resolve_option(obj, options[:provider])
+      return provider if LLM::Provider === provider
+      raise ArgumentError, "provider: must resolve to an LLM::Provider instance"
+    end
+
     ##
     # Persists the runtime state and usage columns back onto the record.
     # @return [void]
     def self.save(obj, ctx, options)
       columns = self.columns(options)
-      obj.assign_attributes(
-        columns[:data_column] => serialize_context(ctx, options[:format]),
-        columns[:input_tokens] => ctx.usage.input_tokens,
-        columns[:output_tokens] => ctx.usage.output_tokens,
-        columns[:total_tokens] => ctx.usage.total_tokens
-      )
+      obj.assign_attributes(columns[:data_column] => serialize_context(ctx, options[:format]))
       obj.save!
     end

@@ -111,8 +101,6 @@ module LLM::ActiveRecord
     # @param [Class] model
     # @return [void]
     def self.extended(model)
-      options = model.llm_plugin_options
-      model.validates options[:provider_column], options[:model_column], presence: true
       model.include InstanceMethods unless model.ancestors.include?(InstanceMethods)
     end

@@ -128,12 +116,13 @@ module LLM::ActiveRecord
     # @option options [Proc, Symbol, LLM::Tracer, nil] :tracer
     #   Optional tracer, method name, or proc that resolves to one and is
     #   assigned through `llm.tracer = ...` on the resolved provider.
+    # @option options [Proc, Symbol, LLM::Provider] :provider
+    #   Must resolve to an `LLM::Provider` instance for the current record.
     # @return [void]
     def acts_as_llm(options = EMPTY_HASH)
       options = DEFAULTS.merge(options)
-      usage_columns = DEFAULT_USAGE_COLUMNS.merge(options[:usage_columns] || EMPTY_HASH)
       class_attribute :llm_plugin_options, instance_accessor: false, default: DEFAULTS unless respond_to?(:llm_plugin_options)
-      self.llm_plugin_options = options.
+      self.llm_plugin_options = options.freeze
       extend Hooks
     end
 

@@ -228,12 +217,7 @@ module LLM::ActiveRecord
     # Returns usage from the mapped usage columns.
     # @return [LLM::Object]
     def usage
-
-      LLM::Object.from(
-        input_tokens: self[columns[:input_tokens]] || 0,
-        output_tokens: self[columns[:output_tokens]] || 0,
-        total_tokens: self[columns[:total_tokens]] || 0
-      )
+      ctx.usage || LLM::Object.from(input_tokens: 0, output_tokens: 0, total_tokens: 0)
     end
 
     ##

@@ -285,11 +269,8 @@ module LLM::ActiveRecord
     # @return [LLM::Provider]
     def llm
       options = self.class.llm_plugin_options
-      columns = Utils.columns(options)
-      provider = self[columns[:provider_column]]
-      kwargs = Utils.resolve_options(self, options[:provider], ActsAsLLM::EMPTY_HASH)
       return @llm if @llm
-      @llm =
+      @llm = Utils.resolve_provider(self, options, ActsAsLLM::EMPTY_HASH)
       @llm.tracer = Utils.resolve_option(self, options[:tracer]) if options[:tracer]
       @llm
     end

@@ -303,7 +284,6 @@ module LLM::ActiveRecord
       options = self.class.llm_plugin_options
       columns = Utils.columns(options)
       params = Utils.resolve_options(self, options[:context], ActsAsLLM::EMPTY_HASH).dup
-      params[:model] ||= self[columns[:model_column]]
       ctx = LLM::Context.new(llm, params.compact)
       data = self[columns[:data_column]]
       if data.nil? || data == ""
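Given `Utils.resolve_provider` above, the `provider:` hook has a strict
contract: it must return an `LLM::Provider` instance or the wrapper raises.
A sketch of both sides of that contract (the `Note` model is illustrative):

```ruby
class Note < ApplicationRecord
  acts_as_llm provider: :set_provider

  private

  # OK: the hook resolves to a real LLM::Provider instance.
  def set_provider
    LLM.openai(key: ENV["OPENAI_SECRET"])
  end

  # Not OK: returning a bare name like :openai would raise
  # ArgumentError, "provider: must resolve to an LLM::Provider instance".
  # def set_provider
  #   :openai
  # end
end
```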
data/lib/llm/agent.rb
CHANGED

@@ -394,6 +394,8 @@ module LLM
     def run_loop(method, prompt, params)
       loop = proc do
         max = Integer(params.delete(:tool_attempts) || 25)
+        stream = params[:stream] || @ctx.params[:stream]
+        stream.extra[:concurrency] = concurrency if LLM::Stream === stream
         res = @ctx.public_send(method, apply_instructions(prompt), params)
         max.times do
           break if @ctx.functions.empty?
data/lib/llm/buffer.rb
CHANGED

@@ -52,6 +52,14 @@ module LLM
       reverse_each.find(...)
     end
 
+    ##
+    # Returns the index of the last message matching the given block.
+    # @yield [LLM::Message]
+    # @return [Integer, nil]
+    def rindex(...)
+      @messages.rindex(...)
+    end
+
     ##
     # Returns the last message(s) in the buffer
     # @param [Integer, nil] n
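Since `LLM::Buffer#rindex` forwards directly to `Array#rindex` on the
underlying messages, it inherits the core semantics; a sketch with symbols
standing in for `LLM::Message` objects:

```ruby
# Array#rindex returns the index of the LAST matching element, or nil,
# which is exactly what the buffer now exposes through its own API.
messages = [:user, :assistant, :tool, :assistant]
p messages.rindex { |m| m == :assistant } # => 3
p messages.rindex { |m| m == :system }    # => nil
```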
data/lib/llm/compactor.rb
CHANGED

@@ -11,7 +11,9 @@
 # The compactor can also use a different model from the main context by
 # setting `model:` in the compactor config. Compaction thresholds are opt-in:
 # provide `message_threshold:` and/or `token_threshold:` to enable policy-
-# driven compaction.
+# driven compaction. `token_threshold:` accepts either an integer token count
+# or a percentage string like `"90%"`, which resolves against the current
+# model context window.
 class LLM::Compactor
   DEFAULTS = {
     retention_window: 8,

@@ -25,8 +27,11 @@ class LLM::Compactor
   ##
   # @param [LLM::Context] ctx
   # @param [Hash] config
-  # @option config [Integer, nil] :token_threshold
-  #   Enables token-based compaction.
+  # @option config [Integer, String, nil] :token_threshold
+  #   Enables token-based compaction. Integer values are treated as a fixed
+  #   token count. Percentage strings like `"90%"` are resolved against
+  #   {LLM::Context#context_window}; if the context window is unknown, the
+  #   percentage threshold is treated as disabled.
   # @option config [Integer, nil] :message_threshold
   #   Enables message-count-based compaction.
   # @option config [Integer] :retention_window

@@ -39,18 +44,22 @@ class LLM::Compactor
   end
 
   ##
-  # Returns true when the context should be compacted
+  # Returns true when the context should be compacted.
+  #
+  # When `token_threshold:` is a percentage string such as `"90%"`, the
+  # threshold is resolved against the current context window and compared to
+  # the current total token usage.
   # @param [Object] prompt
   #   The next prompt or turn input
   # @return [Boolean]
-  def
+  def compactable?(prompt = nil)
     return false if ctx.functions.any? || [*prompt].grep(LLM::Function::Return).any?
     messages = ctx.messages.reject(&:system?)
     return true if config[:message_threshold] && messages.size > config[:message_threshold]
-
-    return true if config[:token_threshold] && usage && usage.total_tokens > config[:token_threshold]
+    return true if token_threshold and ctx.usage.total_tokens > token_threshold
     false
   end
+  alias_method :compact?, :compactable?

@@ -68,6 +77,7 @@ class LLM::Compactor
     older = messages[0...(messages.size - recent.size)]
     summary = LLM::Message.new(ctx.llm.user_role, "[Previous conversation summary]\n\n#{summarize(older)}", {compaction: true})
     ctx.messages.replace([*ctx.messages.take_while(&:system?), summary, *recent])
+    ctx.compacted = true
     stream.on_compaction_finish(ctx, self) if LLM::Stream === stream
     summary
   end

@@ -84,6 +94,15 @@ class LLM::Compactor
     messages[start..] || []
   end
 
+  def token_threshold
+    @token_threshold ||= begin
+      threshold = config[:token_threshold]
+      return threshold unless threshold.to_s.end_with?("%")
+      return if ctx.context_window <= 0
+      (ctx.context_window * threshold.delete_suffix("%").to_f / 100).floor
+    end
+  end
+
   def summarize(messages)
     model = config[:model] || ctx.params[:model] || ctx.llm.default_model
     ctx.llm.complete(summary_prompt(messages), model:).content
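Worked through the `token_threshold` helper above with a hypothetical
128,000-token context window, `"90%"` resolves like this:

```ruby
context_window = 128_000  # hypothetical value for ctx.context_window
threshold = "90%"
resolved = (context_window * threshold.delete_suffix("%").to_f / 100).floor
p resolved # => 115200; compaction triggers once total_tokens exceeds this
```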