@librechat/agents 3.1.70 → 3.1.71-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +45 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +4 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +9 -2
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +4 -0
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/BashExecutor.cjs +43 -0
- package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +453 -45
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +475 -0
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -0
- package/dist/cjs/utils/truncation.cjs +28 -0
- package/dist/cjs/utils/truncation.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +45 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -2
- package/dist/esm/messages/prune.mjs +9 -2
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +4 -0
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/BashExecutor.mjs +42 -1
- package/dist/esm/tools/BashExecutor.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +453 -45
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +468 -0
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -0
- package/dist/esm/utils/truncation.mjs +27 -1
- package/dist/esm/utils/truncation.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +21 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/BashExecutor.d.ts +31 -0
- package/dist/types/tools/ToolNode.d.ts +86 -3
- package/dist/types/tools/toolOutputReferences.d.ts +205 -0
- package/dist/types/types/run.d.ts +9 -1
- package/dist/types/types/tools.d.ts +70 -0
- package/dist/types/utils/truncation.d.ts +21 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +48 -0
- package/src/messages/prune.ts +9 -2
- package/src/run.ts +4 -0
- package/src/specs/prune.test.ts +413 -0
- package/src/tools/BashExecutor.ts +45 -0
- package/src/tools/ToolNode.ts +618 -55
- package/src/tools/__tests__/BashExecutor.test.ts +36 -0
- package/src/tools/__tests__/ToolNode.outputReferences.test.ts +1395 -0
- package/src/tools/__tests__/toolOutputReferences.test.ts +415 -0
- package/src/tools/toolOutputReferences.ts +590 -0
- package/src/types/run.ts +9 -1
- package/src/types/tools.ts +71 -0
- package/src/utils/__tests__/truncation.test.ts +66 -0
- package/src/utils/truncation.ts +30 -0
package/src/run.ts
CHANGED
|
@@ -45,6 +45,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
45
45
|
private tokenCounter?: t.TokenCounter;
|
|
46
46
|
private handlerRegistry?: HandlerRegistry;
|
|
47
47
|
private hookRegistry?: HookRegistry;
|
|
48
|
+
private toolOutputReferences?: t.ToolOutputReferencesConfig;
|
|
48
49
|
private indexTokenCountMap?: Record<string, number>;
|
|
49
50
|
calibrationRatio: number = 1;
|
|
50
51
|
graphRunnable?: t.CompiledStateWorkflow;
|
|
@@ -78,6 +79,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
78
79
|
|
|
79
80
|
this.handlerRegistry = handlerRegistry;
|
|
80
81
|
this.hookRegistry = config.hooks;
|
|
82
|
+
this.toolOutputReferences = config.toolOutputReferences;
|
|
81
83
|
|
|
82
84
|
if (!config.graphConfig) {
|
|
83
85
|
throw new Error('Graph config not provided');
|
|
@@ -154,6 +156,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
154
156
|
/** Propagate compile options from graph config */
|
|
155
157
|
standardGraph.compileOptions = config.compileOptions;
|
|
156
158
|
standardGraph.hookRegistry = this.hookRegistry;
|
|
159
|
+
standardGraph.toolOutputReferences = this.toolOutputReferences;
|
|
157
160
|
this.Graph = standardGraph;
|
|
158
161
|
return standardGraph.createWorkflow();
|
|
159
162
|
}
|
|
@@ -177,6 +180,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
177
180
|
}
|
|
178
181
|
|
|
179
182
|
multiAgentGraph.hookRegistry = this.hookRegistry;
|
|
183
|
+
multiAgentGraph.toolOutputReferences = this.toolOutputReferences;
|
|
180
184
|
this.Graph = multiAgentGraph;
|
|
181
185
|
return multiAgentGraph.createWorkflow();
|
|
182
186
|
}
|
package/src/specs/prune.test.ts
CHANGED
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
createPruneMessages,
|
|
22
22
|
} from '@/messages/prune';
|
|
23
23
|
import { getLLMConfig } from '@/utils/llmConfig';
|
|
24
|
+
import { ensureThinkingBlockInMessages } from '@/messages/format';
|
|
24
25
|
import { Providers, ContentTypes } from '@/common';
|
|
25
26
|
import { Run } from '@/run';
|
|
26
27
|
|
|
@@ -1930,3 +1931,415 @@ describe('prunedMemory ordering with thinking enabled', () => {
|
|
|
1930
1931
|
}
|
|
1931
1932
|
});
|
|
1932
1933
|
});
|
|
1934
|
+
|
|
1935
|
+
describe('thinking enabled — tail tool_use without a thinking block (issue #115)', () => {
|
|
1936
|
+
it('does not throw when the trailing AI message issued a tool call without a thinking block', () => {
|
|
1937
|
+
const tokenCounter = createTestTokenCounter();
|
|
1938
|
+
const messages: BaseMessage[] = [
|
|
1939
|
+
new HumanMessage('first turn'),
|
|
1940
|
+
new AIMessage({
|
|
1941
|
+
content: [
|
|
1942
|
+
{
|
|
1943
|
+
type: ContentTypes.THINKING,
|
|
1944
|
+
thinking: 'thinking about the first response',
|
|
1945
|
+
signature: 'sig0',
|
|
1946
|
+
},
|
|
1947
|
+
{ type: 'text', text: 'first reply' },
|
|
1948
|
+
],
|
|
1949
|
+
}),
|
|
1950
|
+
new HumanMessage('please read this doc and tell me X'),
|
|
1951
|
+
// Anthropic may emit a tool_use without an accompanying thinking block —
|
|
1952
|
+
// valid API behavior that the pruner must tolerate.
|
|
1953
|
+
new AIMessage({
|
|
1954
|
+
content: [
|
|
1955
|
+
{
|
|
1956
|
+
type: 'tool_use',
|
|
1957
|
+
id: 'tc_get_doc',
|
|
1958
|
+
name: 'get_doc_content',
|
|
1959
|
+
input: { docId: 'abc' },
|
|
1960
|
+
},
|
|
1961
|
+
],
|
|
1962
|
+
tool_calls: [
|
|
1963
|
+
{
|
|
1964
|
+
id: 'tc_get_doc',
|
|
1965
|
+
name: 'get_doc_content',
|
|
1966
|
+
args: { docId: 'abc' },
|
|
1967
|
+
type: 'tool_call',
|
|
1968
|
+
},
|
|
1969
|
+
],
|
|
1970
|
+
}),
|
|
1971
|
+
new ToolMessage({
|
|
1972
|
+
content: 'a'.repeat(8000), // huge tool result that pushes us past budget
|
|
1973
|
+
tool_call_id: 'tc_get_doc',
|
|
1974
|
+
name: 'get_doc_content',
|
|
1975
|
+
}),
|
|
1976
|
+
];
|
|
1977
|
+
|
|
1978
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
1979
|
+
for (let i = 0; i < messages.length; i++) {
|
|
1980
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
1981
|
+
}
|
|
1982
|
+
|
|
1983
|
+
expect(() =>
|
|
1984
|
+
realGetMessagesWithinTokenLimit({
|
|
1985
|
+
messages,
|
|
1986
|
+
maxContextTokens: 200, // tight budget so pruning actually runs
|
|
1987
|
+
indexTokenCountMap,
|
|
1988
|
+
thinkingEnabled: true,
|
|
1989
|
+
tokenCounter,
|
|
1990
|
+
reasoningType: ContentTypes.THINKING,
|
|
1991
|
+
})
|
|
1992
|
+
).not.toThrow();
|
|
1993
|
+
});
|
|
1994
|
+
|
|
1995
|
+
it('returns a prunable context for the [AI tool_use, Tool] tail without a thinking block', () => {
|
|
1996
|
+
const tokenCounter = createTestTokenCounter();
|
|
1997
|
+
const messages: BaseMessage[] = [
|
|
1998
|
+
new HumanMessage('please read this doc'),
|
|
1999
|
+
new AIMessage({
|
|
2000
|
+
content: [
|
|
2001
|
+
{
|
|
2002
|
+
type: 'tool_use',
|
|
2003
|
+
id: 'tc_get_doc',
|
|
2004
|
+
name: 'get_doc_content',
|
|
2005
|
+
input: { docId: 'abc' },
|
|
2006
|
+
},
|
|
2007
|
+
],
|
|
2008
|
+
tool_calls: [
|
|
2009
|
+
{
|
|
2010
|
+
id: 'tc_get_doc',
|
|
2011
|
+
name: 'get_doc_content',
|
|
2012
|
+
args: { docId: 'abc' },
|
|
2013
|
+
type: 'tool_call',
|
|
2014
|
+
},
|
|
2015
|
+
],
|
|
2016
|
+
}),
|
|
2017
|
+
new ToolMessage({
|
|
2018
|
+
content: 'b'.repeat(6000),
|
|
2019
|
+
tool_call_id: 'tc_get_doc',
|
|
2020
|
+
name: 'get_doc_content',
|
|
2021
|
+
}),
|
|
2022
|
+
];
|
|
2023
|
+
|
|
2024
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2025
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2026
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2027
|
+
}
|
|
2028
|
+
|
|
2029
|
+
const result = realGetMessagesWithinTokenLimit({
|
|
2030
|
+
messages,
|
|
2031
|
+
maxContextTokens: 200,
|
|
2032
|
+
indexTokenCountMap,
|
|
2033
|
+
thinkingEnabled: true,
|
|
2034
|
+
tokenCounter,
|
|
2035
|
+
reasoningType: ContentTypes.THINKING,
|
|
2036
|
+
});
|
|
2037
|
+
|
|
2038
|
+
expect(result.context).toBeDefined();
|
|
2039
|
+
expect(result.messagesToRefine.length).toBeGreaterThan(0);
|
|
2040
|
+
expect(result.thinkingStartIndex).toBeUndefined();
|
|
2041
|
+
});
|
|
2042
|
+
|
|
2043
|
+
it('handles consecutive tool calls without any thinking block in the tail', () => {
|
|
2044
|
+
const tokenCounter = createTestTokenCounter();
|
|
2045
|
+
const messages: BaseMessage[] = [
|
|
2046
|
+
new HumanMessage('do two things'),
|
|
2047
|
+
new AIMessage({
|
|
2048
|
+
content: [
|
|
2049
|
+
{
|
|
2050
|
+
type: 'tool_use',
|
|
2051
|
+
id: 'tc_1',
|
|
2052
|
+
name: 'tool_a',
|
|
2053
|
+
input: { x: 1 },
|
|
2054
|
+
},
|
|
2055
|
+
],
|
|
2056
|
+
tool_calls: [
|
|
2057
|
+
{ id: 'tc_1', name: 'tool_a', args: { x: 1 }, type: 'tool_call' },
|
|
2058
|
+
],
|
|
2059
|
+
}),
|
|
2060
|
+
new ToolMessage({
|
|
2061
|
+
content: 'result_a',
|
|
2062
|
+
tool_call_id: 'tc_1',
|
|
2063
|
+
name: 'tool_a',
|
|
2064
|
+
}),
|
|
2065
|
+
new AIMessage({
|
|
2066
|
+
content: [
|
|
2067
|
+
{
|
|
2068
|
+
type: 'tool_use',
|
|
2069
|
+
id: 'tc_2',
|
|
2070
|
+
name: 'tool_b',
|
|
2071
|
+
input: { y: 2 },
|
|
2072
|
+
},
|
|
2073
|
+
],
|
|
2074
|
+
tool_calls: [
|
|
2075
|
+
{ id: 'tc_2', name: 'tool_b', args: { y: 2 }, type: 'tool_call' },
|
|
2076
|
+
],
|
|
2077
|
+
}),
|
|
2078
|
+
new ToolMessage({
|
|
2079
|
+
content: 'd'.repeat(6000),
|
|
2080
|
+
tool_call_id: 'tc_2',
|
|
2081
|
+
name: 'tool_b',
|
|
2082
|
+
}),
|
|
2083
|
+
];
|
|
2084
|
+
|
|
2085
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2086
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2087
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2088
|
+
}
|
|
2089
|
+
|
|
2090
|
+
const result = realGetMessagesWithinTokenLimit({
|
|
2091
|
+
messages,
|
|
2092
|
+
maxContextTokens: 200,
|
|
2093
|
+
indexTokenCountMap,
|
|
2094
|
+
thinkingEnabled: true,
|
|
2095
|
+
tokenCounter,
|
|
2096
|
+
reasoningType: ContentTypes.THINKING,
|
|
2097
|
+
});
|
|
2098
|
+
expect(result.thinkingStartIndex).toBeUndefined();
|
|
2099
|
+
});
|
|
2100
|
+
|
|
2101
|
+
it('honors prior runThinkingStartIndex carry-over when the next call has a no-thinking tail', () => {
|
|
2102
|
+
// First call's tight budget forces pruning, which makes the closure
|
|
2103
|
+
// record the AI(thinking) message's index in runThinkingStartIndex.
|
|
2104
|
+
// Second call's tail is AI(tool_use) without a thinking block; the
|
|
2105
|
+
// pre-loaded thinkingBlock from the carry-over keeps the new guard
|
|
2106
|
+
// dormant and the existing reattachment path runs. Verifies the fix
|
|
2107
|
+
// doesn't disturb the carry-over interaction.
|
|
2108
|
+
const tokenCounter = createTestTokenCounter();
|
|
2109
|
+
const firstTurn: BaseMessage[] = [
|
|
2110
|
+
new HumanMessage('h'.repeat(120)),
|
|
2111
|
+
new AIMessage({
|
|
2112
|
+
content: [
|
|
2113
|
+
{
|
|
2114
|
+
type: ContentTypes.THINKING,
|
|
2115
|
+
thinking: 'planning the response',
|
|
2116
|
+
signature: 'sig-prior',
|
|
2117
|
+
},
|
|
2118
|
+
{ type: 'text', text: 'hi' },
|
|
2119
|
+
],
|
|
2120
|
+
}),
|
|
2121
|
+
];
|
|
2122
|
+
|
|
2123
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2124
|
+
for (let i = 0; i < firstTurn.length; i++) {
|
|
2125
|
+
indexTokenCountMap[i] = tokenCounter(firstTurn[i]);
|
|
2126
|
+
}
|
|
2127
|
+
|
|
2128
|
+
const pruneMessages = createPruneMessages({
|
|
2129
|
+
maxTokens: 68,
|
|
2130
|
+
startIndex: 0,
|
|
2131
|
+
tokenCounter,
|
|
2132
|
+
indexTokenCountMap,
|
|
2133
|
+
thinkingEnabled: true,
|
|
2134
|
+
reserveRatio: 0,
|
|
2135
|
+
});
|
|
2136
|
+
|
|
2137
|
+
const firstResult = pruneMessages({ messages: firstTurn });
|
|
2138
|
+
expect(firstResult.messagesToRefine?.length).toBeGreaterThan(0);
|
|
2139
|
+
expect(firstResult.context.some((m) => m.getType() === 'ai')).toBe(true);
|
|
2140
|
+
|
|
2141
|
+
const secondTurn: BaseMessage[] = [
|
|
2142
|
+
...firstTurn,
|
|
2143
|
+
new HumanMessage('please read the doc'),
|
|
2144
|
+
new AIMessage({
|
|
2145
|
+
content: [
|
|
2146
|
+
{
|
|
2147
|
+
type: 'tool_use',
|
|
2148
|
+
id: 'tc_get_doc',
|
|
2149
|
+
name: 'get_doc_content',
|
|
2150
|
+
input: { docId: 'abc' },
|
|
2151
|
+
},
|
|
2152
|
+
],
|
|
2153
|
+
tool_calls: [
|
|
2154
|
+
{
|
|
2155
|
+
id: 'tc_get_doc',
|
|
2156
|
+
name: 'get_doc_content',
|
|
2157
|
+
args: { docId: 'abc' },
|
|
2158
|
+
type: 'tool_call',
|
|
2159
|
+
},
|
|
2160
|
+
],
|
|
2161
|
+
}),
|
|
2162
|
+
new ToolMessage({
|
|
2163
|
+
content: 'e'.repeat(40),
|
|
2164
|
+
tool_call_id: 'tc_get_doc',
|
|
2165
|
+
name: 'get_doc_content',
|
|
2166
|
+
}),
|
|
2167
|
+
];
|
|
2168
|
+
|
|
2169
|
+
let secondResult: ReturnType<typeof pruneMessages> | undefined;
|
|
2170
|
+
expect(() => {
|
|
2171
|
+
secondResult = pruneMessages({ messages: secondTurn });
|
|
2172
|
+
}).not.toThrow();
|
|
2173
|
+
|
|
2174
|
+
// Carry-over reattachment: even though the trailing AI(tool_use) has
|
|
2175
|
+
// no thinking block of its own, the closure's runThinkingStartIndex
|
|
2176
|
+
// points at the prior AI(thinking) and that block gets prepended to
|
|
2177
|
+
// the surviving AI message in context.
|
|
2178
|
+
const trailingAi = secondResult!.context.find(
|
|
2179
|
+
(m) =>
|
|
2180
|
+
m.getType() === 'ai' &&
|
|
2181
|
+
Array.isArray(m.content) &&
|
|
2182
|
+
(m.content as t.ExtendedMessageContent[]).some(
|
|
2183
|
+
(c) => typeof c === 'object' && c.type === 'tool_use'
|
|
2184
|
+
)
|
|
2185
|
+
);
|
|
2186
|
+
expect(trailingAi).toBeDefined();
|
|
2187
|
+
expect(
|
|
2188
|
+
(trailingAi!.content as t.ExtendedMessageContent[]).some(
|
|
2189
|
+
(c) => typeof c === 'object' && c.type === ContentTypes.THINKING
|
|
2190
|
+
)
|
|
2191
|
+
).toBe(true);
|
|
2192
|
+
});
|
|
2193
|
+
|
|
2194
|
+
it('integrates with ensureThinkingBlockInMessages so the API-bound payload stays valid', () => {
|
|
2195
|
+
// Models the full Graph.ts pipeline: pruner runs first, then
|
|
2196
|
+
// ensureThinkingBlockInMessages on the pruned context. The pruner used
|
|
2197
|
+
// to throw on the issue #115 tail; with the fix it returns the
|
|
2198
|
+
// messages, and ensureThinkingBlockInMessages folds the orphan
|
|
2199
|
+
// AI(tool_use)+Tool tail into a `[Previous agent context]`
|
|
2200
|
+
// HumanMessage. The Tool size is tuned so the trailing sequence
|
|
2201
|
+
// actually survives pruning — otherwise the assertions would be
|
|
2202
|
+
// vacuous.
|
|
2203
|
+
const tokenCounter = createTestTokenCounter();
|
|
2204
|
+
const messages: BaseMessage[] = [
|
|
2205
|
+
new HumanMessage('please read this doc and tell me X'),
|
|
2206
|
+
new AIMessage({
|
|
2207
|
+
content: [
|
|
2208
|
+
{
|
|
2209
|
+
type: 'tool_use',
|
|
2210
|
+
id: 'tc_get_doc',
|
|
2211
|
+
name: 'get_doc_content',
|
|
2212
|
+
input: { docId: 'abc' },
|
|
2213
|
+
},
|
|
2214
|
+
],
|
|
2215
|
+
tool_calls: [
|
|
2216
|
+
{
|
|
2217
|
+
id: 'tc_get_doc',
|
|
2218
|
+
name: 'get_doc_content',
|
|
2219
|
+
args: { docId: 'abc' },
|
|
2220
|
+
type: 'tool_call',
|
|
2221
|
+
},
|
|
2222
|
+
],
|
|
2223
|
+
}),
|
|
2224
|
+
new ToolMessage({
|
|
2225
|
+
content: 'f'.repeat(100),
|
|
2226
|
+
tool_call_id: 'tc_get_doc',
|
|
2227
|
+
name: 'get_doc_content',
|
|
2228
|
+
}),
|
|
2229
|
+
];
|
|
2230
|
+
|
|
2231
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2232
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2233
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2234
|
+
}
|
|
2235
|
+
|
|
2236
|
+
const pruneResult = realGetMessagesWithinTokenLimit({
|
|
2237
|
+
messages,
|
|
2238
|
+
maxContextTokens: 300,
|
|
2239
|
+
indexTokenCountMap,
|
|
2240
|
+
thinkingEnabled: true,
|
|
2241
|
+
tokenCounter,
|
|
2242
|
+
reasoningType: ContentTypes.THINKING,
|
|
2243
|
+
});
|
|
2244
|
+
|
|
2245
|
+
expect(pruneResult.context.length).toBe(3);
|
|
2246
|
+
|
|
2247
|
+
const finalMessages = ensureThinkingBlockInMessages(
|
|
2248
|
+
pruneResult.context,
|
|
2249
|
+
Providers.ANTHROPIC
|
|
2250
|
+
);
|
|
2251
|
+
|
|
2252
|
+
// ensureThinkingBlockInMessages should fold the orphan AI(tool_use)+Tool
|
|
2253
|
+
// into a synthetic HumanMessage carrying the `[Previous agent context]`
|
|
2254
|
+
// marker, leaving no AI(tool_use) in the outgoing payload.
|
|
2255
|
+
expect(finalMessages.length).toBe(2);
|
|
2256
|
+
expect(finalMessages[0]).toBeInstanceOf(HumanMessage);
|
|
2257
|
+
expect(finalMessages[1]).toBeInstanceOf(HumanMessage);
|
|
2258
|
+
|
|
2259
|
+
const folded = finalMessages[1] as HumanMessage;
|
|
2260
|
+
const foldedContent = folded.content;
|
|
2261
|
+
const foldedText = Array.isArray(foldedContent)
|
|
2262
|
+
? (foldedContent as t.ExtendedMessageContent[])
|
|
2263
|
+
.filter((c) => typeof c === 'object' && c.type === 'text')
|
|
2264
|
+
.map((c) => String(c.text ?? ''))
|
|
2265
|
+
.join('\n')
|
|
2266
|
+
: String(foldedContent);
|
|
2267
|
+
expect(foldedText).toContain('[Previous agent context]');
|
|
2268
|
+
|
|
2269
|
+
const hasOrphanToolUse = finalMessages.some((m) => {
|
|
2270
|
+
if (m.getType() !== 'ai') {
|
|
2271
|
+
return false;
|
|
2272
|
+
}
|
|
2273
|
+
const content = (m as AIMessage).content;
|
|
2274
|
+
if (!Array.isArray(content)) {
|
|
2275
|
+
return false;
|
|
2276
|
+
}
|
|
2277
|
+
return content.some(
|
|
2278
|
+
(c) => typeof c === 'object' && c.type === 'tool_use'
|
|
2279
|
+
);
|
|
2280
|
+
});
|
|
2281
|
+
expect(hasOrphanToolUse).toBe(false);
|
|
2282
|
+
});
|
|
2283
|
+
|
|
2284
|
+
it('still preserves the thinking block when the trailing AI message has one', () => {
|
|
2285
|
+
const tokenCounter = createTestTokenCounter();
|
|
2286
|
+
const messages: BaseMessage[] = [
|
|
2287
|
+
new HumanMessage('hi'),
|
|
2288
|
+
new AIMessage({
|
|
2289
|
+
content: [
|
|
2290
|
+
{
|
|
2291
|
+
type: ContentTypes.THINKING,
|
|
2292
|
+
thinking: 'older thinking',
|
|
2293
|
+
signature: 'sig-old',
|
|
2294
|
+
},
|
|
2295
|
+
{ type: 'text', text: 'older reply' },
|
|
2296
|
+
],
|
|
2297
|
+
}),
|
|
2298
|
+
new HumanMessage('please read this doc'),
|
|
2299
|
+
new AIMessage({
|
|
2300
|
+
content: [
|
|
2301
|
+
{
|
|
2302
|
+
type: ContentTypes.THINKING,
|
|
2303
|
+
thinking: 'I will fetch the doc',
|
|
2304
|
+
signature: 'sig-new',
|
|
2305
|
+
},
|
|
2306
|
+
{
|
|
2307
|
+
type: 'tool_use',
|
|
2308
|
+
id: 'tc_get_doc',
|
|
2309
|
+
name: 'get_doc_content',
|
|
2310
|
+
input: { docId: 'abc' },
|
|
2311
|
+
},
|
|
2312
|
+
],
|
|
2313
|
+
tool_calls: [
|
|
2314
|
+
{
|
|
2315
|
+
id: 'tc_get_doc',
|
|
2316
|
+
name: 'get_doc_content',
|
|
2317
|
+
args: { docId: 'abc' },
|
|
2318
|
+
type: 'tool_call',
|
|
2319
|
+
},
|
|
2320
|
+
],
|
|
2321
|
+
}),
|
|
2322
|
+
new ToolMessage({
|
|
2323
|
+
content: 'c'.repeat(6000),
|
|
2324
|
+
tool_call_id: 'tc_get_doc',
|
|
2325
|
+
name: 'get_doc_content',
|
|
2326
|
+
}),
|
|
2327
|
+
];
|
|
2328
|
+
|
|
2329
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2330
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2331
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2332
|
+
}
|
|
2333
|
+
|
|
2334
|
+
const result = realGetMessagesWithinTokenLimit({
|
|
2335
|
+
messages,
|
|
2336
|
+
maxContextTokens: 200,
|
|
2337
|
+
indexTokenCountMap,
|
|
2338
|
+
thinkingEnabled: true,
|
|
2339
|
+
tokenCounter,
|
|
2340
|
+
reasoningType: ContentTypes.THINKING,
|
|
2341
|
+
});
|
|
2342
|
+
|
|
2343
|
+
expect(result.thinkingStartIndex).toBeGreaterThanOrEqual(0);
|
|
2344
|
+
});
|
|
2345
|
+
});
|
package/src/tools/BashExecutor.ts
CHANGED
|
@@ -51,8 +51,53 @@ Usage:
|
|
|
51
51
|
- NEVER use this tool to execute malicious commands.
|
|
52
52
|
`.trim();
|
|
53
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Supplemental prompt documenting the tool-output reference feature.
|
|
56
|
+
*
|
|
57
|
+
* Hosts should append this (separated by a blank line) to the base
|
|
58
|
+
* {@link BashExecutionToolDescription} only when
|
|
59
|
+
* `RunConfig.toolOutputReferences.enabled` is `true`. When the feature
|
|
60
|
+
* is disabled, including this text would tell the LLM to emit
|
|
61
|
+
* `{{tool0turn0}}` placeholders that pass through unsubstituted and
|
|
62
|
+
* leak into the shell.
|
|
63
|
+
*/
|
|
64
|
+
export const BashToolOutputReferencesGuide = `
|
|
65
|
+
Referencing previous tool outputs:
|
|
66
|
+
- Every successful tool result is tagged with a reference key of the form \`tool<idx>turn<turn>\` (e.g., \`tool0turn0\`). The key appears either as a \`[ref: tool0turn0]\` prefix line or, when the output is a JSON object, as a \`_ref\` field on the object.
|
|
67
|
+
- To pipe a previous tool output into this tool, embed the placeholder \`{{tool<idx>turn<turn>}}\` literally anywhere in the \`command\` string (or any string arg). It will be substituted with the stored output verbatim before the command runs.
|
|
68
|
+
- The substituted value is the original output string (no \`[ref: …]\` prefix, no \`_ref\` key), so it is safe to pipe directly into \`jq\`, \`grep\`, \`awk\`, etc.
|
|
69
|
+
- Example: \`echo '{{tool0turn0}}' | jq '.foo'\` takes the full output of the first tool from the first turn and pipes it into jq.
|
|
70
|
+
- Unknown reference keys are left in place and surfaced as \`[unresolved refs: …]\` after the output.
|
|
71
|
+
`.trim();
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Composes the bash tool description, optionally appending the
|
|
75
|
+
* tool-output references guide. Hosts that enable
|
|
76
|
+
* `RunConfig.toolOutputReferences` should pass `enableToolOutputReferences: true`
|
|
77
|
+
* when registering the tool so the LLM learns the `{{…}}` syntax it
|
|
78
|
+
* will actually be able to use.
|
|
79
|
+
*/
|
|
80
|
+
export function buildBashExecutionToolDescription(options?: {
|
|
81
|
+
enableToolOutputReferences?: boolean;
|
|
82
|
+
}): string {
|
|
83
|
+
if (options?.enableToolOutputReferences === true) {
|
|
84
|
+
return `${BashExecutionToolDescription}\n\n${BashToolOutputReferencesGuide}`;
|
|
85
|
+
}
|
|
86
|
+
return BashExecutionToolDescription;
|
|
87
|
+
}
|
|
88
|
+
|
|
54
89
|
export const BashExecutionToolName = Constants.BASH_TOOL;
|
|
55
90
|
|
|
91
|
+
/**
|
|
92
|
+
* Default bash tool definition using the base description.
|
|
93
|
+
*
|
|
94
|
+
* When `RunConfig.toolOutputReferences.enabled` is `true`, build a
|
|
95
|
+
* reference-aware description with
|
|
96
|
+
* {@link buildBashExecutionToolDescription}
|
|
97
|
+
* (`{ enableToolOutputReferences: true }`) and construct a custom
|
|
98
|
+
* definition using it — using this constant as-is leaves the LLM
|
|
99
|
+
* unaware of the `{{tool<i>turn<n>}}` syntax.
|
|
100
|
+
*/
|
|
56
101
|
export const BashExecutionToolDefinition = {
|
|
57
102
|
name: BashExecutionToolName,
|
|
58
103
|
description: BashExecutionToolDescription,
|