@librechat/agents 3.1.70 → 3.1.71-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/graphs/Graph.cjs +52 -0
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/llm/invoke.cjs +13 -2
  4. package/dist/cjs/llm/invoke.cjs.map +1 -1
  5. package/dist/cjs/main.cjs +4 -0
  6. package/dist/cjs/main.cjs.map +1 -1
  7. package/dist/cjs/messages/prune.cjs +9 -2
  8. package/dist/cjs/messages/prune.cjs.map +1 -1
  9. package/dist/cjs/run.cjs +4 -0
  10. package/dist/cjs/run.cjs.map +1 -1
  11. package/dist/cjs/tools/BashExecutor.cjs +43 -0
  12. package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +482 -45
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/cjs/tools/toolOutputReferences.cjs +657 -0
  16. package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -0
  17. package/dist/cjs/utils/truncation.cjs +28 -0
  18. package/dist/cjs/utils/truncation.cjs.map +1 -1
  19. package/dist/esm/graphs/Graph.mjs +52 -0
  20. package/dist/esm/graphs/Graph.mjs.map +1 -1
  21. package/dist/esm/llm/invoke.mjs +13 -2
  22. package/dist/esm/llm/invoke.mjs.map +1 -1
  23. package/dist/esm/main.mjs +2 -2
  24. package/dist/esm/messages/prune.mjs +9 -2
  25. package/dist/esm/messages/prune.mjs.map +1 -1
  26. package/dist/esm/run.mjs +4 -0
  27. package/dist/esm/run.mjs.map +1 -1
  28. package/dist/esm/tools/BashExecutor.mjs +42 -1
  29. package/dist/esm/tools/BashExecutor.mjs.map +1 -1
  30. package/dist/esm/tools/ToolNode.mjs +482 -45
  31. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  32. package/dist/esm/tools/toolOutputReferences.mjs +649 -0
  33. package/dist/esm/tools/toolOutputReferences.mjs.map +1 -0
  34. package/dist/esm/utils/truncation.mjs +27 -1
  35. package/dist/esm/utils/truncation.mjs.map +1 -1
  36. package/dist/types/graphs/Graph.d.ts +28 -0
  37. package/dist/types/llm/invoke.d.ts +9 -0
  38. package/dist/types/run.d.ts +1 -0
  39. package/dist/types/tools/BashExecutor.d.ts +31 -0
  40. package/dist/types/tools/ToolNode.d.ts +84 -3
  41. package/dist/types/tools/toolOutputReferences.d.ts +236 -0
  42. package/dist/types/types/index.d.ts +1 -0
  43. package/dist/types/types/messages.d.ts +26 -0
  44. package/dist/types/types/run.d.ts +9 -1
  45. package/dist/types/types/tools.d.ts +70 -0
  46. package/dist/types/utils/truncation.d.ts +21 -0
  47. package/package.json +1 -1
  48. package/src/graphs/Graph.ts +55 -0
  49. package/src/llm/invoke.test.ts +442 -0
  50. package/src/llm/invoke.ts +23 -2
  51. package/src/messages/prune.ts +9 -2
  52. package/src/run.ts +4 -0
  53. package/src/specs/prune.test.ts +413 -0
  54. package/src/tools/BashExecutor.ts +45 -0
  55. package/src/tools/ToolNode.ts +631 -55
  56. package/src/tools/__tests__/BashExecutor.test.ts +36 -0
  57. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +1438 -0
  58. package/src/tools/__tests__/annotateMessagesForLLM.test.ts +419 -0
  59. package/src/tools/__tests__/toolOutputReferences.test.ts +415 -0
  60. package/src/tools/toolOutputReferences.ts +813 -0
  61. package/src/types/index.ts +1 -0
  62. package/src/types/messages.ts +27 -0
  63. package/src/types/run.ts +9 -1
  64. package/src/types/tools.ts +71 -0
  65. package/src/utils/__tests__/truncation.test.ts +66 -0
  66. package/src/utils/truncation.ts +30 -0
package/src/specs/prune.test.ts
@@ -21,6 +21,7 @@ import {
   createPruneMessages,
 } from '@/messages/prune';
 import { getLLMConfig } from '@/utils/llmConfig';
+import { ensureThinkingBlockInMessages } from '@/messages/format';
 import { Providers, ContentTypes } from '@/common';
 import { Run } from '@/run';
 
@@ -1930,3 +1931,415 @@ describe('prunedMemory ordering with thinking enabled', () => {
     }
   });
 });
+
+describe('thinking enabled — tail tool_use without a thinking block (issue #115)', () => {
+  it('does not throw when the trailing AI message issued a tool call without a thinking block', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages: BaseMessage[] = [
+      new HumanMessage('first turn'),
+      new AIMessage({
+        content: [
+          {
+            type: ContentTypes.THINKING,
+            thinking: 'thinking about the first response',
+            signature: 'sig0',
+          },
+          { type: 'text', text: 'first reply' },
+        ],
+      }),
+      new HumanMessage('please read this doc and tell me X'),
+      // Anthropic may emit a tool_use without an accompanying thinking block —
+      // valid API behavior that the pruner must tolerate.
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            input: { docId: 'abc' },
+          },
+        ],
+        tool_calls: [
+          {
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            args: { docId: 'abc' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        content: 'a'.repeat(8000), // huge tool result that pushes us past budget
+        tool_call_id: 'tc_get_doc',
+        name: 'get_doc_content',
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    expect(() =>
+      realGetMessagesWithinTokenLimit({
+        messages,
+        maxContextTokens: 200, // tight budget so pruning actually runs
+        indexTokenCountMap,
+        thinkingEnabled: true,
+        tokenCounter,
+        reasoningType: ContentTypes.THINKING,
+      })
+    ).not.toThrow();
+  });
+
+  it('returns a prunable context for the [AI tool_use, Tool] tail without a thinking block', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages: BaseMessage[] = [
+      new HumanMessage('please read this doc'),
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            input: { docId: 'abc' },
+          },
+        ],
+        tool_calls: [
+          {
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            args: { docId: 'abc' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        content: 'b'.repeat(6000),
+        tool_call_id: 'tc_get_doc',
+        name: 'get_doc_content',
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const result = realGetMessagesWithinTokenLimit({
+      messages,
+      maxContextTokens: 200,
+      indexTokenCountMap,
+      thinkingEnabled: true,
+      tokenCounter,
+      reasoningType: ContentTypes.THINKING,
+    });
+
+    expect(result.context).toBeDefined();
+    expect(result.messagesToRefine.length).toBeGreaterThan(0);
+    expect(result.thinkingStartIndex).toBeUndefined();
+  });
+
+  it('handles consecutive tool calls without any thinking block in the tail', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages: BaseMessage[] = [
+      new HumanMessage('do two things'),
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_1',
+            name: 'tool_a',
+            input: { x: 1 },
+          },
+        ],
+        tool_calls: [
+          { id: 'tc_1', name: 'tool_a', args: { x: 1 }, type: 'tool_call' },
+        ],
+      }),
+      new ToolMessage({
+        content: 'result_a',
+        tool_call_id: 'tc_1',
+        name: 'tool_a',
+      }),
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_2',
+            name: 'tool_b',
+            input: { y: 2 },
+          },
+        ],
+        tool_calls: [
+          { id: 'tc_2', name: 'tool_b', args: { y: 2 }, type: 'tool_call' },
+        ],
+      }),
+      new ToolMessage({
+        content: 'd'.repeat(6000),
+        tool_call_id: 'tc_2',
+        name: 'tool_b',
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const result = realGetMessagesWithinTokenLimit({
+      messages,
+      maxContextTokens: 200,
+      indexTokenCountMap,
+      thinkingEnabled: true,
+      tokenCounter,
+      reasoningType: ContentTypes.THINKING,
+    });
+    expect(result.thinkingStartIndex).toBeUndefined();
+  });
+
+  it('honors prior runThinkingStartIndex carry-over when the next call has a no-thinking tail', () => {
+    // First call's tight budget forces pruning, which makes the closure
+    // record the AI(thinking) message's index in runThinkingStartIndex.
+    // Second call's tail is AI(tool_use) without a thinking block; the
+    // pre-loaded thinkingBlock from the carry-over keeps the new guard
+    // dormant and the existing reattachment path runs. Verifies the fix
+    // doesn't disturb the carry-over interaction.
+    const tokenCounter = createTestTokenCounter();
+    const firstTurn: BaseMessage[] = [
+      new HumanMessage('h'.repeat(120)),
+      new AIMessage({
+        content: [
+          {
+            type: ContentTypes.THINKING,
+            thinking: 'planning the response',
+            signature: 'sig-prior',
+          },
+          { type: 'text', text: 'hi' },
+        ],
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < firstTurn.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(firstTurn[i]);
+    }
+
+    const pruneMessages = createPruneMessages({
+      maxTokens: 68,
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap,
+      thinkingEnabled: true,
+      reserveRatio: 0,
+    });
+
+    const firstResult = pruneMessages({ messages: firstTurn });
+    expect(firstResult.messagesToRefine?.length).toBeGreaterThan(0);
+    expect(firstResult.context.some((m) => m.getType() === 'ai')).toBe(true);
+
+    const secondTurn: BaseMessage[] = [
+      ...firstTurn,
+      new HumanMessage('please read the doc'),
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            input: { docId: 'abc' },
+          },
+        ],
+        tool_calls: [
+          {
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            args: { docId: 'abc' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        content: 'e'.repeat(40),
+        tool_call_id: 'tc_get_doc',
+        name: 'get_doc_content',
+      }),
+    ];
+
+    let secondResult: ReturnType<typeof pruneMessages> | undefined;
+    expect(() => {
+      secondResult = pruneMessages({ messages: secondTurn });
+    }).not.toThrow();
+
+    // Carry-over reattachment: even though the trailing AI(tool_use) has
+    // no thinking block of its own, the closure's runThinkingStartIndex
+    // points at the prior AI(thinking) and that block gets prepended to
+    // the surviving AI message in context.
+    const trailingAi = secondResult!.context.find(
+      (m) =>
+        m.getType() === 'ai' &&
+        Array.isArray(m.content) &&
+        (m.content as t.ExtendedMessageContent[]).some(
+          (c) => typeof c === 'object' && c.type === 'tool_use'
+        )
+    );
+    expect(trailingAi).toBeDefined();
+    expect(
+      (trailingAi!.content as t.ExtendedMessageContent[]).some(
+        (c) => typeof c === 'object' && c.type === ContentTypes.THINKING
+      )
+    ).toBe(true);
+  });
+
+  it('integrates with ensureThinkingBlockInMessages so the API-bound payload stays valid', () => {
+    // Models the full Graph.ts pipeline: pruner runs first, then
+    // ensureThinkingBlockInMessages on the pruned context. The pruner used
+    // to throw on the issue #115 tail; with the fix it returns the
+    // messages, and ensureThinkingBlockInMessages folds the orphan
+    // AI(tool_use)+Tool tail into a `[Previous agent context]`
+    // HumanMessage. The Tool size is tuned so the trailing sequence
+    // actually survives pruning — otherwise the assertions would be
+    // vacuous.
+    const tokenCounter = createTestTokenCounter();
+    const messages: BaseMessage[] = [
+      new HumanMessage('please read this doc and tell me X'),
+      new AIMessage({
+        content: [
+          {
+            type: 'tool_use',
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            input: { docId: 'abc' },
+          },
+        ],
+        tool_calls: [
+          {
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            args: { docId: 'abc' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        content: 'f'.repeat(100),
+        tool_call_id: 'tc_get_doc',
+        name: 'get_doc_content',
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const pruneResult = realGetMessagesWithinTokenLimit({
+      messages,
+      maxContextTokens: 300,
+      indexTokenCountMap,
+      thinkingEnabled: true,
+      tokenCounter,
+      reasoningType: ContentTypes.THINKING,
+    });
+
+    expect(pruneResult.context.length).toBe(3);
+
+    const finalMessages = ensureThinkingBlockInMessages(
+      pruneResult.context,
+      Providers.ANTHROPIC
+    );
+
+    // ensureThinkingBlockInMessages should fold the orphan AI(tool_use)+Tool
+    // into a synthetic HumanMessage carrying the `[Previous agent context]`
+    // marker, leaving no AI(tool_use) in the outgoing payload.
+    expect(finalMessages.length).toBe(2);
+    expect(finalMessages[0]).toBeInstanceOf(HumanMessage);
+    expect(finalMessages[1]).toBeInstanceOf(HumanMessage);
+
+    const folded = finalMessages[1] as HumanMessage;
+    const foldedContent = folded.content;
+    const foldedText = Array.isArray(foldedContent)
+      ? (foldedContent as t.ExtendedMessageContent[])
+          .filter((c) => typeof c === 'object' && c.type === 'text')
+          .map((c) => String(c.text ?? ''))
+          .join('\n')
+      : String(foldedContent);
+    expect(foldedText).toContain('[Previous agent context]');
+
+    const hasOrphanToolUse = finalMessages.some((m) => {
+      if (m.getType() !== 'ai') {
+        return false;
+      }
+      const content = (m as AIMessage).content;
+      if (!Array.isArray(content)) {
+        return false;
+      }
+      return content.some(
+        (c) => typeof c === 'object' && c.type === 'tool_use'
+      );
+    });
+    expect(hasOrphanToolUse).toBe(false);
+  });
+
+  it('still preserves the thinking block when the trailing AI message has one', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages: BaseMessage[] = [
+      new HumanMessage('hi'),
+      new AIMessage({
+        content: [
+          {
+            type: ContentTypes.THINKING,
+            thinking: 'older thinking',
+            signature: 'sig-old',
+          },
+          { type: 'text', text: 'older reply' },
+        ],
+      }),
+      new HumanMessage('please read this doc'),
+      new AIMessage({
+        content: [
+          {
+            type: ContentTypes.THINKING,
+            thinking: 'I will fetch the doc',
+            signature: 'sig-new',
+          },
+          {
+            type: 'tool_use',
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            input: { docId: 'abc' },
+          },
+        ],
+        tool_calls: [
+          {
+            id: 'tc_get_doc',
+            name: 'get_doc_content',
+            args: { docId: 'abc' },
+            type: 'tool_call',
+          },
+        ],
+      }),
+      new ToolMessage({
+        content: 'c'.repeat(6000),
+        tool_call_id: 'tc_get_doc',
+        name: 'get_doc_content',
+      }),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const result = realGetMessagesWithinTokenLimit({
+      messages,
+      maxContextTokens: 200,
+      indexTokenCountMap,
+      thinkingEnabled: true,
+      tokenCounter,
+      reasoningType: ContentTypes.THINKING,
+    });
+
+    expect(result.thinkingStartIndex).toBeGreaterThanOrEqual(0);
+  });
+});
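
Note: the last two tests model the runtime ordering of prune-then-repair. As a reading aid, here is a minimal sketch of that pipeline under the tests' own assumptions; the real wiring lives in package/src/graphs/Graph.ts, which this diff excerpt does not show, so the function shape and parameter plumbing below are assumptions, not the library's actual code.

// Illustrative sketch only, not the actual Graph.ts implementation.
// realGetMessagesWithinTokenLimit, ensureThinkingBlockInMessages,
// Providers, and ContentTypes are the symbols the tests above use.
import type { BaseMessage } from '@langchain/core/messages';

function buildAnthropicPayload(
  messages: BaseMessage[],
  indexTokenCountMap: Record<string, number | undefined>,
  tokenCounter: (m: BaseMessage) => number
): BaseMessage[] {
  // Step 1: prune to budget. With the issue #115 fix, a trailing
  // AI(tool_use) + Tool pair without a thinking block no longer throws.
  const { context } = realGetMessagesWithinTokenLimit({
    messages,
    maxContextTokens: 200,
    indexTokenCountMap,
    thinkingEnabled: true,
    tokenCounter,
    reasoningType: ContentTypes.THINKING,
  });
  // Step 2: repair the pruned tail for Anthropic: orphan tool_use
  // tails are folded into a '[Previous agent context]' HumanMessage.
  return ensureThinkingBlockInMessages(context, Providers.ANTHROPIC);
}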
package/src/tools/BashExecutor.ts
@@ -51,8 +51,53 @@ Usage:
 - NEVER use this tool to execute malicious commands.
 `.trim();
 
+/**
+ * Supplemental prompt documenting the tool-output reference feature.
+ *
+ * Hosts should append this (separated by a blank line) to the base
+ * {@link BashExecutionToolDescription} only when
+ * `RunConfig.toolOutputReferences.enabled` is `true`. When the feature
+ * is disabled, including this text would tell the LLM to emit
+ * `{{tool0turn0}}` placeholders that pass through unsubstituted and
+ * leak into the shell.
+ */
+export const BashToolOutputReferencesGuide = `
+Referencing previous tool outputs:
+- Every successful tool result is tagged with a reference key of the form \`tool<idx>turn<turn>\` (e.g., \`tool0turn0\`). The key appears either as a \`[ref: tool0turn0]\` prefix line or, when the output is a JSON object, as a \`_ref\` field on the object.
+- To pipe a previous tool output into this tool, embed the placeholder \`{{tool<idx>turn<turn>}}\` literally anywhere in the \`command\` string (or any string arg). It will be substituted with the stored output verbatim before the command runs.
+- The substituted value is the original output string (no \`[ref: …]\` prefix, no \`_ref\` key), so it is safe to pipe directly into \`jq\`, \`grep\`, \`awk\`, etc.
+- Example: \`echo '{{tool0turn0}}' | jq '.foo'\` takes the full output of the first tool from the first turn and pipes it into jq.
+- Unknown reference keys are left in place and surfaced as \`[unresolved refs: …]\` after the output.
+`.trim();
+
+/**
+ * Composes the bash tool description, optionally appending the
+ * tool-output references guide. Hosts that enable
+ * `RunConfig.toolOutputReferences` should pass `enableToolOutputReferences: true`
+ * when registering the tool so the LLM learns the `{{…}}` syntax it
+ * will actually be able to use.
+ */
+export function buildBashExecutionToolDescription(options?: {
+  enableToolOutputReferences?: boolean;
+}): string {
+  if (options?.enableToolOutputReferences === true) {
+    return `${BashExecutionToolDescription}\n\n${BashToolOutputReferencesGuide}`;
+  }
+  return BashExecutionToolDescription;
+}
+
 export const BashExecutionToolName = Constants.BASH_TOOL;
 
+/**
+ * Default bash tool definition using the base description.
+ *
+ * When `RunConfig.toolOutputReferences.enabled` is `true`, build a
+ * reference-aware description with
+ * {@link buildBashExecutionToolDescription}
+ * (`{ enableToolOutputReferences: true }`) and construct a custom
+ * definition using it — using this constant as-is leaves the LLM
+ * unaware of the `{{tool<i>turn<n>}}` syntax.
+ */
 export const BashExecutionToolDefinition = {
   name: BashExecutionToolName,
   description: BashExecutionToolDescription,
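
Note: the JSDoc above implies a host-side registration pattern along the following lines. This is a hedged sketch; buildBashExecutionToolDescription, BashExecutionToolDefinition, and the enableToolOutputReferences option come from this diff, while the import path and the host's config flag are assumptions.

// Hypothetical host-side sketch. The import path and the `referencesEnabled`
// flag are assumptions; the exported symbols come from BashExecutor.ts above.
import {
  BashExecutionToolDefinition,
  buildBashExecutionToolDescription,
} from '@librechat/agents';

// Mirrors RunConfig.toolOutputReferences.enabled in the host's run config.
const referencesEnabled = true;

// Reuse the default definition, swapping in the reference-aware description
// so the LLM learns the {{tool<idx>turn<turn>}} placeholder syntax.
const bashTool = {
  ...BashExecutionToolDefinition,
  description: buildBashExecutionToolDescription({
    enableToolOutputReferences: referencesEnabled,
  }),
};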