@tyvm/knowhow 0.0.20 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/agents/base/base.ts +16 -77
- package/src/agents/tools/executeScript/README.md +78 -0
- package/src/agents/tools/executeScript/definition.ts +73 -0
- package/src/agents/tools/executeScript/examples/quick-test.ts +80 -0
- package/src/agents/tools/executeScript/examples/serialization-test.ts +309 -0
- package/src/agents/tools/executeScript/examples/test-runner.ts +204 -0
- package/src/agents/tools/executeScript/index.ts +74 -0
- package/src/agents/tools/index.ts +1 -0
- package/src/agents/tools/list.ts +2 -1
- package/src/cli.ts +2 -6
- package/src/clients/index.ts +23 -9
- package/src/services/Tools.ts +150 -9
- package/src/services/script-execution/SandboxContext.ts +278 -0
- package/src/services/script-execution/ScriptExecutor.ts +337 -0
- package/src/services/script-execution/ScriptPolicy.ts +236 -0
- package/src/services/script-execution/ScriptTracer.ts +249 -0
- package/src/services/script-execution/types.ts +134 -0
- package/ts_build/src/agents/base/base.js +2 -53
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/definition.d.ts +2 -0
- package/ts_build/src/agents/tools/executeScript/definition.js +70 -0
- package/ts_build/src/agents/tools/executeScript/definition.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.d.ts +3 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.js +68 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.d.ts +15 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.js +267 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.d.ts +20 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.js +35 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.d.ts +4 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.js +202 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/handler.d.ts +27 -0
- package/ts_build/src/agents/tools/executeScript/handler.js +64 -0
- package/ts_build/src/agents/tools/executeScript/handler.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/index.d.ts +27 -0
- package/ts_build/src/agents/tools/executeScript/index.js +64 -0
- package/ts_build/src/agents/tools/executeScript/index.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript.d.ts +29 -0
- package/ts_build/src/agents/tools/executeScript.js +124 -0
- package/ts_build/src/agents/tools/executeScript.js.map +1 -0
- package/ts_build/src/agents/tools/index.d.ts +1 -0
- package/ts_build/src/agents/tools/index.js +1 -0
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +2 -0
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/cli.js +2 -6
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +9 -2
- package/ts_build/src/clients/index.js +17 -4
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/services/Tools.d.ts +11 -1
- package/ts_build/src/services/Tools.js +94 -3
- package/ts_build/src/services/Tools.js.map +1 -1
- package/ts_build/src/services/script-execution/SandboxContext.d.ts +34 -0
- package/ts_build/src/services/script-execution/SandboxContext.js +188 -0
- package/ts_build/src/services/script-execution/SandboxContext.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +17 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js +207 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +27 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.js +150 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptTracer.d.ts +19 -0
- package/ts_build/src/services/script-execution/ScriptTracer.js +186 -0
- package/ts_build/src/services/script-execution/ScriptTracer.js.map +1 -0
- package/ts_build/src/services/script-execution/types.d.ts +108 -0
- package/ts_build/src/services/script-execution/types.js +3 -0
- package/ts_build/src/services/script-execution/types.js.map +1 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tyvm/knowhow",
|
|
3
|
-
"version": "0.0.20",
|
|
3
|
+
"version": "0.0.22",
|
|
4
4
|
"description": "ai cli with plugins and agents",
|
|
5
5
|
"main": "ts_build/src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -51,10 +51,12 @@
|
|
|
51
51
|
"axios": "^1.5.0",
|
|
52
52
|
"cheerio": "^1.0.0",
|
|
53
53
|
"diff": "^5.2.0",
|
|
54
|
+
"esbuild": "^0.25.8",
|
|
54
55
|
"express": "^4.19.2",
|
|
55
56
|
"figma-js": "^1.16.1-0",
|
|
56
57
|
"gitignore-to-glob": "^0.3.0",
|
|
57
58
|
"ink": "^6.0.1",
|
|
59
|
+
"isolated-vm": "^5.0.4",
|
|
58
60
|
"jira-client": "^8.2.2",
|
|
59
61
|
"marked": "^10.0.0",
|
|
60
62
|
"marked-terminal": "^6.2.0",
|
package/src/agents/base/base.ts
CHANGED
|
@@ -186,86 +186,16 @@ export abstract class BaseAgent implements IAgent {
|
|
|
186
186
|
abstract getInitialMessages(userInput: string): Promise<Message[]>;
|
|
187
187
|
|
|
188
188
|
async processToolMessages(toolCall: ToolCall) {
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
console.log(toolCall);
|
|
193
|
-
const functionArgs = JSON.parse(
|
|
194
|
-
this.formatAiResponse(toolCall.function.arguments)
|
|
195
|
-
);
|
|
196
|
-
|
|
197
|
-
const toJsonIfObject = (arg: any) => {
|
|
198
|
-
if (typeof arg === "object") {
|
|
199
|
-
return JSON.stringify(arg, null, 2);
|
|
200
|
-
}
|
|
201
|
-
return arg;
|
|
202
|
-
};
|
|
203
|
-
|
|
204
|
-
const toolDefinition = this.tools.getTool(functionName);
|
|
205
|
-
const properties = toolDefinition?.function?.parameters?.properties || {};
|
|
206
|
-
const isPositional =
|
|
207
|
-
toolDefinition?.function?.parameters?.positional || false;
|
|
208
|
-
const fnArgs = isPositional
|
|
209
|
-
? Object.keys(properties).map((p) => functionArgs[p])
|
|
210
|
-
: functionArgs;
|
|
211
|
-
|
|
212
|
-
console.log(
|
|
213
|
-
`Calling function ${functionName} with args:`,
|
|
214
|
-
JSON.stringify(fnArgs, null, 2)
|
|
189
|
+
const { functionResp, toolMessages } = await this.tools.callTool(
|
|
190
|
+
toolCall,
|
|
191
|
+
this.getEnabledToolNames()
|
|
215
192
|
);
|
|
216
193
|
|
|
217
|
-
if (!functionToCall) {
|
|
218
|
-
const options = this.getEnabledToolNames().join(", ");
|
|
219
|
-
const error = `Function ${functionName} not found, options are ${options}`;
|
|
220
|
-
console.log(error);
|
|
221
|
-
return [
|
|
222
|
-
{
|
|
223
|
-
tool_call_id: toolCall.id,
|
|
224
|
-
role: "tool",
|
|
225
|
-
name: "error",
|
|
226
|
-
content: error,
|
|
227
|
-
},
|
|
228
|
-
];
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
const functionResponse = await Promise.resolve(
|
|
232
|
-
isPositional ? functionToCall(...fnArgs) : functionToCall(fnArgs)
|
|
233
|
-
).catch((e) => "ERROR: " + e.message);
|
|
234
|
-
|
|
235
194
|
this.agentEvents.emit(this.eventTypes.toolUsed, {
|
|
236
195
|
toolCall,
|
|
237
|
-
|
|
196
|
+
functionResp,
|
|
238
197
|
});
|
|
239
198
|
|
|
240
|
-
let toolMessages = [];
|
|
241
|
-
|
|
242
|
-
if (functionName === "multi_tool_use.parallel") {
|
|
243
|
-
const args = fnArgs[0] as {
|
|
244
|
-
recipient_name: string;
|
|
245
|
-
parameters: any;
|
|
246
|
-
}[];
|
|
247
|
-
|
|
248
|
-
toolMessages = args.map((call, index) => {
|
|
249
|
-
return {
|
|
250
|
-
tool_call_id: toolCall.id + "_" + index,
|
|
251
|
-
role: "tool",
|
|
252
|
-
name: call.recipient_name.split(".").pop(),
|
|
253
|
-
content: toJsonIfObject(functionResponse[index]) || "Done",
|
|
254
|
-
};
|
|
255
|
-
});
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
toolMessages = [
|
|
259
|
-
{
|
|
260
|
-
tool_call_id: toolCall.id,
|
|
261
|
-
role: "tool",
|
|
262
|
-
name: functionName,
|
|
263
|
-
content: toJsonIfObject(functionResponse) || "Done",
|
|
264
|
-
},
|
|
265
|
-
];
|
|
266
|
-
|
|
267
|
-
console.log(toolMessages);
|
|
268
|
-
|
|
269
199
|
return toolMessages;
|
|
270
200
|
}
|
|
271
201
|
|
|
@@ -421,7 +351,10 @@ export abstract class BaseAgent implements IAgent {
|
|
|
421
351
|
|
|
422
352
|
// Process initial messages if this is the first call
|
|
423
353
|
if (!_messages) {
|
|
424
|
-
messages = await this.messageProcessor.processMessages(
|
|
354
|
+
messages = await this.messageProcessor.processMessages(
|
|
355
|
+
messages,
|
|
356
|
+
"initial_call"
|
|
357
|
+
);
|
|
425
358
|
}
|
|
426
359
|
|
|
427
360
|
if (this.pendingUserMessages.length) {
|
|
@@ -437,7 +370,10 @@ export abstract class BaseAgent implements IAgent {
|
|
|
437
370
|
const endIndex = messages.length;
|
|
438
371
|
|
|
439
372
|
// Process messages before each AI call
|
|
440
|
-
messages = await this.messageProcessor.processMessages(
|
|
373
|
+
messages = await this.messageProcessor.processMessages(
|
|
374
|
+
messages,
|
|
375
|
+
"per_call"
|
|
376
|
+
);
|
|
441
377
|
const compressThreshold = 10000;
|
|
442
378
|
|
|
443
379
|
const response = await this.getClient().createChatCompletion({
|
|
@@ -490,7 +426,10 @@ export abstract class BaseAgent implements IAgent {
|
|
|
490
426
|
|
|
491
427
|
// Process messages after tool execution
|
|
492
428
|
if (newToolCalls && newToolCalls.length > 0) {
|
|
493
|
-
messages = await this.messageProcessor.processMessages(
|
|
429
|
+
messages = await this.messageProcessor.processMessages(
|
|
430
|
+
messages,
|
|
431
|
+
"post_call"
|
|
432
|
+
);
|
|
494
433
|
}
|
|
495
434
|
|
|
496
435
|
// Early exit: not required to call tool
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Execute Script Tool
|
|
2
|
+
|
|
3
|
+
The `executeScript` tool provides secure, isolated execution of TypeScript scripts with access to the Knowhow platform's tools and AI capabilities.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Secure Sandbox**: Scripts run in isolated-vm with no access to Node.js APIs
|
|
8
|
+
- **Tool Integration**: Access to all existing Knowhow tools via `callTool()`
|
|
9
|
+
- **AI Integration**: Direct access to AI completions via `llm()`
|
|
10
|
+
- **Resource Quotas**: Configurable limits on execution time, memory, tool calls, and tokens
|
|
11
|
+
- **Comprehensive Tracing**: Full audit trail of all script activities
|
|
12
|
+
- **Policy Enforcement**: Fine-grained security controls and access restrictions
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
// Basic usage
|
|
18
|
+
await executeScript({
|
|
19
|
+
script: `
|
|
20
|
+
console.log("Hello from script!");
|
|
21
|
+
|
|
22
|
+
const files = await callTool("fileSearch", { searchTerm: "*.ts" });
|
|
23
|
+
console.log("Found TypeScript files:", files);
|
|
24
|
+
|
|
25
|
+
const response = await llm([
|
|
26
|
+
{ role: "user", content: "What is TypeScript?" }
|
|
27
|
+
]);
|
|
28
|
+
|
|
29
|
+
return { message: "Script completed successfully" };
|
|
30
|
+
`
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// With custom policy
|
|
34
|
+
await executeScript({
|
|
35
|
+
script: "...",
|
|
36
|
+
policy: {
|
|
37
|
+
maxExecutionTimeMs: 10000,
|
|
38
|
+
maxMemoryMB: 64,
|
|
39
|
+
maxToolCalls: 5,
|
|
40
|
+
maxTokens: 1000,
|
|
41
|
+
allowedTools: ["fileSearch", "textSearch"],
|
|
42
|
+
deniedTools: ["execCommand"]
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Available Functions in Scripts
|
|
48
|
+
|
|
49
|
+
### `callTool(name: string, args: any): Promise<any>`
|
|
50
|
+
Call any available Knowhow tool by name with arguments.
|
|
51
|
+
|
|
52
|
+
### `llm(messages: ChatMessage[], options?: LLMOptions): Promise<ChatCompletion>`
|
|
53
|
+
Make AI completion requests with message history and options.
|
|
54
|
+
|
|
55
|
+
## Security Features
|
|
56
|
+
|
|
57
|
+
- **No Node.js Access**: Scripts cannot access filesystem, network, or system APIs directly
|
|
58
|
+
- **Tool Authorization**: All external access goes through existing authorization systems
|
|
59
|
+
- **Resource Limits**: Prevents runaway scripts with time, memory, and usage quotas
|
|
60
|
+
- **Trace Sanitization**: Sensitive data is redacted from execution logs
|
|
61
|
+
- **Policy Enforcement**: Granular control over what tools and resources scripts can access
|
|
62
|
+
|
|
63
|
+
## Implementation Details
|
|
64
|
+
|
|
65
|
+
- **ScriptExecutor**: Handles compilation and isolated execution
|
|
66
|
+
- **SandboxContext**: Provides `callTool` and `llm` function implementations
|
|
67
|
+
- **ScriptTracer**: Records all script activities for debugging and audit
|
|
68
|
+
- **ScriptPolicyEnforcer**: Enforces resource quotas and security policies
|
|
69
|
+
|
|
70
|
+
## Files
|
|
71
|
+
|
|
72
|
+
- `types.ts` - TypeScript interfaces and type definitions
|
|
73
|
+
- `ScriptExecutor.ts` - Core execution engine with isolated-vm
|
|
74
|
+
- `SandboxContext.ts` - Script execution context with tool/AI access
|
|
75
|
+
- `ScriptTracer.ts` - Event tracing and monitoring system
|
|
76
|
+
- `ScriptPolicy.ts` - Security policy enforcement (`ScriptPolicyEnforcer`)
|
|
77
|
+
- `executeScript.ts` - Tool handler and main entry point
|
|
78
|
+
- `examples/` - Example scripts demonstrating capabilities
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { Tool } from "../../../clients";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Tool for executing TypeScript scripts in a secure sandbox
|
|
5
|
+
*/
|
|
6
|
+
export const executeScriptDefinition: Tool = {
|
|
7
|
+
type: "function",
|
|
8
|
+
function: {
|
|
9
|
+
name: "executeScript",
|
|
10
|
+
description: `Execute TypeScript code in a secure sandbox environment with access to tools and AI models.
|
|
11
|
+
|
|
12
|
+
The script has access to:
|
|
13
|
+
- callTool(toolName, parameters): Call any available tool
|
|
14
|
+
- llm(messages, options): Make LLM calls
|
|
15
|
+
- createArtifact(name, content, type): Create downloadable artifacts
|
|
16
|
+
- console: Standard console logging
|
|
17
|
+
- getQuotaUsage(): Check resource usage
|
|
18
|
+
- sleep(ms): Pause execution for a specified time
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
\`\`\`typescript
|
|
22
|
+
// Call a tool
|
|
23
|
+
const searchResult = await callTool('textSearch', { searchTerm: 'hello world' });
|
|
24
|
+
console.log('Search found:', searchResult);
|
|
25
|
+
|
|
26
|
+
// Call LLM
|
|
27
|
+
const response = await llm([
|
|
28
|
+
{ role: 'user', content: 'Explain quantum computing' }
|
|
29
|
+
], { model: 'gpt-4o-mini', maxTokens: 100 });
|
|
30
|
+
console.log('LLM response:', response.choices[0].message.content);
|
|
31
|
+
|
|
32
|
+
// Create an artifact
|
|
33
|
+
createArtifact('summary.md', '# Summary\\nThis is a test', 'markdown');
|
|
34
|
+
|
|
35
|
+
return { message: 'Script completed successfully' };
|
|
36
|
+
\`\`\`
|
|
37
|
+
|
|
38
|
+
Must either return or await the a top level function
|
|
39
|
+
|
|
40
|
+
Test tools yourself to know the return type when scripting. Can pass JSON.stringified data into llm call if you don't need to know the type.
|
|
41
|
+
You cannot use isolation breaking methods like: setTimeout setInterval setImmediate clearTimeout clearInterval
|
|
42
|
+
|
|
43
|
+
Security: Scripts run in isolation with quotas on tool calls, tokens, time, and cost.`,
|
|
44
|
+
|
|
45
|
+
parameters: {
|
|
46
|
+
type: "object",
|
|
47
|
+
properties: {
|
|
48
|
+
script: {
|
|
49
|
+
type: "string",
|
|
50
|
+
description: "The TypeScript code to execute",
|
|
51
|
+
},
|
|
52
|
+
maxToolCalls: {
|
|
53
|
+
type: "number",
|
|
54
|
+
description: "Maximum number of tool calls allowed (default: 50)",
|
|
55
|
+
},
|
|
56
|
+
maxTokens: {
|
|
57
|
+
type: "number",
|
|
58
|
+
description: "Maximum tokens for LLM calls (default: 10000)",
|
|
59
|
+
},
|
|
60
|
+
maxExecutionTimeMs: {
|
|
61
|
+
type: "number",
|
|
62
|
+
description:
|
|
63
|
+
"Maximum execution time in milliseconds (default: 30000)",
|
|
64
|
+
},
|
|
65
|
+
maxCostUsd: {
|
|
66
|
+
type: "number",
|
|
67
|
+
description: "Maximum cost in USD (default: 1.0)",
|
|
68
|
+
},
|
|
69
|
+
},
|
|
70
|
+
required: ["script"],
|
|
71
|
+
},
|
|
72
|
+
},
|
|
73
|
+
};
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Quick test for executeScript - minimal example
|
|
4
|
+
* Usage: npx ts-node src/agents/tools/executeScript/examples/quick-test.ts
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { executeScript } from "../../executeScript";
|
|
8
|
+
import { Tools } from "../../../../services";
|
|
9
|
+
import { Clients } from "../../../../clients";
|
|
10
|
+
|
|
11
|
+
// Simple test script
|
|
12
|
+
const simpleScript = `
|
|
13
|
+
console.log("Hello from executeScript!");
|
|
14
|
+
|
|
15
|
+
async function main() {
|
|
16
|
+
// Test basic functionality
|
|
17
|
+
console.log("Running simple test...");
|
|
18
|
+
|
|
19
|
+
// Try a simple tool call
|
|
20
|
+
const files = await callTool("fileSearch", { searchTerm: "*.ts" });
|
|
21
|
+
console.log("Found", files?.length || 0, "TypeScript files");
|
|
22
|
+
|
|
23
|
+
return {
|
|
24
|
+
message: "Simple test completed!",
|
|
25
|
+
filesFound: files?.length || 0,
|
|
26
|
+
timestamp: new Date().toISOString()
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
await main().then(result => {
|
|
31
|
+
console.log("Result:", result);
|
|
32
|
+
}).catch(error => {
|
|
33
|
+
console.error("Error:", error);
|
|
34
|
+
});
|
|
35
|
+
`;
|
|
36
|
+
|
|
37
|
+
async function quickTest() {
|
|
38
|
+
console.log("🧪 Quick executeScript test\n");
|
|
39
|
+
|
|
40
|
+
try {
|
|
41
|
+
const result = await executeScript(
|
|
42
|
+
{
|
|
43
|
+
script: simpleScript,
|
|
44
|
+
maxToolCalls: 5,
|
|
45
|
+
maxTokens: 100,
|
|
46
|
+
maxExecutionTimeMs: 10000,
|
|
47
|
+
maxCostUsd: 0.1,
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
tools: Tools,
|
|
51
|
+
clients: Clients,
|
|
52
|
+
}
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
console.log("\n๐ QUICK TEST RESULT:");
|
|
56
|
+
console.log("Success:", result.success);
|
|
57
|
+
console.log("Result:", result.result);
|
|
58
|
+
console.log("Tool calls:", result.quotaUsage.toolCalls);
|
|
59
|
+
console.log("Cost: $" + result.quotaUsage.costUsd.toFixed(4));
|
|
60
|
+
|
|
61
|
+
if (result.consoleOutput.length > 0) {
|
|
62
|
+
console.log("\n๐ Console Output:");
|
|
63
|
+
result.consoleOutput.forEach((entry) => {
|
|
64
|
+
console.log(` ${entry}`);
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (!result.success) {
|
|
69
|
+
console.log("❌ Error:", result.error);
|
|
70
|
+
}
|
|
71
|
+
} catch (error) {
|
|
72
|
+
console.error("💥 Test failed:", error);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if (require.main === module) {
|
|
77
|
+
quickTest();
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export { quickTest };
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Serialization Test for executeScript - demonstrates transfer issues
|
|
4
|
+
* Usage: npx ts-node src/agents/tools/executeScript/examples/serialization-test.ts
|
|
5
|
+
*
|
|
6
|
+
* This test demonstrates the "A non-transferable value was passed" errors
|
|
7
|
+
* that occur when trying to return complex objects from executeScript.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { executeScript } from "../../executeScript";
|
|
11
|
+
import { Tools } from "../../../../services";
|
|
12
|
+
import { Clients } from "../../../../clients";
|
|
13
|
+
|
|
14
|
+
interface TestCase {
|
|
15
|
+
name: string;
|
|
16
|
+
script: string;
|
|
17
|
+
expectedToWork: boolean;
|
|
18
|
+
description: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
const testCases: TestCase[] = [
|
|
22
|
+
{
|
|
23
|
+
name: "primitive-string",
|
|
24
|
+
expectedToWork: true,
|
|
25
|
+
description: "Simple string return - should work",
|
|
26
|
+
script: `
|
|
27
|
+
console.log("Testing primitive string return");
|
|
28
|
+
return "Hello World";
|
|
29
|
+
`
|
|
30
|
+
},
|
|
31
|
+
|
|
32
|
+
{
|
|
33
|
+
name: "primitive-number",
|
|
34
|
+
expectedToWork: true,
|
|
35
|
+
description: "Simple number return - should work",
|
|
36
|
+
script: `
|
|
37
|
+
console.log("Testing primitive number return");
|
|
38
|
+
return 42;
|
|
39
|
+
`
|
|
40
|
+
},
|
|
41
|
+
|
|
42
|
+
{
|
|
43
|
+
name: "primitive-boolean",
|
|
44
|
+
expectedToWork: true,
|
|
45
|
+
description: "Simple boolean return - should work",
|
|
46
|
+
script: `
|
|
47
|
+
console.log("Testing primitive boolean return");
|
|
48
|
+
return true;
|
|
49
|
+
`
|
|
50
|
+
},
|
|
51
|
+
|
|
52
|
+
{
|
|
53
|
+
name: "simple-array",
|
|
54
|
+
expectedToWork: true, // You mentioned this works
|
|
55
|
+
description: "Simple array return - you said this works",
|
|
56
|
+
script: `
|
|
57
|
+
console.log("Testing simple array return");
|
|
58
|
+
return [1, 2, 3, "hello"];
|
|
59
|
+
`
|
|
60
|
+
},
|
|
61
|
+
|
|
62
|
+
{
|
|
63
|
+
name: "simple-object",
|
|
64
|
+
expectedToWork: false, // This is where I got errors
|
|
65
|
+
description: "Simple object return - expected to fail with transferable error",
|
|
66
|
+
script: `
|
|
67
|
+
console.log("Testing simple object return");
|
|
68
|
+
return {
|
|
69
|
+
message: "Hello",
|
|
70
|
+
count: 42,
|
|
71
|
+
success: true
|
|
72
|
+
};
|
|
73
|
+
`
|
|
74
|
+
},
|
|
75
|
+
|
|
76
|
+
{
|
|
77
|
+
name: "nested-object",
|
|
78
|
+
expectedToWork: false,
|
|
79
|
+
description: "Nested object return - expected to fail",
|
|
80
|
+
script: `
|
|
81
|
+
console.log("Testing nested object return");
|
|
82
|
+
return {
|
|
83
|
+
data: {
|
|
84
|
+
items: [1, 2, 3],
|
|
85
|
+
metadata: { timestamp: new Date().toISOString() }
|
|
86
|
+
},
|
|
87
|
+
status: "success"
|
|
88
|
+
};
|
|
89
|
+
`
|
|
90
|
+
},
|
|
91
|
+
|
|
92
|
+
{
|
|
93
|
+
name: "object-with-functions",
|
|
94
|
+
expectedToWork: false,
|
|
95
|
+
description: "Object with functions - definitely should fail",
|
|
96
|
+
script: `
|
|
97
|
+
console.log("Testing object with functions");
|
|
98
|
+
return {
|
|
99
|
+
data: [1, 2, 3],
|
|
100
|
+
transform: function(x) { return x * 2; },
|
|
101
|
+
helper: () => "test"
|
|
102
|
+
};
|
|
103
|
+
`
|
|
104
|
+
},
|
|
105
|
+
|
|
106
|
+
{
|
|
107
|
+
name: "array-of-objects",
|
|
108
|
+
expectedToWork: false, // Based on my experience
|
|
109
|
+
description: "Array containing objects - expected to fail",
|
|
110
|
+
script: `
|
|
111
|
+
console.log("Testing array of objects");
|
|
112
|
+
return [
|
|
113
|
+
{ id: 1, name: "Alice" },
|
|
114
|
+
{ id: 2, name: "Bob" },
|
|
115
|
+
{ id: 3, name: "Charlie" }
|
|
116
|
+
];
|
|
117
|
+
`
|
|
118
|
+
},
|
|
119
|
+
|
|
120
|
+
{
|
|
121
|
+
name: "json-stringify-workaround",
|
|
122
|
+
expectedToWork: true,
|
|
123
|
+
description: "Using JSON.stringify as workaround - should work",
|
|
124
|
+
script: `
|
|
125
|
+
console.log("Testing JSON.stringify workaround");
|
|
126
|
+
const data = {
|
|
127
|
+
message: "Hello",
|
|
128
|
+
items: [1, 2, 3],
|
|
129
|
+
nested: { key: "value" }
|
|
130
|
+
};
|
|
131
|
+
return JSON.stringify(data);
|
|
132
|
+
`
|
|
133
|
+
},
|
|
134
|
+
|
|
135
|
+
{
|
|
136
|
+
name: "tool-call-result",
|
|
137
|
+
expectedToWork: false, // Based on my experience with news aggregation
|
|
138
|
+
description: "Tool call result object - expected to fail",
|
|
139
|
+
script: `
|
|
140
|
+
console.log("Testing tool call result return");
|
|
141
|
+
|
|
142
|
+
try {
|
|
143
|
+
const searchResult = await callTool("fileSearch", { searchTerm: "package.json" });
|
|
144
|
+
|
|
145
|
+
// Try to return a structured response with the tool result
|
|
146
|
+
return {
|
|
147
|
+
success: true,
|
|
148
|
+
toolResult: searchResult,
|
|
149
|
+
timestamp: new Date().toISOString()
|
|
150
|
+
};
|
|
151
|
+
} catch (error) {
|
|
152
|
+
return {
|
|
153
|
+
success: false,
|
|
154
|
+
error: error.message
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
`
|
|
158
|
+
},
|
|
159
|
+
|
|
160
|
+
{
|
|
161
|
+
name: "date-object",
|
|
162
|
+
expectedToWork: false,
|
|
163
|
+
description: "Date object return - expected to fail",
|
|
164
|
+
script: `
|
|
165
|
+
console.log("Testing Date object return");
|
|
166
|
+
return new Date();
|
|
167
|
+
`
|
|
168
|
+
},
|
|
169
|
+
|
|
170
|
+
{
|
|
171
|
+
name: "map-object",
|
|
172
|
+
expectedToWork: false,
|
|
173
|
+
description: "Map object return - expected to fail",
|
|
174
|
+
script: `
|
|
175
|
+
console.log("Testing Map object return");
|
|
176
|
+
const map = new Map();
|
|
177
|
+
map.set("key1", "value1");
|
|
178
|
+
map.set("key2", "value2");
|
|
179
|
+
return map;
|
|
180
|
+
`
|
|
181
|
+
}
|
|
182
|
+
];
|
|
183
|
+
|
|
184
|
+
async function runSerializationTests() {
|
|
185
|
+
console.log("🧪 Running executeScript Serialization Tests\\n");
|
|
186
|
+
console.log("=" .repeat(80));
|
|
187
|
+
|
|
188
|
+
const results = {
|
|
189
|
+
passed: 0,
|
|
190
|
+
failed: 0,
|
|
191
|
+
unexpected: 0,
|
|
192
|
+
details: [] as any[]
|
|
193
|
+
};
|
|
194
|
+
|
|
195
|
+
for (const testCase of testCases) {
|
|
196
|
+
console.log(`\\n๐ Testing: ${testCase.name}`);
|
|
197
|
+
console.log(`๐ Description: ${testCase.description}`);
|
|
198
|
+
console.log(`🎯 Expected to work: ${testCase.expectedToWork}`);
|
|
199
|
+
|
|
200
|
+
try {
|
|
201
|
+
const result = await executeScript({
|
|
202
|
+
script: testCase.script,
|
|
203
|
+
maxToolCalls: 5,
|
|
204
|
+
maxTokens: 500,
|
|
205
|
+
maxExecutionTimeMs: 10000,
|
|
206
|
+
maxCostUsd: 0.1
|
|
207
|
+
}, {
|
|
208
|
+
tools: Tools,
|
|
209
|
+
clients: Clients,
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
const actualWorked = result.success;
|
|
213
|
+
const matchesExpectation = actualWorked === testCase.expectedToWork;
|
|
214
|
+
|
|
215
|
+
if (matchesExpectation) {
|
|
216
|
+
results.passed++;
|
|
217
|
+
console.log(`✅ PASS - Behaved as expected`);
|
|
218
|
+
} else {
|
|
219
|
+
results.unexpected++;
|
|
220
|
+
console.log(`⚠️ UNEXPECTED - Expected ${testCase.expectedToWork ? 'success' : 'failure'}, got ${actualWorked ? 'success' : 'failure'}`);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
results.details.push({
|
|
224
|
+
name: testCase.name,
|
|
225
|
+
expected: testCase.expectedToWork,
|
|
226
|
+
actual: actualWorked,
|
|
227
|
+
matches: matchesExpectation,
|
|
228
|
+
result: actualWorked ? result.result : null,
|
|
229
|
+
error: actualWorked ? null : result.error,
|
|
230
|
+
consoleOutput: result.consoleOutput
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
if (actualWorked) {
|
|
234
|
+
console.log(`๐ Result type: ${typeof result.result}`);
|
|
235
|
+
console.log(`๐ Result: ${JSON.stringify(result.result).substring(0, 200)}${JSON.stringify(result.result).length > 200 ? '...' : ''}`);
|
|
236
|
+
} else {
|
|
237
|
+
console.log(`❌ Error: ${result.error}`);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (result.consoleOutput.length > 0) {
|
|
241
|
+
console.log(`๐ Console: ${result.consoleOutput.join(', ')}`);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
} catch (error) {
|
|
245
|
+
results.failed++;
|
|
246
|
+
console.log(`💥 TEST FRAMEWORK ERROR: ${error.message}`);
|
|
247
|
+
|
|
248
|
+
results.details.push({
|
|
249
|
+
name: testCase.name,
|
|
250
|
+
expected: testCase.expectedToWork,
|
|
251
|
+
actual: false,
|
|
252
|
+
matches: !testCase.expectedToWork,
|
|
253
|
+
result: null,
|
|
254
|
+
error: error.message,
|
|
255
|
+
consoleOutput: []
|
|
256
|
+
});
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Print summary
|
|
261
|
+
console.log("\\n" + "=".repeat(80));
|
|
262
|
+
console.log("๐ SERIALIZATION TEST SUMMARY");
|
|
263
|
+
console.log("=".repeat(80));
|
|
264
|
+
console.log(`✅ Tests matching expectations: ${results.passed}`);
|
|
265
|
+
console.log(`⚠️ Unexpected behaviors: ${results.unexpected}`);
|
|
266
|
+
console.log(`💥 Framework failures: ${results.failed}`);
|
|
267
|
+
console.log(`๐ Total tests: ${testCases.length}`);
|
|
268
|
+
|
|
269
|
+
if (results.unexpected > 0) {
|
|
270
|
+
console.log("\\n๐ UNEXPECTED RESULTS:");
|
|
271
|
+
results.details
|
|
272
|
+
.filter(d => !d.matches)
|
|
273
|
+
.forEach(detail => {
|
|
274
|
+
console.log(` - ${detail.name}: Expected ${detail.expected ? 'success' : 'failure'}, got ${detail.actual ? 'success' : 'failure'}`);
|
|
275
|
+
if (detail.error) {
|
|
276
|
+
console.log(` Error: ${detail.error}`);
|
|
277
|
+
}
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Analysis and recommendations
|
|
282
|
+
console.log("\\n🔬 ANALYSIS:");
|
|
283
|
+
|
|
284
|
+
const workingTypes = results.details.filter(d => d.actual).map(d => d.name);
|
|
285
|
+
const failingTypes = results.details.filter(d => !d.actual).map(d => d.name);
|
|
286
|
+
|
|
287
|
+
console.log("\\n✅ Types that work:");
|
|
288
|
+
workingTypes.forEach(name => console.log(` - ${name}`));
|
|
289
|
+
|
|
290
|
+
console.log("\\n❌ Types that fail:");
|
|
291
|
+
failingTypes.forEach(name => console.log(` - ${name}`));
|
|
292
|
+
|
|
293
|
+
console.log("\\n💡 RECOMMENDATIONS:");
|
|
294
|
+
console.log(" 1. Use JSON.stringify() for complex objects");
|
|
295
|
+
console.log(" 2. Return primitive values when possible");
|
|
296
|
+
console.log(" 3. Consider createArtifact() for structured data");
|
|
297
|
+
console.log(" 4. Test your return types with this suite");
|
|
298
|
+
|
|
299
|
+
return results;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (require.main === module) {
|
|
303
|
+
runSerializationTests().catch(error => {
|
|
304
|
+
console.error("Test suite failed:", error);
|
|
305
|
+
process.exit(1);
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
export { runSerializationTests, testCases };
|