@tyvm/knowhow 0.0.21 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/agents/tools/executeScript/README.md +78 -0
- package/src/agents/tools/executeScript/definition.ts +73 -0
- package/src/agents/tools/executeScript/examples/quick-test.ts +80 -0
- package/src/agents/tools/executeScript/examples/serialization-test.ts +309 -0
- package/src/agents/tools/executeScript/examples/test-runner.ts +204 -0
- package/src/agents/tools/executeScript/index.ts +74 -0
- package/src/agents/tools/index.ts +1 -0
- package/src/agents/tools/list.ts +2 -1
- package/src/cli.ts +2 -6
- package/src/clients/index.ts +23 -9
- package/src/services/Tools.ts +19 -3
- package/src/services/script-execution/SandboxContext.ts +278 -0
- package/src/services/script-execution/ScriptExecutor.ts +337 -0
- package/src/services/script-execution/ScriptPolicy.ts +236 -0
- package/src/services/script-execution/ScriptTracer.ts +249 -0
- package/src/services/script-execution/types.ts +134 -0
- package/ts_build/src/agents/tools/executeScript/definition.d.ts +2 -0
- package/ts_build/src/agents/tools/executeScript/definition.js +70 -0
- package/ts_build/src/agents/tools/executeScript/definition.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.d.ts +3 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.js +68 -0
- package/ts_build/src/agents/tools/executeScript/examples/quick-test.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.d.ts +15 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.js +267 -0
- package/ts_build/src/agents/tools/executeScript/examples/serialization-test.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.d.ts +20 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.js +35 -0
- package/ts_build/src/agents/tools/executeScript/examples/simple-example.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.d.ts +4 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.js +202 -0
- package/ts_build/src/agents/tools/executeScript/examples/test-runner.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/handler.d.ts +27 -0
- package/ts_build/src/agents/tools/executeScript/handler.js +64 -0
- package/ts_build/src/agents/tools/executeScript/handler.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript/index.d.ts +27 -0
- package/ts_build/src/agents/tools/executeScript/index.js +64 -0
- package/ts_build/src/agents/tools/executeScript/index.js.map +1 -0
- package/ts_build/src/agents/tools/executeScript.d.ts +29 -0
- package/ts_build/src/agents/tools/executeScript.js +124 -0
- package/ts_build/src/agents/tools/executeScript.js.map +1 -0
- package/ts_build/src/agents/tools/index.d.ts +1 -0
- package/ts_build/src/agents/tools/index.js +1 -0
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +2 -0
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/cli.js +2 -6
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +9 -2
- package/ts_build/src/clients/index.js +17 -4
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/services/Tools.d.ts +3 -0
- package/ts_build/src/services/Tools.js +10 -2
- package/ts_build/src/services/Tools.js.map +1 -1
- package/ts_build/src/services/script-execution/SandboxContext.d.ts +34 -0
- package/ts_build/src/services/script-execution/SandboxContext.js +188 -0
- package/ts_build/src/services/script-execution/SandboxContext.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.d.ts +17 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js +207 -0
- package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +27 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.js +150 -0
- package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -0
- package/ts_build/src/services/script-execution/ScriptTracer.d.ts +19 -0
- package/ts_build/src/services/script-execution/ScriptTracer.js +186 -0
- package/ts_build/src/services/script-execution/ScriptTracer.js.map +1 -0
- package/ts_build/src/services/script-execution/types.d.ts +108 -0
- package/ts_build/src/services/script-execution/types.js +3 -0
- package/ts_build/src/services/script-execution/types.js.map +1 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tyvm/knowhow",
|
|
3
|
-
"version": "0.0.21",
|
|
3
|
+
"version": "0.0.22",
|
|
4
4
|
"description": "ai cli with plugins and agents",
|
|
5
5
|
"main": "ts_build/src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -51,10 +51,12 @@
|
|
|
51
51
|
"axios": "^1.5.0",
|
|
52
52
|
"cheerio": "^1.0.0",
|
|
53
53
|
"diff": "^5.2.0",
|
|
54
|
+
"esbuild": "^0.25.8",
|
|
54
55
|
"express": "^4.19.2",
|
|
55
56
|
"figma-js": "^1.16.1-0",
|
|
56
57
|
"gitignore-to-glob": "^0.3.0",
|
|
57
58
|
"ink": "^6.0.1",
|
|
59
|
+
"isolated-vm": "^5.0.4",
|
|
58
60
|
"jira-client": "^8.2.2",
|
|
59
61
|
"marked": "^10.0.0",
|
|
60
62
|
"marked-terminal": "^6.2.0",
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Execute Script Tool
|
|
2
|
+
|
|
3
|
+
The `executeScript` tool provides secure, isolated execution of TypeScript scripts with access to the Knowhow platform's tools and AI capabilities.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Secure Sandbox**: Scripts run in isolated-vm with no access to Node.js APIs
|
|
8
|
+
- **Tool Integration**: Access to all existing Knowhow tools via `callTool()`
|
|
9
|
+
- **AI Integration**: Direct access to AI completions via `llm()`
|
|
10
|
+
- **Resource Quotas**: Configurable limits on execution time, memory, tool calls, and tokens
|
|
11
|
+
- **Comprehensive Tracing**: Full audit trail of all script activities
|
|
12
|
+
- **Policy Enforcement**: Fine-grained security controls and access restrictions
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
// Basic usage
|
|
18
|
+
await executeScript({
|
|
19
|
+
script: `
|
|
20
|
+
console.log("Hello from script!");
|
|
21
|
+
|
|
22
|
+
const files = await callTool("fileSearch", { searchTerm: "*.ts" });
|
|
23
|
+
console.log("Found TypeScript files:", files);
|
|
24
|
+
|
|
25
|
+
const response = await llm([
|
|
26
|
+
{ role: "user", content: "What is TypeScript?" }
|
|
27
|
+
]);
|
|
28
|
+
|
|
29
|
+
return { message: "Script completed successfully" };
|
|
30
|
+
`
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// With custom policy
|
|
34
|
+
await executeScript({
|
|
35
|
+
script: "...",
|
|
36
|
+
policy: {
|
|
37
|
+
maxExecutionTimeMs: 10000,
|
|
38
|
+
maxMemoryMB: 64,
|
|
39
|
+
maxToolCalls: 5,
|
|
40
|
+
maxTokens: 1000,
|
|
41
|
+
allowedTools: ["fileSearch", "textSearch"],
|
|
42
|
+
deniedTools: ["execCommand"]
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Available Functions in Scripts
|
|
48
|
+
|
|
49
|
+
### `callTool(name: string, args: any): Promise<any>`
|
|
50
|
+
Call any available Knowhow tool by name with arguments.
|
|
51
|
+
|
|
52
|
+
### `llm(messages: ChatMessage[], options?: LLMOptions): Promise<ChatCompletion>`
|
|
53
|
+
Make AI completion requests with message history and options.
|
|
54
|
+
|
|
55
|
+
## Security Features
|
|
56
|
+
|
|
57
|
+
- **No Node.js Access**: Scripts cannot access filesystem, network, or system APIs directly
|
|
58
|
+
- **Tool Authorization**: All external access goes through existing authorization systems
|
|
59
|
+
- **Resource Limits**: Prevents runaway scripts with time, memory, and usage quotas
|
|
60
|
+
- **Trace Sanitization**: Sensitive data is redacted from execution logs
|
|
61
|
+
- **Policy Enforcement**: Granular control over what tools and resources scripts can access
|
|
62
|
+
|
|
63
|
+
## Implementation Details
|
|
64
|
+
|
|
65
|
+
- **ScriptExecutor**: Handles compilation and isolated execution
|
|
66
|
+
- **SandboxContext**: Provides `callTool` and `llm` function implementations
|
|
67
|
+
- **ScriptTracer**: Records all script activities for debugging and audit
|
|
68
|
+
- **ScriptPolicyEnforcer**: Enforces resource quotas and security policies
|
|
69
|
+
|
|
70
|
+
## Files
|
|
71
|
+
|
|
72
|
+
- `types.ts` - TypeScript interfaces and type definitions
|
|
73
|
+
- `ScriptExecutor.ts` - Core execution engine with isolated-vm
|
|
74
|
+
- `SandboxContext.ts` - Script execution context with tool/AI access
|
|
75
|
+
- `ScriptTracer.ts` - Event tracing and monitoring system
|
|
76
|
+
- `ScriptPolicy.ts` - Security policy enforcement (`ScriptPolicyEnforcer`)
|
|
77
|
+
- `executeScript.ts` - Tool handler and main entry point
|
|
78
|
+
- `examples/` - Example scripts demonstrating capabilities
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { Tool } from "../../../clients";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Tool for executing TypeScript scripts in a secure sandbox
|
|
5
|
+
*/
|
|
6
|
+
export const executeScriptDefinition: Tool = {
|
|
7
|
+
type: "function",
|
|
8
|
+
function: {
|
|
9
|
+
name: "executeScript",
|
|
10
|
+
description: `Execute TypeScript code in a secure sandbox environment with access to tools and AI models.
|
|
11
|
+
|
|
12
|
+
The script has access to:
|
|
13
|
+
- callTool(toolName, parameters): Call any available tool
|
|
14
|
+
- llm(messages, options): Make LLM calls
|
|
15
|
+
- createArtifact(name, content, type): Create downloadable artifacts
|
|
16
|
+
- console: Standard console logging
|
|
17
|
+
- getQuotaUsage(): Check resource usage
|
|
18
|
+
- sleep(ms): Pause execution for a specified time
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
\`\`\`typescript
|
|
22
|
+
// Call a tool
|
|
23
|
+
const searchResult = await callTool('textSearch', { searchTerm: 'hello world' });
|
|
24
|
+
console.log('Search found:', searchResult);
|
|
25
|
+
|
|
26
|
+
// Call LLM
|
|
27
|
+
const response = await llm([
|
|
28
|
+
{ role: 'user', content: 'Explain quantum computing' }
|
|
29
|
+
], { model: 'gpt-4o-mini', maxTokens: 100 });
|
|
30
|
+
console.log('LLM response:', response.choices[0].message.content);
|
|
31
|
+
|
|
32
|
+
// Create an artifact
|
|
33
|
+
createArtifact('summary.md', '# Summary\\nThis is a test', 'markdown');
|
|
34
|
+
|
|
35
|
+
return { message: 'Script completed successfully' };
|
|
36
|
+
\`\`\`
|
|
37
|
+
|
|
38
|
+
Must either return or await the a top level function
|
|
39
|
+
|
|
40
|
+
Test tools yourself to know the return type when scripting. Can pass JSON.stringified data into llm call if you don't need to know the type.
|
|
41
|
+
You cannot use isolation breaking methods like: setTimeout setInterval setImmediate clearTimeout clearInterval
|
|
42
|
+
|
|
43
|
+
Security: Scripts run in isolation with quotas on tool calls, tokens, time, and cost.`,
|
|
44
|
+
|
|
45
|
+
parameters: {
|
|
46
|
+
type: "object",
|
|
47
|
+
properties: {
|
|
48
|
+
script: {
|
|
49
|
+
type: "string",
|
|
50
|
+
description: "The TypeScript code to execute",
|
|
51
|
+
},
|
|
52
|
+
maxToolCalls: {
|
|
53
|
+
type: "number",
|
|
54
|
+
description: "Maximum number of tool calls allowed (default: 50)",
|
|
55
|
+
},
|
|
56
|
+
maxTokens: {
|
|
57
|
+
type: "number",
|
|
58
|
+
description: "Maximum tokens for LLM calls (default: 10000)",
|
|
59
|
+
},
|
|
60
|
+
maxExecutionTimeMs: {
|
|
61
|
+
type: "number",
|
|
62
|
+
description:
|
|
63
|
+
"Maximum execution time in milliseconds (default: 30000)",
|
|
64
|
+
},
|
|
65
|
+
maxCostUsd: {
|
|
66
|
+
type: "number",
|
|
67
|
+
description: "Maximum cost in USD (default: 1.0)",
|
|
68
|
+
},
|
|
69
|
+
},
|
|
70
|
+
required: ["script"],
|
|
71
|
+
},
|
|
72
|
+
},
|
|
73
|
+
};
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Quick test for executeScript - minimal example
|
|
4
|
+
* Usage: npx ts-node src/agents/tools/executeScript/examples/quick-test.ts
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { executeScript } from "../../executeScript";
|
|
8
|
+
import { Tools } from "../../../../services";
|
|
9
|
+
import { Clients } from "../../../../clients";
|
|
10
|
+
|
|
11
|
+
// Simple test script
|
|
12
|
+
const simpleScript = `
|
|
13
|
+
console.log("Hello from executeScript!");
|
|
14
|
+
|
|
15
|
+
async function main() {
|
|
16
|
+
// Test basic functionality
|
|
17
|
+
console.log("Running simple test...");
|
|
18
|
+
|
|
19
|
+
// Try a simple tool call
|
|
20
|
+
const files = await callTool("fileSearch", { searchTerm: "*.ts" });
|
|
21
|
+
console.log("Found", files?.length || 0, "TypeScript files");
|
|
22
|
+
|
|
23
|
+
return {
|
|
24
|
+
message: "Simple test completed!",
|
|
25
|
+
filesFound: files?.length || 0,
|
|
26
|
+
timestamp: new Date().toISOString()
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
await main().then(result => {
|
|
31
|
+
console.log("Result:", result);
|
|
32
|
+
}).catch(error => {
|
|
33
|
+
console.error("Error:", error);
|
|
34
|
+
});
|
|
35
|
+
`;
|
|
36
|
+
|
|
37
|
+
async function quickTest() {
|
|
38
|
+
console.log("🧪 Quick executeScript test\n");
|
|
39
|
+
|
|
40
|
+
try {
|
|
41
|
+
const result = await executeScript(
|
|
42
|
+
{
|
|
43
|
+
script: simpleScript,
|
|
44
|
+
maxToolCalls: 5,
|
|
45
|
+
maxTokens: 100,
|
|
46
|
+
maxExecutionTimeMs: 10000,
|
|
47
|
+
maxCostUsd: 0.1,
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
tools: Tools,
|
|
51
|
+
clients: Clients,
|
|
52
|
+
}
|
|
53
|
+
);
|
|
54
|
+
|
|
55
|
+
console.log("\n๐ QUICK TEST RESULT:");
|
|
56
|
+
console.log("Success:", result.success);
|
|
57
|
+
console.log("Result:", result.result);
|
|
58
|
+
console.log("Tool calls:", result.quotaUsage.toolCalls);
|
|
59
|
+
console.log("Cost: $" + result.quotaUsage.costUsd.toFixed(4));
|
|
60
|
+
|
|
61
|
+
if (result.consoleOutput.length > 0) {
|
|
62
|
+
console.log("\n๐ Console Output:");
|
|
63
|
+
result.consoleOutput.forEach((entry) => {
|
|
64
|
+
console.log(` ${entry}`);
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (!result.success) {
|
|
69
|
+
console.log("โ Error:", result.error);
|
|
70
|
+
}
|
|
71
|
+
} catch (error) {
|
|
72
|
+
console.error("💥 Test failed:", error);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if (require.main === module) {
|
|
77
|
+
quickTest();
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export { quickTest };
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Serialization Test for executeScript - demonstrates transfer issues
|
|
4
|
+
* Usage: npx ts-node src/agents/tools/executeScript/examples/serialization-test.ts
|
|
5
|
+
*
|
|
6
|
+
* This test demonstrates the "A non-transferable value was passed" errors
|
|
7
|
+
* that occur when trying to return complex objects from executeScript.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { executeScript } from "../../executeScript";
|
|
11
|
+
import { Tools } from "../../../../services";
|
|
12
|
+
import { Clients } from "../../../../clients";
|
|
13
|
+
|
|
14
|
+
interface TestCase {
|
|
15
|
+
name: string;
|
|
16
|
+
script: string;
|
|
17
|
+
expectedToWork: boolean;
|
|
18
|
+
description: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
const testCases: TestCase[] = [
|
|
22
|
+
{
|
|
23
|
+
name: "primitive-string",
|
|
24
|
+
expectedToWork: true,
|
|
25
|
+
description: "Simple string return - should work",
|
|
26
|
+
script: `
|
|
27
|
+
console.log("Testing primitive string return");
|
|
28
|
+
return "Hello World";
|
|
29
|
+
`
|
|
30
|
+
},
|
|
31
|
+
|
|
32
|
+
{
|
|
33
|
+
name: "primitive-number",
|
|
34
|
+
expectedToWork: true,
|
|
35
|
+
description: "Simple number return - should work",
|
|
36
|
+
script: `
|
|
37
|
+
console.log("Testing primitive number return");
|
|
38
|
+
return 42;
|
|
39
|
+
`
|
|
40
|
+
},
|
|
41
|
+
|
|
42
|
+
{
|
|
43
|
+
name: "primitive-boolean",
|
|
44
|
+
expectedToWork: true,
|
|
45
|
+
description: "Simple boolean return - should work",
|
|
46
|
+
script: `
|
|
47
|
+
console.log("Testing primitive boolean return");
|
|
48
|
+
return true;
|
|
49
|
+
`
|
|
50
|
+
},
|
|
51
|
+
|
|
52
|
+
{
|
|
53
|
+
name: "simple-array",
|
|
54
|
+
expectedToWork: true, // You mentioned this works
|
|
55
|
+
description: "Simple array return - you said this works",
|
|
56
|
+
script: `
|
|
57
|
+
console.log("Testing simple array return");
|
|
58
|
+
return [1, 2, 3, "hello"];
|
|
59
|
+
`
|
|
60
|
+
},
|
|
61
|
+
|
|
62
|
+
{
|
|
63
|
+
name: "simple-object",
|
|
64
|
+
expectedToWork: false, // This is where I got errors
|
|
65
|
+
description: "Simple object return - expected to fail with transferable error",
|
|
66
|
+
script: `
|
|
67
|
+
console.log("Testing simple object return");
|
|
68
|
+
return {
|
|
69
|
+
message: "Hello",
|
|
70
|
+
count: 42,
|
|
71
|
+
success: true
|
|
72
|
+
};
|
|
73
|
+
`
|
|
74
|
+
},
|
|
75
|
+
|
|
76
|
+
{
|
|
77
|
+
name: "nested-object",
|
|
78
|
+
expectedToWork: false,
|
|
79
|
+
description: "Nested object return - expected to fail",
|
|
80
|
+
script: `
|
|
81
|
+
console.log("Testing nested object return");
|
|
82
|
+
return {
|
|
83
|
+
data: {
|
|
84
|
+
items: [1, 2, 3],
|
|
85
|
+
metadata: { timestamp: new Date().toISOString() }
|
|
86
|
+
},
|
|
87
|
+
status: "success"
|
|
88
|
+
};
|
|
89
|
+
`
|
|
90
|
+
},
|
|
91
|
+
|
|
92
|
+
{
|
|
93
|
+
name: "object-with-functions",
|
|
94
|
+
expectedToWork: false,
|
|
95
|
+
description: "Object with functions - definitely should fail",
|
|
96
|
+
script: `
|
|
97
|
+
console.log("Testing object with functions");
|
|
98
|
+
return {
|
|
99
|
+
data: [1, 2, 3],
|
|
100
|
+
transform: function(x) { return x * 2; },
|
|
101
|
+
helper: () => "test"
|
|
102
|
+
};
|
|
103
|
+
`
|
|
104
|
+
},
|
|
105
|
+
|
|
106
|
+
{
|
|
107
|
+
name: "array-of-objects",
|
|
108
|
+
expectedToWork: false, // Based on my experience
|
|
109
|
+
description: "Array containing objects - expected to fail",
|
|
110
|
+
script: `
|
|
111
|
+
console.log("Testing array of objects");
|
|
112
|
+
return [
|
|
113
|
+
{ id: 1, name: "Alice" },
|
|
114
|
+
{ id: 2, name: "Bob" },
|
|
115
|
+
{ id: 3, name: "Charlie" }
|
|
116
|
+
];
|
|
117
|
+
`
|
|
118
|
+
},
|
|
119
|
+
|
|
120
|
+
{
|
|
121
|
+
name: "json-stringify-workaround",
|
|
122
|
+
expectedToWork: true,
|
|
123
|
+
description: "Using JSON.stringify as workaround - should work",
|
|
124
|
+
script: `
|
|
125
|
+
console.log("Testing JSON.stringify workaround");
|
|
126
|
+
const data = {
|
|
127
|
+
message: "Hello",
|
|
128
|
+
items: [1, 2, 3],
|
|
129
|
+
nested: { key: "value" }
|
|
130
|
+
};
|
|
131
|
+
return JSON.stringify(data);
|
|
132
|
+
`
|
|
133
|
+
},
|
|
134
|
+
|
|
135
|
+
{
|
|
136
|
+
name: "tool-call-result",
|
|
137
|
+
expectedToWork: false, // Based on my experience with news aggregation
|
|
138
|
+
description: "Tool call result object - expected to fail",
|
|
139
|
+
script: `
|
|
140
|
+
console.log("Testing tool call result return");
|
|
141
|
+
|
|
142
|
+
try {
|
|
143
|
+
const searchResult = await callTool("fileSearch", { searchTerm: "package.json" });
|
|
144
|
+
|
|
145
|
+
// Try to return a structured response with the tool result
|
|
146
|
+
return {
|
|
147
|
+
success: true,
|
|
148
|
+
toolResult: searchResult,
|
|
149
|
+
timestamp: new Date().toISOString()
|
|
150
|
+
};
|
|
151
|
+
} catch (error) {
|
|
152
|
+
return {
|
|
153
|
+
success: false,
|
|
154
|
+
error: error.message
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
`
|
|
158
|
+
},
|
|
159
|
+
|
|
160
|
+
{
|
|
161
|
+
name: "date-object",
|
|
162
|
+
expectedToWork: false,
|
|
163
|
+
description: "Date object return - expected to fail",
|
|
164
|
+
script: `
|
|
165
|
+
console.log("Testing Date object return");
|
|
166
|
+
return new Date();
|
|
167
|
+
`
|
|
168
|
+
},
|
|
169
|
+
|
|
170
|
+
{
|
|
171
|
+
name: "map-object",
|
|
172
|
+
expectedToWork: false,
|
|
173
|
+
description: "Map object return - expected to fail",
|
|
174
|
+
script: `
|
|
175
|
+
console.log("Testing Map object return");
|
|
176
|
+
const map = new Map();
|
|
177
|
+
map.set("key1", "value1");
|
|
178
|
+
map.set("key2", "value2");
|
|
179
|
+
return map;
|
|
180
|
+
`
|
|
181
|
+
}
|
|
182
|
+
];
|
|
183
|
+
|
|
184
|
+
async function runSerializationTests() {
|
|
185
|
+
console.log("🧪 Running executeScript Serialization Tests\\n");
|
|
186
|
+
console.log("=" .repeat(80));
|
|
187
|
+
|
|
188
|
+
const results = {
|
|
189
|
+
passed: 0,
|
|
190
|
+
failed: 0,
|
|
191
|
+
unexpected: 0,
|
|
192
|
+
details: [] as any[]
|
|
193
|
+
};
|
|
194
|
+
|
|
195
|
+
for (const testCase of testCases) {
|
|
196
|
+
console.log(`\\n๐ Testing: ${testCase.name}`);
|
|
197
|
+
console.log(`๐ Description: ${testCase.description}`);
|
|
198
|
+
console.log(`🎯 Expected to work: ${testCase.expectedToWork}`);
|
|
199
|
+
|
|
200
|
+
try {
|
|
201
|
+
const result = await executeScript({
|
|
202
|
+
script: testCase.script,
|
|
203
|
+
maxToolCalls: 5,
|
|
204
|
+
maxTokens: 500,
|
|
205
|
+
maxExecutionTimeMs: 10000,
|
|
206
|
+
maxCostUsd: 0.1
|
|
207
|
+
}, {
|
|
208
|
+
tools: Tools,
|
|
209
|
+
clients: Clients,
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
const actualWorked = result.success;
|
|
213
|
+
const matchesExpectation = actualWorked === testCase.expectedToWork;
|
|
214
|
+
|
|
215
|
+
if (matchesExpectation) {
|
|
216
|
+
results.passed++;
|
|
217
|
+
console.log(`✅ PASS - Behaved as expected`);
|
|
218
|
+
} else {
|
|
219
|
+
results.unexpected++;
|
|
220
|
+
console.log(`⚠️ UNEXPECTED - Expected ${testCase.expectedToWork ? 'success' : 'failure'}, got ${actualWorked ? 'success' : 'failure'}`);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
results.details.push({
|
|
224
|
+
name: testCase.name,
|
|
225
|
+
expected: testCase.expectedToWork,
|
|
226
|
+
actual: actualWorked,
|
|
227
|
+
matches: matchesExpectation,
|
|
228
|
+
result: actualWorked ? result.result : null,
|
|
229
|
+
error: actualWorked ? null : result.error,
|
|
230
|
+
consoleOutput: result.consoleOutput
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
if (actualWorked) {
|
|
234
|
+
console.log(`๐ Result type: ${typeof result.result}`);
|
|
235
|
+
console.log(`๐ Result: ${JSON.stringify(result.result).substring(0, 200)}${JSON.stringify(result.result).length > 200 ? '...' : ''}`);
|
|
236
|
+
} else {
|
|
237
|
+
console.log(`โ Error: ${result.error}`);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (result.consoleOutput.length > 0) {
|
|
241
|
+
console.log(`๐ Console: ${result.consoleOutput.join(', ')}`);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
} catch (error) {
|
|
245
|
+
results.failed++;
|
|
246
|
+
console.log(`💥 TEST FRAMEWORK ERROR: ${error.message}`);
|
|
247
|
+
|
|
248
|
+
results.details.push({
|
|
249
|
+
name: testCase.name,
|
|
250
|
+
expected: testCase.expectedToWork,
|
|
251
|
+
actual: false,
|
|
252
|
+
matches: !testCase.expectedToWork,
|
|
253
|
+
result: null,
|
|
254
|
+
error: error.message,
|
|
255
|
+
consoleOutput: []
|
|
256
|
+
});
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Print summary
|
|
261
|
+
console.log("\\n" + "=".repeat(80));
|
|
262
|
+
console.log("๐ SERIALIZATION TEST SUMMARY");
|
|
263
|
+
console.log("=".repeat(80));
|
|
264
|
+
console.log(`✅ Tests matching expectations: ${results.passed}`);
|
|
265
|
+
console.log(`⚠️ Unexpected behaviors: ${results.unexpected}`);
|
|
266
|
+
console.log(`💥 Framework failures: ${results.failed}`);
|
|
267
|
+
console.log(`๐ Total tests: ${testCases.length}`);
|
|
268
|
+
|
|
269
|
+
if (results.unexpected > 0) {
|
|
270
|
+
console.log("\\n๐ UNEXPECTED RESULTS:");
|
|
271
|
+
results.details
|
|
272
|
+
.filter(d => !d.matches)
|
|
273
|
+
.forEach(detail => {
|
|
274
|
+
console.log(` - ${detail.name}: Expected ${detail.expected ? 'success' : 'failure'}, got ${detail.actual ? 'success' : 'failure'}`);
|
|
275
|
+
if (detail.error) {
|
|
276
|
+
console.log(` Error: ${detail.error}`);
|
|
277
|
+
}
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Analysis and recommendations
|
|
282
|
+
console.log("\\n🔬 ANALYSIS:");
|
|
283
|
+
|
|
284
|
+
const workingTypes = results.details.filter(d => d.actual).map(d => d.name);
|
|
285
|
+
const failingTypes = results.details.filter(d => !d.actual).map(d => d.name);
|
|
286
|
+
|
|
287
|
+
console.log("\\n✅ Types that work:");
|
|
288
|
+
workingTypes.forEach(name => console.log(` - ${name}`));
|
|
289
|
+
|
|
290
|
+
console.log("\\nโ Types that fail:");
|
|
291
|
+
failingTypes.forEach(name => console.log(` - ${name}`));
|
|
292
|
+
|
|
293
|
+
console.log("\\n💡 RECOMMENDATIONS:");
|
|
294
|
+
console.log(" 1. Use JSON.stringify() for complex objects");
|
|
295
|
+
console.log(" 2. Return primitive values when possible");
|
|
296
|
+
console.log(" 3. Consider createArtifact() for structured data");
|
|
297
|
+
console.log(" 4. Test your return types with this suite");
|
|
298
|
+
|
|
299
|
+
return results;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (require.main === module) {
|
|
303
|
+
runSerializationTests().catch(error => {
|
|
304
|
+
console.error("Test suite failed:", error);
|
|
305
|
+
process.exit(1);
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
export { runSerializationTests, testCases };
|