agency-lang 0.0.53 → 0.0.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/agents/judge.agency +18 -0
- package/dist/lib/backends/baseGenerator.js +3 -0
- package/dist/lib/backends/graphGenerator.integration.test.js +4 -4
- package/dist/lib/backends/graphGenerator.js +9 -15
- package/dist/lib/backends/typescriptGenerator.integration.test.js +4 -4
- package/dist/lib/backends/typescriptGenerator.js +7 -15
- package/dist/lib/cli/commands.d.ts +1 -1
- package/dist/lib/cli/commands.js +7 -4
- package/dist/lib/cli/evaluate.js +1 -1
- package/dist/lib/cli/test.js +94 -11
- package/dist/lib/cli/util.d.ts +13 -1
- package/dist/lib/cli/util.js +32 -4
- package/dist/lib/config.d.ts +9 -0
- package/dist/lib/parser.d.ts +3 -2
- package/dist/lib/parser.js +9 -4
- package/dist/lib/parsers/function.js +3 -3
- package/dist/lib/parsers/function.test.js +21 -6
- package/dist/lib/parsers/skill.js +20 -3
- package/dist/lib/parsers/skill.test.js +15 -17
- package/dist/lib/parsers/typeHints.js +3 -3
- package/dist/lib/parsers/typeHints.test.js +11 -2
- package/dist/lib/templates/backends/graphGenerator/builtinTools.d.ts +1 -1
- package/dist/lib/templates/backends/graphGenerator/builtinTools.js +1 -1
- package/dist/lib/templates/backends/graphGenerator/graphNode.d.ts +1 -1
- package/dist/lib/templates/backends/graphGenerator/graphNode.js +5 -2
- package/dist/lib/templates/backends/graphGenerator/imports.d.ts +1 -1
- package/dist/lib/templates/backends/graphGenerator/imports.js +69 -96
- package/dist/lib/templates/backends/graphGenerator/runNodeFunction.d.ts +1 -2
- package/dist/lib/templates/backends/graphGenerator/runNodeFunction.js +6 -3
- package/dist/lib/templates/backends/graphGenerator/startNode.d.ts +1 -1
- package/dist/lib/templates/backends/graphGenerator/startNode.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetch.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetch.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetchJSON.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetchJSON.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/input.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/input.js +2 -2
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/read.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/read.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/readImage.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/readImage.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/sleep.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/sleep.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinTools.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/builtinTools.js +2 -2
- package/dist/lib/templates/backends/typescriptGenerator/functionCallAssignment.d.ts +1 -2
- package/dist/lib/templates/backends/typescriptGenerator/functionCallAssignment.js +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/functionDefinition.d.ts +1 -2
- package/dist/lib/templates/backends/typescriptGenerator/functionDefinition.js +5 -4
- package/dist/lib/templates/backends/typescriptGenerator/promptFunction.d.ts +1 -2
- package/dist/lib/templates/backends/typescriptGenerator/promptFunction.js +12 -8
- package/dist/lib/templates/backends/typescriptGenerator/toolCall.d.ts +1 -1
- package/dist/lib/templates/backends/typescriptGenerator/toolCall.js +32 -26
- package/dist/lib/templates/cli/evaluate.d.ts +3 -1
- package/dist/lib/templates/cli/evaluate.js +47 -3
- package/dist/lib/templates/cli/judgeEvaluate.d.ts +11 -0
- package/dist/lib/templates/cli/judgeEvaluate.js +66 -0
- package/dist/lib/typeChecker.d.ts +34 -0
- package/dist/lib/typeChecker.js +361 -0
- package/dist/lib/typeChecker.test.js +484 -0
- package/dist/scripts/agency.js +36 -5
- package/dist/scripts/regenerate-fixtures.js +2 -2
- package/dist/scripts/regenerate-graph-fixtures.js +2 -2
- package/package.json +7 -4
- package/dist/lib/backends/typescriptGenerator.test.js +0 -763
- /package/dist/lib/{backends/typescriptGenerator.test.d.ts → typeChecker.test.d.ts} +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
type JudgeResult = {
|
|
2
|
+
score: number # a score from 0 to 100 where 100 means perfect match;
|
|
3
|
+
reasoning: string # brief explanation for the score
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
node judge(actualOutput: string, expectedOutput: string, judgePrompt: string): JudgeResult {
|
|
7
|
+
result: JudgeResult = llm("You are an evaluation judge. Score how well the actual output matches what was expected.
|
|
8
|
+
|
|
9
|
+
Judge prompt (evaluation criteria): ${judgePrompt}
|
|
10
|
+
|
|
11
|
+
Expected output: ${expectedOutput}
|
|
12
|
+
|
|
13
|
+
Actual output: ${actualOutput}
|
|
14
|
+
|
|
15
|
+
Provide a score from 0 to 100 where 100 means perfect match and 0 means completely wrong. Also provide brief reasoning.")
|
|
16
|
+
|
|
17
|
+
return result
|
|
18
|
+
}
|
|
@@ -21,6 +21,9 @@ export class BaseGenerator {
|
|
|
21
21
|
agencyConfig = {};
|
|
22
22
|
constructor({ config }) {
|
|
23
23
|
this.agencyConfig = mergeDeep(this.configDefaults(), config || {});
|
|
24
|
+
if (this.agencyConfig.verbose) {
|
|
25
|
+
console.log("Generator config:", this.agencyConfig);
|
|
26
|
+
}
|
|
24
27
|
}
|
|
25
28
|
configDefaults() {
|
|
26
29
|
return {};
|
|
@@ -4,7 +4,7 @@ import { generateGraph } from "./graphGenerator.js";
|
|
|
4
4
|
import fs from "fs";
|
|
5
5
|
import path from "path";
|
|
6
6
|
/**
|
|
7
|
-
* Recursively discovers all .agency/.
|
|
7
|
+
* Recursively discovers all .agency/.mjs fixture pairs in a directory
|
|
8
8
|
*/
|
|
9
9
|
function discoverFixtures(fixtureDir) {
|
|
10
10
|
const fixtures = [];
|
|
@@ -20,9 +20,9 @@ function discoverFixtures(fixtureDir) {
|
|
|
20
20
|
scanDirectory(fullPath, relPath);
|
|
21
21
|
}
|
|
22
22
|
else if (entry.isFile() && entry.name.endsWith(".agency")) {
|
|
23
|
-
// Found an Agency file - look for corresponding .
|
|
23
|
+
// Found an Agency file - look for corresponding .mjs
|
|
24
24
|
const baseName = entry.name.replace(".agency", "");
|
|
25
|
-
const mtsPath = path.join(dir, `${baseName}.
|
|
25
|
+
const mtsPath = path.join(dir, `${baseName}.mjs`);
|
|
26
26
|
if (fs.existsSync(mtsPath)) {
|
|
27
27
|
const nameWithoutExt = relativePath
|
|
28
28
|
? `${relativePath}/${baseName}`
|
|
@@ -41,7 +41,7 @@ function discoverFixtures(fixtureDir) {
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
else {
|
|
44
|
-
console.warn(`Warning: No corresponding .
|
|
44
|
+
console.warn(`Warning: No corresponding .mjs file for ${fullPath}`);
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
}
|
|
@@ -7,7 +7,6 @@ import * as renderRunNodeFunction from "../templates/backends/graphGenerator/run
|
|
|
7
7
|
import * as renderStartNode from "../templates/backends/graphGenerator/startNode.js";
|
|
8
8
|
import { TypeScriptGenerator } from "./typescriptGenerator.js";
|
|
9
9
|
import { mapFunctionName } from "./typescriptGenerator/builtins.js";
|
|
10
|
-
import { variableTypeToString } from "./typescriptGenerator/typeToString.js";
|
|
11
10
|
import { TypescriptPreprocessor } from "../preprocessors/typescriptPreprocessor.js";
|
|
12
11
|
export class GraphGenerator extends TypeScriptGenerator {
|
|
13
12
|
typeHints = {};
|
|
@@ -144,24 +143,22 @@ export class GraphGenerator extends TypeScriptGenerator {
|
|
|
144
143
|
return "generateLiteral not implemented";
|
|
145
144
|
} */
|
|
146
145
|
generateImports() {
|
|
147
|
-
const
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
];
|
|
146
|
+
const args = {
|
|
147
|
+
logHost: this.agencyConfig.log?.host || "",
|
|
148
|
+
logProjectId: this.agencyConfig.log?.projectId || "",
|
|
149
|
+
logDebugMode: this.agencyConfig.log?.debugMode || false,
|
|
150
|
+
clientLogLevel: this.agencyConfig.client?.logLevel || "warn",
|
|
151
|
+
clientDefaultModel: this.agencyConfig.client?.defaultModel || "gpt-4o-mini",
|
|
152
|
+
};
|
|
153
|
+
const arr = [renderImports.default(args)];
|
|
156
154
|
arr.push(builtinTools.default({}));
|
|
157
155
|
return arr.join("\n");
|
|
158
156
|
}
|
|
159
157
|
preprocess() {
|
|
160
158
|
const lines = [];
|
|
161
|
-
lines.push("// @ts-nocheck\n");
|
|
162
159
|
this.importedNodes.forEach((importNode) => {
|
|
163
160
|
const defaultImportName = this.agencyFileToDefaultImportName(importNode.agencyFile);
|
|
164
|
-
lines.push(`import ${defaultImportName} from "${importNode.agencyFile.replace(".agency", ".
|
|
161
|
+
lines.push(`import ${defaultImportName} from "${importNode.agencyFile.replace(".agency", ".js")}";`);
|
|
165
162
|
});
|
|
166
163
|
return lines.join("\n");
|
|
167
164
|
}
|
|
@@ -196,9 +193,6 @@ export class GraphGenerator extends TypeScriptGenerator {
|
|
|
196
193
|
nodeName: node.nodeName,
|
|
197
194
|
hasArgs: args.length > 0,
|
|
198
195
|
argsStr,
|
|
199
|
-
returnType: node.returnType
|
|
200
|
-
? variableTypeToString(node.returnType, this.typeAliases)
|
|
201
|
-
: "any",
|
|
202
196
|
}));
|
|
203
197
|
}
|
|
204
198
|
lines.push("export default graph;");
|
|
@@ -4,7 +4,7 @@ import { generateTypeScript } from "./typescriptGenerator.js";
|
|
|
4
4
|
import fs from "fs";
|
|
5
5
|
import path from "path";
|
|
6
6
|
/**
|
|
7
|
-
* Recursively discovers all .agency/.
|
|
7
|
+
* Recursively discovers all .agency/.mjs fixture pairs in a directory
|
|
8
8
|
*/
|
|
9
9
|
function discoverFixtures(fixtureDir) {
|
|
10
10
|
const fixtures = [];
|
|
@@ -20,9 +20,9 @@ function discoverFixtures(fixtureDir) {
|
|
|
20
20
|
scanDirectory(fullPath, relPath);
|
|
21
21
|
}
|
|
22
22
|
else if (entry.isFile() && entry.name.endsWith(".agency")) {
|
|
23
|
-
// Found an Agency file - look for corresponding .
|
|
23
|
+
// Found an Agency file - look for corresponding .mjs
|
|
24
24
|
const baseName = entry.name.replace(".agency", "");
|
|
25
|
-
const mtsPath = path.join(dir, `${baseName}.
|
|
25
|
+
const mtsPath = path.join(dir, `${baseName}.mjs`);
|
|
26
26
|
if (fs.existsSync(mtsPath)) {
|
|
27
27
|
const nameWithoutExt = relativePath
|
|
28
28
|
? `${relativePath}/${baseName}`
|
|
@@ -41,7 +41,7 @@ function discoverFixtures(fixtureDir) {
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
else {
|
|
44
|
-
console.warn(`Warning: No corresponding .
|
|
44
|
+
console.warn(`Warning: No corresponding .mjs file for ${fullPath}`);
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
}
|
|
@@ -37,7 +37,7 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
37
37
|
}
|
|
38
38
|
typeAliasToString(node) {
|
|
39
39
|
const aliasedTypeStr = variableTypeToString(node.aliasedType, this.typeAliases);
|
|
40
|
-
return
|
|
40
|
+
return "";
|
|
41
41
|
}
|
|
42
42
|
processTypeHint(node) {
|
|
43
43
|
if (node.variableType.type === "typeAliasVariable") {
|
|
@@ -139,9 +139,7 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
139
139
|
else {
|
|
140
140
|
this.functionScopedVariables.push(variableName);
|
|
141
141
|
}
|
|
142
|
-
const typeAnnotation =
|
|
143
|
-
? `: ${variableTypeToString(typeHint, this.typeAliases)}`
|
|
144
|
-
: "";
|
|
142
|
+
const typeAnnotation = "";
|
|
145
143
|
if (value.type === "prompt") {
|
|
146
144
|
return this.processPromptLiteral(variableName, typeHint, value);
|
|
147
145
|
}
|
|
@@ -150,7 +148,6 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
150
148
|
const code = this.processNode(value);
|
|
151
149
|
return renderFunctionCallAssignment.default({
|
|
152
150
|
variableName: `${this.getScopeVar()}.${variableName}`,
|
|
153
|
-
typeAnnotation,
|
|
154
151
|
functionCode: code.trim(),
|
|
155
152
|
nodeContext: this.getCurrentScope().type === "node",
|
|
156
153
|
globalScope: this.getCurrentScope().type === "global",
|
|
@@ -253,9 +250,6 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
253
250
|
return renderFunctionDefinition.default({
|
|
254
251
|
functionName,
|
|
255
252
|
argsStr,
|
|
256
|
-
returnType: node.returnType
|
|
257
|
-
? variableTypeToString(node.returnType, this.typeAliases)
|
|
258
|
-
: "any",
|
|
259
253
|
functionBody: bodyCode.join("\n"),
|
|
260
254
|
});
|
|
261
255
|
}
|
|
@@ -415,15 +409,14 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
415
409
|
value: "string",
|
|
416
410
|
};
|
|
417
411
|
const zodSchema = mapTypeToZodSchema(_variableType, this.typeAliases);
|
|
418
|
-
const typeString = variableTypeToString(_variableType, this.typeAliases);
|
|
419
412
|
// Build prompt construction code
|
|
420
413
|
const promptCode = this.buildPromptString({
|
|
421
414
|
segments: prompt.segments,
|
|
422
415
|
typeHints: this.typeHints,
|
|
423
416
|
skills: prompt.skills || [],
|
|
424
417
|
});
|
|
425
|
-
const parts = functionArgs.map((arg) =>
|
|
426
|
-
parts.push("__metadata
|
|
418
|
+
const parts = functionArgs.map((arg) => arg.replace(".", "_"));
|
|
419
|
+
parts.push("__metadata");
|
|
427
420
|
const argsStr = parts.join(", ");
|
|
428
421
|
let _tools = "";
|
|
429
422
|
if (prompt.tools) {
|
|
@@ -453,7 +446,7 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
453
446
|
I'll probably need to do that for supporting type checking anyway.
|
|
454
447
|
*/
|
|
455
448
|
const functionCalls = (prompt.tools || { type: "usesTool", toolNames: [] }).toolNames
|
|
456
|
-
.filter((t) => BUILTIN_TOOLS.includes(t))
|
|
449
|
+
.filter((t) => !BUILTIN_TOOLS.includes(t))
|
|
457
450
|
.map((toolName) => {
|
|
458
451
|
if (!this.functionDefinitions[toolName] &&
|
|
459
452
|
!this.isImportedTool(toolName)) {
|
|
@@ -473,7 +466,6 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
473
466
|
variableName,
|
|
474
467
|
argsStr,
|
|
475
468
|
funcCallParams: [...scopedFunctionArgs, metadataObj].join(", "),
|
|
476
|
-
typeString,
|
|
477
469
|
promptCode,
|
|
478
470
|
hasResponseFormat: zodSchema !== DEFAULT_SCHEMA,
|
|
479
471
|
zodSchema,
|
|
@@ -487,7 +479,7 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
487
479
|
});
|
|
488
480
|
}
|
|
489
481
|
processImportStatement(node) {
|
|
490
|
-
return `import ${node.importedNames} from "${node.modulePath.replace(/\.agency$/, ".
|
|
482
|
+
return `import ${node.importedNames} from "${node.modulePath.replace(/\.agency$/, ".js")}";`;
|
|
491
483
|
}
|
|
492
484
|
processImportNodeStatement(node) {
|
|
493
485
|
return ""; // handled in preprocess in graphgenerator
|
|
@@ -500,7 +492,7 @@ export class TypeScriptGenerator extends BaseGenerator {
|
|
|
500
492
|
`__${toolName}ToolParams`,
|
|
501
493
|
])
|
|
502
494
|
.flat();
|
|
503
|
-
return `import { ${importNames.join(", ")} } from "${node.agencyFile.replace(/\.agency$/, ".
|
|
495
|
+
return `import { ${importNames.join(", ")} } from "${node.agencyFile.replace(/\.agency$/, ".js")}";`;
|
|
504
496
|
}
|
|
505
497
|
processWhileLoop(node) {
|
|
506
498
|
const conditionCode = this.processNode(node.condition);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { AgencyConfig } from "../config.js";
|
|
2
2
|
import { AgencyProgram } from "../index.js";
|
|
3
|
-
export declare function loadConfig(configPath?: string): AgencyConfig;
|
|
3
|
+
export declare function loadConfig(configPath?: string, verbose?: boolean): AgencyConfig;
|
|
4
4
|
export declare function readStdin(): Promise<string>;
|
|
5
5
|
export declare function parse(contents: string, config: AgencyConfig): AgencyProgram;
|
|
6
6
|
export declare function readFile(inputFile: string): string;
|
package/dist/lib/cli/commands.js
CHANGED
|
@@ -7,11 +7,14 @@ import * as fs from "fs";
|
|
|
7
7
|
import * as path from "path";
|
|
8
8
|
import { parseAgency } from "../parser.js";
|
|
9
9
|
// Load configuration from agency.json
|
|
10
|
-
export function loadConfig(configPath) {
|
|
10
|
+
export function loadConfig(configPath, verbose = false) {
|
|
11
11
|
let config = {};
|
|
12
12
|
// Determine config file path
|
|
13
13
|
const defaultConfigPath = path.join(process.cwd(), "agency.json");
|
|
14
14
|
const finalConfigPath = configPath || defaultConfigPath;
|
|
15
|
+
if (verbose) {
|
|
16
|
+
console.log(`Looking for config at: ${finalConfigPath}`);
|
|
17
|
+
}
|
|
15
18
|
// Check if config file exists
|
|
16
19
|
if (fs.existsSync(finalConfigPath)) {
|
|
17
20
|
try {
|
|
@@ -45,7 +48,7 @@ export function readStdin() {
|
|
|
45
48
|
}
|
|
46
49
|
export function parse(contents, config) {
|
|
47
50
|
const verbose = config.verbose ?? false;
|
|
48
|
-
const parseResult = parseAgency(contents,
|
|
51
|
+
const parseResult = parseAgency(contents, config);
|
|
49
52
|
// Check if parsing was successful
|
|
50
53
|
if (!parseResult.success) {
|
|
51
54
|
console.error("Parse error:");
|
|
@@ -121,7 +124,7 @@ export function compile(config, inputFile, _outputFile) {
|
|
|
121
124
|
}
|
|
122
125
|
// Resolve the absolute path of the input file to avoid duplicates
|
|
123
126
|
const absoluteInputFile = path.resolve(inputFile);
|
|
124
|
-
let outputFile = _outputFile || inputFile.replace(".agency", ".
|
|
127
|
+
let outputFile = _outputFile || inputFile.replace(".agency", ".js");
|
|
125
128
|
if (config.outDir && !_outputFile) {
|
|
126
129
|
const outputDir = path.resolve(config.outDir);
|
|
127
130
|
if (!fs.existsSync(outputDir)) {
|
|
@@ -145,7 +148,7 @@ export function compile(config, inputFile, _outputFile) {
|
|
|
145
148
|
// Update the import path in the AST to reference the new .ts file
|
|
146
149
|
parsedProgram.nodes.forEach((node) => {
|
|
147
150
|
if (node.type === "importStatement") {
|
|
148
|
-
node.modulePath = node.modulePath.replace(".agency", ".
|
|
151
|
+
node.modulePath = node.modulePath.replace(".agency", ".js");
|
|
149
152
|
}
|
|
150
153
|
});
|
|
151
154
|
const generatedCode = generateGraph(parsedProgram, config);
|
package/dist/lib/cli/evaluate.js
CHANGED
|
@@ -162,7 +162,7 @@ export async function evaluate(target, argsFilePath, resultsFilePath) {
|
|
|
162
162
|
const argsString = hasArgs
|
|
163
163
|
? argsRecordToString(c.args, selectedNode.parameters)
|
|
164
164
|
: "";
|
|
165
|
-
const json = executeNode(filename, nodeName, hasArgs, argsString);
|
|
165
|
+
const json = executeNode(filename, nodeName, hasArgs, argsString, undefined);
|
|
166
166
|
console.log("\nOutput:");
|
|
167
167
|
console.log(JSON.stringify(json.data, null, 2));
|
|
168
168
|
const ratingResponse = await prompts({
|
package/dist/lib/cli/test.js
CHANGED
|
@@ -2,14 +2,15 @@ import { parseAgency } from "../parser.js";
|
|
|
2
2
|
import { getNodesOfType } from "../utils/node.js";
|
|
3
3
|
import fs from "fs";
|
|
4
4
|
import prompts from "prompts";
|
|
5
|
-
import { executeNode, parseTarget, pickANode, promptForArgs, promptForTarget, } from "./util.js";
|
|
5
|
+
import { executeJudge, executeNode, parseTarget, pickANode, promptForArgs, promptForTarget, } from "./util.js";
|
|
6
|
+
import { color } from "termcolors";
|
|
6
7
|
function readFile(filename) {
|
|
7
8
|
console.log("Trying to read file", filename, "...");
|
|
8
9
|
const data = fs.readFileSync(filename);
|
|
9
10
|
const contents = data.toString("utf8");
|
|
10
11
|
return contents;
|
|
11
12
|
}
|
|
12
|
-
function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluationCriteria) {
|
|
13
|
+
function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluationCriteria, interruptHandlers) {
|
|
13
14
|
const testFilePath = agencyFilename.replace(".agency", ".test.json");
|
|
14
15
|
let tests;
|
|
15
16
|
if (fs.existsSync(testFilePath)) {
|
|
@@ -18,7 +19,16 @@ function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluati
|
|
|
18
19
|
else {
|
|
19
20
|
tests = { sourceFile: agencyFilename, tests: [] };
|
|
20
21
|
}
|
|
21
|
-
|
|
22
|
+
const testCase = {
|
|
23
|
+
nodeName,
|
|
24
|
+
input,
|
|
25
|
+
expectedOutput,
|
|
26
|
+
evaluationCriteria,
|
|
27
|
+
};
|
|
28
|
+
if (interruptHandlers && interruptHandlers.length > 0) {
|
|
29
|
+
testCase.interruptHandlers = interruptHandlers;
|
|
30
|
+
}
|
|
31
|
+
tests.tests.push(testCase);
|
|
22
32
|
fs.writeFileSync(testFilePath, JSON.stringify(tests, null, 2));
|
|
23
33
|
return testFilePath;
|
|
24
34
|
}
|
|
@@ -46,8 +56,58 @@ export async function fixtures(target) {
|
|
|
46
56
|
const selectedNode = nodes.find((n) => n.nodeName === nodeName);
|
|
47
57
|
let { hasArgs, argsString } = await promptForArgs(selectedNode);
|
|
48
58
|
console.log("Running program from entrypoint", nodeName);
|
|
49
|
-
|
|
50
|
-
|
|
59
|
+
let json = executeNode(filename, nodeName, hasArgs, argsString);
|
|
60
|
+
// Handle interrupt discovery
|
|
61
|
+
const interruptHandlers = [];
|
|
62
|
+
while (json.data &&
|
|
63
|
+
typeof json.data === "object" &&
|
|
64
|
+
json.data.type === "interrupt") {
|
|
65
|
+
console.log(`\n⚠️ Interrupt detected: "${json.data.data}"`);
|
|
66
|
+
const actionResponse = await prompts({
|
|
67
|
+
type: "select",
|
|
68
|
+
name: "action",
|
|
69
|
+
message: "How should the test handle this interrupt?",
|
|
70
|
+
choices: [
|
|
71
|
+
{ title: "Approve", value: "approve" },
|
|
72
|
+
{ title: "Reject", value: "reject" },
|
|
73
|
+
{ title: "Modify arguments", value: "modify" },
|
|
74
|
+
],
|
|
75
|
+
});
|
|
76
|
+
if (!actionResponse.action) {
|
|
77
|
+
console.log("Interrupt handling cancelled.");
|
|
78
|
+
return;
|
|
79
|
+
}
|
|
80
|
+
const handler = {
|
|
81
|
+
action: actionResponse.action,
|
|
82
|
+
expectedMessage: json.data.data, // Capture the actual message
|
|
83
|
+
};
|
|
84
|
+
if (actionResponse.action === "modify") {
|
|
85
|
+
let invalidJSON = true;
|
|
86
|
+
while (invalidJSON) {
|
|
87
|
+
const modifyResponse = await prompts({
|
|
88
|
+
type: "text",
|
|
89
|
+
name: "args",
|
|
90
|
+
message: "Enter modified arguments as JSON object:",
|
|
91
|
+
});
|
|
92
|
+
if (!modifyResponse.args) {
|
|
93
|
+
console.log("Interrupt handling cancelled.");
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
96
|
+
try {
|
|
97
|
+
handler.modifiedArgs = JSON.parse(modifyResponse.args);
|
|
98
|
+
invalidJSON = false;
|
|
99
|
+
}
|
|
100
|
+
catch (e) {
|
|
101
|
+
console.error("Invalid JSON:", e);
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
interruptHandlers.push(handler);
|
|
107
|
+
// Continue execution with this handler to see if there are more interrupts
|
|
108
|
+
json = executeNode(filename, nodeName, hasArgs, argsString, interruptHandlers);
|
|
109
|
+
}
|
|
110
|
+
console.log("\nFinal Output:");
|
|
51
111
|
console.log(JSON.stringify(json.data, null, 2));
|
|
52
112
|
const correctResponse = await prompts({
|
|
53
113
|
type: "confirm",
|
|
@@ -103,7 +163,7 @@ export async function fixtures(target) {
|
|
|
103
163
|
];
|
|
104
164
|
}
|
|
105
165
|
const inputStr = hasArgs ? argsString : "";
|
|
106
|
-
const testFilePath = writeTestCase(filename, nodeName, inputStr, expectedOutput, criteria);
|
|
166
|
+
const testFilePath = writeTestCase(filename, nodeName, inputStr, expectedOutput, criteria, interruptHandlers.length > 0 ? interruptHandlers : undefined);
|
|
107
167
|
console.log(`Test case saved to ${testFilePath}`);
|
|
108
168
|
}
|
|
109
169
|
export async function test(testFile) {
|
|
@@ -139,24 +199,47 @@ export async function test(testFile) {
|
|
|
139
199
|
for (let i = 0; i < total; i++) {
|
|
140
200
|
const testCase = tests.tests[i];
|
|
141
201
|
const hasArgs = testCase.input !== "";
|
|
142
|
-
|
|
143
|
-
|
|
202
|
+
const interruptInfo = testCase.interruptHandlers
|
|
203
|
+
? ` interrupts=${testCase.interruptHandlers.length}`
|
|
204
|
+
: "";
|
|
205
|
+
const testNum = color.cyan(`Test ${i + 1}/${total}:`);
|
|
206
|
+
console.log(`\n${testNum} node=${testCase.nodeName} input=${testCase.input || "(none)"}${interruptInfo}`);
|
|
207
|
+
if (testCase.description) {
|
|
208
|
+
console.log(color.cyan("Description:", testCase.description), "\n");
|
|
209
|
+
}
|
|
210
|
+
const result = executeNode(tests.sourceFile, testCase.nodeName, hasArgs, testCase.input, testCase.interruptHandlers);
|
|
144
211
|
let testPassed = true;
|
|
145
212
|
for (const criterion of testCase.evaluationCriteria) {
|
|
146
213
|
if (criterion.type === "exact") {
|
|
147
214
|
const actual = JSON.stringify(result.data);
|
|
148
215
|
if (actual === testCase.expectedOutput) {
|
|
149
|
-
console.log(" ✓ Exact match passed");
|
|
216
|
+
console.log(color.green(" ✓ Exact match passed"));
|
|
150
217
|
}
|
|
151
218
|
else {
|
|
152
|
-
console.log(" ✗ Exact match failed");
|
|
219
|
+
console.log(color.red(" ✗ Exact match failed"));
|
|
153
220
|
console.log(" Expected:", testCase.expectedOutput);
|
|
154
221
|
console.log(" Actual: ", actual);
|
|
155
222
|
testPassed = false;
|
|
156
223
|
}
|
|
157
224
|
}
|
|
158
225
|
else if (criterion.type === "llmJudge") {
|
|
159
|
-
|
|
226
|
+
const actual = JSON.stringify(result.data);
|
|
227
|
+
try {
|
|
228
|
+
const judgeResult = executeJudge(actual, testCase.expectedOutput, criterion.judgePrompt);
|
|
229
|
+
if (judgeResult.score >= criterion.desiredAccuracy) {
|
|
230
|
+
console.log(color.green(` ✓ LLM Judge passed (score: ${judgeResult.score}/${criterion.desiredAccuracy})`));
|
|
231
|
+
console.log(` Reasoning: ${judgeResult.reasoning}`);
|
|
232
|
+
}
|
|
233
|
+
else {
|
|
234
|
+
console.log(color.red(` ✗ LLM Judge failed (score: ${judgeResult.score}/${criterion.desiredAccuracy})`));
|
|
235
|
+
console.log(` Reasoning: ${judgeResult.reasoning}`);
|
|
236
|
+
testPassed = false;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
catch (e) {
|
|
240
|
+
console.log(color.red(` ✗ LLM Judge error: ${e}`));
|
|
241
|
+
testPassed = false;
|
|
242
|
+
}
|
|
160
243
|
}
|
|
161
244
|
}
|
|
162
245
|
if (testPassed)
|
package/dist/lib/cli/util.d.ts
CHANGED
|
@@ -12,8 +12,20 @@ export declare function promptForArgs(selectedNode: GraphNodeDefinition): Promis
|
|
|
12
12
|
hasArgs: boolean;
|
|
13
13
|
argsString: string;
|
|
14
14
|
}>;
|
|
15
|
-
export declare function executeNode(agencyFile: string, nodeName: string, hasArgs: boolean, argsString: string
|
|
15
|
+
export declare function executeNode(agencyFile: string, nodeName: string, hasArgs: boolean, argsString: string, interruptHandlers?: Array<{
|
|
16
|
+
action: "approve" | "reject" | "modify";
|
|
17
|
+
modifiedArgs?: Record<string, any>;
|
|
18
|
+
expectedMessage?: string;
|
|
19
|
+
}>): {
|
|
16
20
|
data: any;
|
|
17
21
|
[key: string]: any;
|
|
18
22
|
};
|
|
19
23
|
export declare function formatTypeHint(vt: VariableType): string;
|
|
24
|
+
export declare function executeJudge(actualOutput: string, expectedOutput: string, judgePrompt: string, interruptHandlers?: Array<{
|
|
25
|
+
action: "approve" | "reject" | "modify";
|
|
26
|
+
modifiedArgs?: Record<string, any>;
|
|
27
|
+
expectedMessage?: string;
|
|
28
|
+
}>): {
|
|
29
|
+
score: number;
|
|
30
|
+
reasoning: string;
|
|
31
|
+
};
|
package/dist/lib/cli/util.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import prompts from "prompts";
|
|
2
2
|
import fs, { readFileSync } from "fs";
|
|
3
|
+
import path from "path";
|
|
3
4
|
import { execSync } from "child_process";
|
|
4
5
|
import renderEvaluate from "../templates/cli/evaluate.js";
|
|
6
|
+
import renderJudgeEvaluate from "../templates/cli/judgeEvaluate.js";
|
|
5
7
|
import { compile } from "./commands.js";
|
|
6
8
|
export function parseTarget(target) {
|
|
7
9
|
const colonIndex = target.lastIndexOf(":");
|
|
@@ -87,18 +89,22 @@ export async function promptForArgs(selectedNode) {
|
|
|
87
89
|
}
|
|
88
90
|
return { hasArgs, argsString };
|
|
89
91
|
}
|
|
90
|
-
export function executeNode(agencyFile, nodeName, hasArgs, argsString) {
|
|
91
|
-
const outFile = agencyFile.replace(".agency", ".
|
|
92
|
+
export function executeNode(agencyFile, nodeName, hasArgs, argsString, interruptHandlers) {
|
|
93
|
+
const outFile = agencyFile.replace(".agency", ".js");
|
|
92
94
|
compile({}, agencyFile, outFile);
|
|
93
95
|
const evaluateScript = renderEvaluate({
|
|
94
96
|
filename: outFile,
|
|
95
97
|
nodeName,
|
|
96
98
|
hasArgs,
|
|
97
99
|
args: argsString,
|
|
100
|
+
hasInterruptHandlers: !!interruptHandlers,
|
|
101
|
+
interruptHandlersJSON: interruptHandlers
|
|
102
|
+
? JSON.stringify(interruptHandlers)
|
|
103
|
+
: undefined,
|
|
98
104
|
});
|
|
99
|
-
const evaluateFile = "__evaluate.
|
|
105
|
+
const evaluateFile = "__evaluate.js";
|
|
100
106
|
fs.writeFileSync(evaluateFile, evaluateScript);
|
|
101
|
-
execSync(`
|
|
107
|
+
execSync(`node ${evaluateFile}`, { stdio: "inherit" });
|
|
102
108
|
const results = readFileSync("__evaluate.json", "utf-8");
|
|
103
109
|
return JSON.parse(results);
|
|
104
110
|
}
|
|
@@ -130,3 +136,25 @@ function serializeArgValue(value) {
|
|
|
130
136
|
return value;
|
|
131
137
|
return JSON.stringify(value);
|
|
132
138
|
}
|
|
139
|
+
export function executeJudge(actualOutput, expectedOutput, judgePrompt, interruptHandlers) {
|
|
140
|
+
// Resolve the judge.agency file bundled in dist/lib/agents/
|
|
141
|
+
const currentDir = path.dirname(new URL(import.meta.url).pathname);
|
|
142
|
+
const judgeAgencyFile = path.resolve(currentDir, "../agents/judge.agency");
|
|
143
|
+
const judgeOutFile = "__judge.js";
|
|
144
|
+
compile({}, judgeAgencyFile, judgeOutFile);
|
|
145
|
+
const judgeScript = renderJudgeEvaluate({
|
|
146
|
+
judgeFilename: judgeOutFile,
|
|
147
|
+
actualOutput: JSON.stringify(actualOutput),
|
|
148
|
+
expectedOutput: JSON.stringify(expectedOutput),
|
|
149
|
+
judgePrompt: JSON.stringify(judgePrompt),
|
|
150
|
+
hasInterruptHandlers: !!interruptHandlers,
|
|
151
|
+
interruptHandlersJSON: interruptHandlers
|
|
152
|
+
? JSON.stringify(interruptHandlers)
|
|
153
|
+
: undefined,
|
|
154
|
+
});
|
|
155
|
+
const judgeEvaluateFile = "__judge_evaluate.js";
|
|
156
|
+
fs.writeFileSync(judgeEvaluateFile, judgeScript);
|
|
157
|
+
execSync(`node ${judgeEvaluateFile}`, { stdio: "inherit" });
|
|
158
|
+
const results = readFileSync("__judge_evaluate.json", "utf-8");
|
|
159
|
+
return JSON.parse(results).data;
|
|
160
|
+
}
|
package/dist/lib/config.d.ts
CHANGED
|
@@ -36,6 +36,10 @@ export interface AgencyConfig {
|
|
|
36
36
|
* Example: ["malicious.com", "blocked.site.com"]
|
|
37
37
|
*/
|
|
38
38
|
disallowedFetchDomains?: string[];
|
|
39
|
+
/**
|
|
40
|
+
* Optionally specify a custom host for tarsec trace collection
|
|
41
|
+
*/
|
|
42
|
+
tarsecTraceHost?: string;
|
|
39
43
|
/** Statelog config */
|
|
40
44
|
log?: Partial<{
|
|
41
45
|
host: string;
|
|
@@ -46,4 +50,9 @@ export interface AgencyConfig {
|
|
|
46
50
|
logLevel: "error" | "warn" | "info" | "debug";
|
|
47
51
|
defaultModel: string;
|
|
48
52
|
}>;
|
|
53
|
+
/**
|
|
54
|
+
* If true, untyped variables are errors.
|
|
55
|
+
* If false (default), untyped variables are implicitly `any`.
|
|
56
|
+
*/
|
|
57
|
+
strictTypes?: boolean;
|
|
49
58
|
}
|
package/dist/lib/parser.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { Parser, ParserResult } from "tarsec";
|
|
2
2
|
import { AgencyNode, AgencyProgram } from "./types.js";
|
|
3
|
+
import { AgencyConfig } from "./config.js";
|
|
3
4
|
export declare const agencyNode: Parser<AgencyNode[]>;
|
|
4
5
|
export declare const agencyParser: Parser<AgencyProgram>;
|
|
5
6
|
export declare const _multilineCommentParser: Parser<string[]>;
|
|
6
7
|
export declare const multilineCommentParser: Parser<string[][]>;
|
|
7
8
|
export declare const normalizeCode: (code: string) => string;
|
|
8
|
-
export declare function _parseAgency(input: string,
|
|
9
|
-
export declare function parseAgency(input: string,
|
|
9
|
+
export declare function _parseAgency(input: string, config?: AgencyConfig): ParserResult<AgencyProgram>;
|
|
10
|
+
export declare function parseAgency(input: string, config?: AgencyConfig): ParserResult<AgencyProgram>;
|
package/dist/lib/parser.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { anyChar, between, capture, eof, many, or, search, seqC, set, str, success, trace, setInputStr, TarsecError, failure, } from "tarsec";
|
|
1
|
+
import { anyChar, between, capture, eof, many, or, search, seqC, set, str, success, trace, setInputStr, TarsecError, failure, setTraceHost, setTraceId, } from "tarsec";
|
|
2
2
|
import { accessExpressionParser } from "./parsers/access.js";
|
|
3
3
|
import { commentParser } from "./parsers/comment.js";
|
|
4
4
|
import { assignmentParser, functionParser, graphNodeParser, ifParser, messageThreadParser, timeBlockParser, whileLoopParser, } from "./parsers/function.js";
|
|
@@ -11,6 +11,7 @@ import { specialVarParser } from "./parsers/specialVar.js";
|
|
|
11
11
|
import { usesToolParser } from "./parsers/tools.js";
|
|
12
12
|
import { typeAliasParser, typeHintParser } from "./parsers/typeHints.js";
|
|
13
13
|
import { skillParser } from "./parsers/skill.js";
|
|
14
|
+
import { nanoid } from "nanoid";
|
|
14
15
|
export const agencyNode = (input) => {
|
|
15
16
|
const parser = many(trace("agencyParser", or(usesToolParser, importNodeStatmentParser, importToolStatmentParser, importStatmentParser, graphNodeParser, typeAliasParser, ifParser, whileLoopParser, typeHintParser, matchBlockParser, timeBlockParser, messageThreadParser, skillParser, streamingPromptLiteralParser, functionParser, returnStatementParser, specialVarParser, accessExpressionParser, assignmentParser, llmPromptFunctionCallParser, functionCallParser, commentParser, newLineParser)));
|
|
16
17
|
return parser(input);
|
|
@@ -28,7 +29,7 @@ export const normalizeCode = (code) => {
|
|
|
28
29
|
.map((line) => line.trim())
|
|
29
30
|
.join("\n");
|
|
30
31
|
};
|
|
31
|
-
export function _parseAgency(input,
|
|
32
|
+
export function _parseAgency(input, config = {}) {
|
|
32
33
|
// get rid of all multiline comments
|
|
33
34
|
const normalized = normalizeCode(input);
|
|
34
35
|
if (normalized.trim().length === 0) {
|
|
@@ -38,12 +39,16 @@ export function _parseAgency(input, verbose = false) {
|
|
|
38
39
|
}, "");
|
|
39
40
|
}
|
|
40
41
|
setInputStr(normalized);
|
|
42
|
+
if (config.tarsecTraceHost) {
|
|
43
|
+
// Use the host supplied in the config; the enclosing guard has already
// checked that `config.tarsecTraceHost` is set, so a hard-coded address
// here would silently ignore the user's setting.
setTraceHost(config.tarsecTraceHost);
|
|
44
|
+
setTraceId(nanoid());
|
|
45
|
+
}
|
|
41
46
|
const result = agencyParser(normalized);
|
|
42
47
|
return result;
|
|
43
48
|
}
|
|
44
|
-
export function parseAgency(input,
|
|
49
|
+
export function parseAgency(input, config = {}) {
|
|
45
50
|
try {
|
|
46
|
-
return _parseAgency(input,
|
|
51
|
+
return _parseAgency(input, config);
|
|
47
52
|
}
|
|
48
53
|
catch (error) {
|
|
49
54
|
if (error instanceof TarsecError) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { capture, captureCaptures, char, debug, many, many1, many1Till, many1WithJoin, map, optional, or, sepBy, seqC, set, space, spaces, str, succeed, trace, } from "tarsec";
|
|
1
|
+
import { capture, captureCaptures, char, debug, fail, many, parseError, many1, many1Till, many1WithJoin, map, optional, or, sepBy, seqC, set, space, spaces, str, succeed, trace, } from "tarsec";
|
|
2
2
|
import { accessExpressionParser, indexAccessParser } from "./access.js";
|
|
3
3
|
import { commentParser } from "./comment.js";
|
|
4
4
|
import { functionCallParser, llmPromptFunctionCallParser, streamingPromptLiteralParser, } from "./functionCall.js";
|
|
@@ -81,8 +81,8 @@ export const ifParser = (input) => {
|
|
|
81
81
|
/**
 * Parser for a `while (<condition>) { <body> }` loop.
 *
 * The result is tagged with type "whileLoop"; the condition (an index
 * access, function call, access expression, or literal, tried in that
 * order) is captured as `condition` and the block contents as `body`.
 */
export const whileLoopParser = trace(
    "whileLoopParser",
    seqC(
        set("type", "whileLoop"),
        str("while"),
        optionalSpaces,
        char("("),
        optionalSpaces,
        capture(
            or(indexAccessParser, functionCallParser, accessExpressionParser, literalParser),
            "condition"
        ),
        optionalSpaces,
        char(")"),
        optionalSpaces,
        char("{"),
        spaces,
        capture(bodyParser, "body"),
        optionalSpaces,
        char("}")
    )
);
|
|
82
82
|
/**
 * Parser for a typed function parameter of the form `name: type`.
 *
 * The result is tagged with type "functionParameter"; the identifier is
 * captured as `name` and the parsed type as `typeHint`.
 */
export const functionParameterParserWithTypeHint = trace(
    "functionParameterParserWithTypeHint",
    seqC(
        set("type", "functionParameter"),
        capture(many1WithJoin(varNameChar), "name"),
        optionalSpaces,
        char(":"),
        optionalSpaces,
        capture(variableTypeParser, "typeHint")
    )
);
|
|
83
83
|
/**
 * Parser for an untyped function parameter (a bare identifier).
 *
 * The result is tagged with type "functionParameter" and the identifier is
 * captured as `name`.
 */
export const functionParameterParser = trace(
    "functionParameterParser",
    seqC(
        set("type", "functionParameter"),
        capture(many1WithJoin(varNameChar), "name")
    )
);
|
|
84
|
-
export const functionReturnTypeParser = trace("functionReturnTypeParser", seqC(char(":"), optionalSpaces, captureCaptures(variableTypeParser)));
|
|
85
|
-
export const _functionParser = trace("_functionParser", seqC(set("type", "function"), str("def"), many1(space), capture(many1Till(char("(")), "functionName"), char("("), optionalSpaces, capture(sepBy(comma, or(functionParameterParserWithTypeHint, functionParameterParser)), "parameters"), optionalSpaces, char(")"), optionalSpaces, capture(optional(functionReturnTypeParser), "returnType"), optionalSpaces, char("{"), optionalSpacesOrNewline, capture(or(docStringParser, succeed(undefined)), "docString"), optionalSpacesOrNewline, capture(bodyParser, "body"), optionalSpaces, char("}"), optionalSemicolon));
|
|
84
|
+
/**
 * Parser for a function's return type annotation: a `:` followed by a type.
 *
 * If `variableTypeParser` does not match after the colon, the fallback
 * branch is `parseError("Invalid return type", ...)`.
 * NOTE(review): presumably this surfaces "Invalid return type" as a parse
 * error instead of failing silently; confirm against tarsec's `parseError`.
 */
export const functionReturnTypeParser = trace(
    "functionReturnTypeParser",
    seqC(
        char(":"),
        optionalSpaces,
        captureCaptures(
            or(
                variableTypeParser,
                parseError("Invalid return type", fail("error"))
            )
        )
    )
);
|
|
85
|
+
/**
 * Parser for a function definition:
 * `def <functionName>(<parameters>) [: <returnType>] { [docString] <body> } [;]`.
 *
 * The result is tagged with type "function". Captured fields are
 * `functionName`, `parameters` (comma-separated, each typed or untyped),
 * `returnType` (optional), `docString` (undefined when absent) and `body`.
 *
 * Everything from the opening `{` onward is wrapped in
 * `parseError("Expected function body", ...)`.
 * NOTE(review): presumably a failure inside that section is reported with
 * this message; confirm against tarsec's `parseError`.
 */
export const _functionParser = trace(
    "_functionParser",
    seqC(
        set("type", "function"),
        str("def"),
        many1(space),
        capture(many1Till(char("(")), "functionName"),
        char("("),
        optionalSpaces,
        capture(
            sepBy(comma, or(functionParameterParserWithTypeHint, functionParameterParser)),
            "parameters"
        ),
        optionalSpaces,
        char(")"),
        optionalSpaces,
        capture(optional(functionReturnTypeParser), "returnType"),
        captureCaptures(
            parseError(
                "Expected function body",
                optionalSpaces,
                char("{"),
                optionalSpacesOrNewline,
                capture(or(docStringParser, succeed(undefined)), "docString"),
                optionalSpacesOrNewline,
                capture(bodyParser, "body"),
                optionalSpaces,
                char("}"),
                optionalSemicolon
            )
        )
    )
);
|
|
86
86
|
export const asyncFunctionParser = (input) => {
|
|
87
87
|
const parser = trace("asyncFunctionParser", seqC(str("async"), spaces, captureCaptures(_functionParser)));
|
|
88
88
|
const mappedParser = map(parser, (result) => ({
|