agency-lang 0.0.53 → 0.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/lib/agents/judge.agency +18 -0
  2. package/dist/lib/backends/baseGenerator.js +3 -0
  3. package/dist/lib/backends/graphGenerator.integration.test.js +4 -4
  4. package/dist/lib/backends/graphGenerator.js +9 -15
  5. package/dist/lib/backends/typescriptGenerator.integration.test.js +4 -4
  6. package/dist/lib/backends/typescriptGenerator.js +7 -15
  7. package/dist/lib/cli/commands.d.ts +1 -1
  8. package/dist/lib/cli/commands.js +7 -4
  9. package/dist/lib/cli/evaluate.js +1 -1
  10. package/dist/lib/cli/test.js +94 -11
  11. package/dist/lib/cli/util.d.ts +13 -1
  12. package/dist/lib/cli/util.js +32 -4
  13. package/dist/lib/config.d.ts +9 -0
  14. package/dist/lib/parser.d.ts +3 -2
  15. package/dist/lib/parser.js +9 -4
  16. package/dist/lib/parsers/function.js +3 -3
  17. package/dist/lib/parsers/function.test.js +21 -6
  18. package/dist/lib/parsers/skill.js +20 -3
  19. package/dist/lib/parsers/skill.test.js +15 -17
  20. package/dist/lib/parsers/typeHints.js +3 -3
  21. package/dist/lib/parsers/typeHints.test.js +11 -2
  22. package/dist/lib/templates/backends/graphGenerator/builtinTools.d.ts +1 -1
  23. package/dist/lib/templates/backends/graphGenerator/builtinTools.js +1 -1
  24. package/dist/lib/templates/backends/graphGenerator/graphNode.d.ts +1 -1
  25. package/dist/lib/templates/backends/graphGenerator/graphNode.js +5 -2
  26. package/dist/lib/templates/backends/graphGenerator/imports.d.ts +1 -1
  27. package/dist/lib/templates/backends/graphGenerator/imports.js +69 -96
  28. package/dist/lib/templates/backends/graphGenerator/runNodeFunction.d.ts +1 -2
  29. package/dist/lib/templates/backends/graphGenerator/runNodeFunction.js +6 -3
  30. package/dist/lib/templates/backends/graphGenerator/startNode.d.ts +1 -1
  31. package/dist/lib/templates/backends/graphGenerator/startNode.js +1 -1
  32. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetch.d.ts +1 -1
  33. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetch.js +1 -1
  34. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetchJSON.d.ts +1 -1
  35. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/fetchJSON.js +1 -1
  36. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/input.d.ts +1 -1
  37. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/input.js +2 -2
  38. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/read.d.ts +1 -1
  39. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/read.js +1 -1
  40. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/readImage.d.ts +1 -1
  41. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/readImage.js +1 -1
  42. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/sleep.d.ts +1 -1
  43. package/dist/lib/templates/backends/typescriptGenerator/builtinFunctions/sleep.js +1 -1
  44. package/dist/lib/templates/backends/typescriptGenerator/builtinTools.d.ts +1 -1
  45. package/dist/lib/templates/backends/typescriptGenerator/builtinTools.js +2 -2
  46. package/dist/lib/templates/backends/typescriptGenerator/functionCallAssignment.d.ts +1 -2
  47. package/dist/lib/templates/backends/typescriptGenerator/functionCallAssignment.js +1 -1
  48. package/dist/lib/templates/backends/typescriptGenerator/functionDefinition.d.ts +1 -2
  49. package/dist/lib/templates/backends/typescriptGenerator/functionDefinition.js +5 -4
  50. package/dist/lib/templates/backends/typescriptGenerator/promptFunction.d.ts +1 -2
  51. package/dist/lib/templates/backends/typescriptGenerator/promptFunction.js +12 -8
  52. package/dist/lib/templates/backends/typescriptGenerator/toolCall.d.ts +1 -1
  53. package/dist/lib/templates/backends/typescriptGenerator/toolCall.js +32 -26
  54. package/dist/lib/templates/cli/evaluate.d.ts +3 -1
  55. package/dist/lib/templates/cli/evaluate.js +47 -3
  56. package/dist/lib/templates/cli/judgeEvaluate.d.ts +11 -0
  57. package/dist/lib/templates/cli/judgeEvaluate.js +66 -0
  58. package/dist/lib/typeChecker.d.ts +34 -0
  59. package/dist/lib/typeChecker.js +361 -0
  60. package/dist/lib/typeChecker.test.js +484 -0
  61. package/dist/scripts/agency.js +36 -5
  62. package/dist/scripts/regenerate-fixtures.js +2 -2
  63. package/dist/scripts/regenerate-graph-fixtures.js +2 -2
  64. package/package.json +7 -4
  65. package/dist/lib/backends/typescriptGenerator.test.js +0 -763
  66. /package/dist/lib/{backends/typescriptGenerator.test.d.ts → typeChecker.test.d.ts} +0 -0
@@ -0,0 +1,18 @@
1
+ type JudgeResult = {
2
+ score: number # a score from 0 to 100 where 100 means perfect match;
3
+ reasoning: string # brief explanation for the score
4
+ }
5
+
6
+ node judge(actualOutput: string, expectedOutput: string, judgePrompt: string): JudgeResult {
7
+ result: JudgeResult = llm("You are an evaluation judge. Score how well the actual output matches what was expected.
8
+
9
+ Judge prompt (evaluation criteria): ${judgePrompt}
10
+
11
+ Expected output: ${expectedOutput}
12
+
13
+ Actual output: ${actualOutput}
14
+
15
+ Provide a score from 0 to 100 where 100 means perfect match and 0 means completely wrong. Also provide brief reasoning.")
16
+
17
+ return result
18
+ }
@@ -21,6 +21,9 @@ export class BaseGenerator {
21
21
  agencyConfig = {};
22
22
  constructor({ config }) {
23
23
  this.agencyConfig = mergeDeep(this.configDefaults(), config || {});
24
+ if (this.agencyConfig.verbose) {
25
+ console.log("Generator config:", this.agencyConfig);
26
+ }
24
27
  }
25
28
  configDefaults() {
26
29
  return {};
@@ -4,7 +4,7 @@ import { generateGraph } from "./graphGenerator.js";
4
4
  import fs from "fs";
5
5
  import path from "path";
6
6
  /**
7
- * Recursively discovers all .agency/.mts fixture pairs in a directory
7
+ * Recursively discovers all .agency/.mjs fixture pairs in a directory
8
8
  */
9
9
  function discoverFixtures(fixtureDir) {
10
10
  const fixtures = [];
@@ -20,9 +20,9 @@ function discoverFixtures(fixtureDir) {
20
20
  scanDirectory(fullPath, relPath);
21
21
  }
22
22
  else if (entry.isFile() && entry.name.endsWith(".agency")) {
23
- // Found an Agency file - look for corresponding .mts
23
+ // Found an Agency file - look for corresponding .mjs
24
24
  const baseName = entry.name.replace(".agency", "");
25
- const mtsPath = path.join(dir, `${baseName}.mts`);
25
+ const mtsPath = path.join(dir, `${baseName}.mjs`);
26
26
  if (fs.existsSync(mtsPath)) {
27
27
  const nameWithoutExt = relativePath
28
28
  ? `${relativePath}/${baseName}`
@@ -41,7 +41,7 @@ function discoverFixtures(fixtureDir) {
41
41
  }
42
42
  }
43
43
  else {
44
- console.warn(`Warning: No corresponding .mts file for ${fullPath}`);
44
+ console.warn(`Warning: No corresponding .mjs file for ${fullPath}`);
45
45
  }
46
46
  }
47
47
  }
@@ -7,7 +7,6 @@ import * as renderRunNodeFunction from "../templates/backends/graphGenerator/run
7
7
  import * as renderStartNode from "../templates/backends/graphGenerator/startNode.js";
8
8
  import { TypeScriptGenerator } from "./typescriptGenerator.js";
9
9
  import { mapFunctionName } from "./typescriptGenerator/builtins.js";
10
- import { variableTypeToString } from "./typescriptGenerator/typeToString.js";
11
10
  import { TypescriptPreprocessor } from "../preprocessors/typescriptPreprocessor.js";
12
11
  export class GraphGenerator extends TypeScriptGenerator {
13
12
  typeHints = {};
@@ -144,24 +143,22 @@ export class GraphGenerator extends TypeScriptGenerator {
144
143
  return "generateLiteral not implemented";
145
144
  } */
146
145
  generateImports() {
147
- const arr = [
148
- renderImports.default({
149
- logHost: this.agencyConfig.log?.host || "",
150
- logProjectId: this.agencyConfig.log?.projectId || "",
151
- logDebugMode: this.agencyConfig.log?.debugMode || false,
152
- clientLogLevel: this.agencyConfig.client?.logLevel || "warn",
153
- clientDefaultModel: this.agencyConfig.client?.defaultModel || "gpt-4o-mini",
154
- }),
155
- ];
146
+ const args = {
147
+ logHost: this.agencyConfig.log?.host || "",
148
+ logProjectId: this.agencyConfig.log?.projectId || "",
149
+ logDebugMode: this.agencyConfig.log?.debugMode || false,
150
+ clientLogLevel: this.agencyConfig.client?.logLevel || "warn",
151
+ clientDefaultModel: this.agencyConfig.client?.defaultModel || "gpt-4o-mini",
152
+ };
153
+ const arr = [renderImports.default(args)];
156
154
  arr.push(builtinTools.default({}));
157
155
  return arr.join("\n");
158
156
  }
159
157
  preprocess() {
160
158
  const lines = [];
161
- lines.push("// @ts-nocheck\n");
162
159
  this.importedNodes.forEach((importNode) => {
163
160
  const defaultImportName = this.agencyFileToDefaultImportName(importNode.agencyFile);
164
- lines.push(`import ${defaultImportName} from "${importNode.agencyFile.replace(".agency", ".ts")}";`);
161
+ lines.push(`import ${defaultImportName} from "${importNode.agencyFile.replace(".agency", ".js")}";`);
165
162
  });
166
163
  return lines.join("\n");
167
164
  }
@@ -196,9 +193,6 @@ export class GraphGenerator extends TypeScriptGenerator {
196
193
  nodeName: node.nodeName,
197
194
  hasArgs: args.length > 0,
198
195
  argsStr,
199
- returnType: node.returnType
200
- ? variableTypeToString(node.returnType, this.typeAliases)
201
- : "any",
202
196
  }));
203
197
  }
204
198
  lines.push("export default graph;");
@@ -4,7 +4,7 @@ import { generateTypeScript } from "./typescriptGenerator.js";
4
4
  import fs from "fs";
5
5
  import path from "path";
6
6
  /**
7
- * Recursively discovers all .agency/.mts fixture pairs in a directory
7
+ * Recursively discovers all .agency/.mjs fixture pairs in a directory
8
8
  */
9
9
  function discoverFixtures(fixtureDir) {
10
10
  const fixtures = [];
@@ -20,9 +20,9 @@ function discoverFixtures(fixtureDir) {
20
20
  scanDirectory(fullPath, relPath);
21
21
  }
22
22
  else if (entry.isFile() && entry.name.endsWith(".agency")) {
23
- // Found an Agency file - look for corresponding .mts
23
+ // Found an Agency file - look for corresponding .mjs
24
24
  const baseName = entry.name.replace(".agency", "");
25
- const mtsPath = path.join(dir, `${baseName}.mts`);
25
+ const mtsPath = path.join(dir, `${baseName}.mjs`);
26
26
  if (fs.existsSync(mtsPath)) {
27
27
  const nameWithoutExt = relativePath
28
28
  ? `${relativePath}/${baseName}`
@@ -41,7 +41,7 @@ function discoverFixtures(fixtureDir) {
41
41
  }
42
42
  }
43
43
  else {
44
- console.warn(`Warning: No corresponding .mts file for ${fullPath}`);
44
+ console.warn(`Warning: No corresponding .mjs file for ${fullPath}`);
45
45
  }
46
46
  }
47
47
  }
@@ -37,7 +37,7 @@ export class TypeScriptGenerator extends BaseGenerator {
37
37
  }
38
38
  typeAliasToString(node) {
39
39
  const aliasedTypeStr = variableTypeToString(node.aliasedType, this.typeAliases);
40
- return `type ${node.aliasName} = ${aliasedTypeStr};`;
40
+ return "";
41
41
  }
42
42
  processTypeHint(node) {
43
43
  if (node.variableType.type === "typeAliasVariable") {
@@ -139,9 +139,7 @@ export class TypeScriptGenerator extends BaseGenerator {
139
139
  else {
140
140
  this.functionScopedVariables.push(variableName);
141
141
  }
142
- const typeAnnotation = typeHint
143
- ? `: ${variableTypeToString(typeHint, this.typeAliases)}`
144
- : "";
142
+ const typeAnnotation = "";
145
143
  if (value.type === "prompt") {
146
144
  return this.processPromptLiteral(variableName, typeHint, value);
147
145
  }
@@ -150,7 +148,6 @@ export class TypeScriptGenerator extends BaseGenerator {
150
148
  const code = this.processNode(value);
151
149
  return renderFunctionCallAssignment.default({
152
150
  variableName: `${this.getScopeVar()}.${variableName}`,
153
- typeAnnotation,
154
151
  functionCode: code.trim(),
155
152
  nodeContext: this.getCurrentScope().type === "node",
156
153
  globalScope: this.getCurrentScope().type === "global",
@@ -253,9 +250,6 @@ export class TypeScriptGenerator extends BaseGenerator {
253
250
  return renderFunctionDefinition.default({
254
251
  functionName,
255
252
  argsStr,
256
- returnType: node.returnType
257
- ? variableTypeToString(node.returnType, this.typeAliases)
258
- : "any",
259
253
  functionBody: bodyCode.join("\n"),
260
254
  });
261
255
  }
@@ -415,15 +409,14 @@ export class TypeScriptGenerator extends BaseGenerator {
415
409
  value: "string",
416
410
  };
417
411
  const zodSchema = mapTypeToZodSchema(_variableType, this.typeAliases);
418
- const typeString = variableTypeToString(_variableType, this.typeAliases);
419
412
  // Build prompt construction code
420
413
  const promptCode = this.buildPromptString({
421
414
  segments: prompt.segments,
422
415
  typeHints: this.typeHints,
423
416
  skills: prompt.skills || [],
424
417
  });
425
- const parts = functionArgs.map((arg) => `${arg.replace(".", "_")}: ${variableTypeToString(this.typeHints[arg] || { type: "primitiveType", value: "string" }, this.typeAliases)}`);
426
- parts.push("__metadata?: Record<string, any>");
418
+ const parts = functionArgs.map((arg) => arg.replace(".", "_"));
419
+ parts.push("__metadata");
427
420
  const argsStr = parts.join(", ");
428
421
  let _tools = "";
429
422
  if (prompt.tools) {
@@ -453,7 +446,7 @@ export class TypeScriptGenerator extends BaseGenerator {
453
446
  I'll probably need to do that for supporting type checking anyway.
454
447
  */
455
448
  const functionCalls = (prompt.tools || { type: "usesTool", toolNames: [] }).toolNames
456
- .filter((t) => BUILTIN_TOOLS.includes(t))
449
+ .filter((t) => !BUILTIN_TOOLS.includes(t))
457
450
  .map((toolName) => {
458
451
  if (!this.functionDefinitions[toolName] &&
459
452
  !this.isImportedTool(toolName)) {
@@ -473,7 +466,6 @@ export class TypeScriptGenerator extends BaseGenerator {
473
466
  variableName,
474
467
  argsStr,
475
468
  funcCallParams: [...scopedFunctionArgs, metadataObj].join(", "),
476
- typeString,
477
469
  promptCode,
478
470
  hasResponseFormat: zodSchema !== DEFAULT_SCHEMA,
479
471
  zodSchema,
@@ -487,7 +479,7 @@ export class TypeScriptGenerator extends BaseGenerator {
487
479
  });
488
480
  }
489
481
  processImportStatement(node) {
490
- return `import ${node.importedNames} from "${node.modulePath.replace(/\.agency$/, ".ts")}";`;
482
+ return `import ${node.importedNames} from "${node.modulePath.replace(/\.agency$/, ".js")}";`;
491
483
  }
492
484
  processImportNodeStatement(node) {
493
485
  return ""; // handled in preprocess in graphgenerator
@@ -500,7 +492,7 @@ export class TypeScriptGenerator extends BaseGenerator {
500
492
  `__${toolName}ToolParams`,
501
493
  ])
502
494
  .flat();
503
- return `import { ${importNames.join(", ")} } from "${node.agencyFile.replace(/\.agency$/, ".ts")}";`;
495
+ return `import { ${importNames.join(", ")} } from "${node.agencyFile.replace(/\.agency$/, ".js")}";`;
504
496
  }
505
497
  processWhileLoop(node) {
506
498
  const conditionCode = this.processNode(node.condition);
@@ -1,6 +1,6 @@
1
1
  import { AgencyConfig } from "../config.js";
2
2
  import { AgencyProgram } from "../index.js";
3
- export declare function loadConfig(configPath?: string): AgencyConfig;
3
+ export declare function loadConfig(configPath?: string, verbose?: boolean): AgencyConfig;
4
4
  export declare function readStdin(): Promise<string>;
5
5
  export declare function parse(contents: string, config: AgencyConfig): AgencyProgram;
6
6
  export declare function readFile(inputFile: string): string;
@@ -7,11 +7,14 @@ import * as fs from "fs";
7
7
  import * as path from "path";
8
8
  import { parseAgency } from "../parser.js";
9
9
  // Load configuration from agency.json
10
- export function loadConfig(configPath) {
10
+ export function loadConfig(configPath, verbose = false) {
11
11
  let config = {};
12
12
  // Determine config file path
13
13
  const defaultConfigPath = path.join(process.cwd(), "agency.json");
14
14
  const finalConfigPath = configPath || defaultConfigPath;
15
+ if (verbose) {
16
+ console.log(`Looking for config at: ${finalConfigPath}`);
17
+ }
15
18
  // Check if config file exists
16
19
  if (fs.existsSync(finalConfigPath)) {
17
20
  try {
@@ -45,7 +48,7 @@ export function readStdin() {
45
48
  }
46
49
  export function parse(contents, config) {
47
50
  const verbose = config.verbose ?? false;
48
- const parseResult = parseAgency(contents, verbose);
51
+ const parseResult = parseAgency(contents, config);
49
52
  // Check if parsing was successful
50
53
  if (!parseResult.success) {
51
54
  console.error("Parse error:");
@@ -121,7 +124,7 @@ export function compile(config, inputFile, _outputFile) {
121
124
  }
122
125
  // Resolve the absolute path of the input file to avoid duplicates
123
126
  const absoluteInputFile = path.resolve(inputFile);
124
- let outputFile = _outputFile || inputFile.replace(".agency", ".ts");
127
+ let outputFile = _outputFile || inputFile.replace(".agency", ".js");
125
128
  if (config.outDir && !_outputFile) {
126
129
  const outputDir = path.resolve(config.outDir);
127
130
  if (!fs.existsSync(outputDir)) {
@@ -145,7 +148,7 @@ export function compile(config, inputFile, _outputFile) {
145
148
  // Update the import path in the AST to reference the new .ts file
146
149
  parsedProgram.nodes.forEach((node) => {
147
150
  if (node.type === "importStatement") {
148
- node.modulePath = node.modulePath.replace(".agency", ".ts");
151
+ node.modulePath = node.modulePath.replace(".agency", ".js");
149
152
  }
150
153
  });
151
154
  const generatedCode = generateGraph(parsedProgram, config);
@@ -162,7 +162,7 @@ export async function evaluate(target, argsFilePath, resultsFilePath) {
162
162
  const argsString = hasArgs
163
163
  ? argsRecordToString(c.args, selectedNode.parameters)
164
164
  : "";
165
- const json = executeNode(filename, nodeName, hasArgs, argsString);
165
+ const json = executeNode(filename, nodeName, hasArgs, argsString, undefined);
166
166
  console.log("\nOutput:");
167
167
  console.log(JSON.stringify(json.data, null, 2));
168
168
  const ratingResponse = await prompts({
@@ -2,14 +2,15 @@ import { parseAgency } from "../parser.js";
2
2
  import { getNodesOfType } from "../utils/node.js";
3
3
  import fs from "fs";
4
4
  import prompts from "prompts";
5
- import { executeNode, parseTarget, pickANode, promptForArgs, promptForTarget, } from "./util.js";
5
+ import { executeJudge, executeNode, parseTarget, pickANode, promptForArgs, promptForTarget, } from "./util.js";
6
+ import { color } from "termcolors";
6
7
  function readFile(filename) {
7
8
  console.log("Trying to read file", filename, "...");
8
9
  const data = fs.readFileSync(filename);
9
10
  const contents = data.toString("utf8");
10
11
  return contents;
11
12
  }
12
- function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluationCriteria) {
13
+ function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluationCriteria, interruptHandlers) {
13
14
  const testFilePath = agencyFilename.replace(".agency", ".test.json");
14
15
  let tests;
15
16
  if (fs.existsSync(testFilePath)) {
@@ -18,7 +19,16 @@ function writeTestCase(agencyFilename, nodeName, input, expectedOutput, evaluati
18
19
  else {
19
20
  tests = { sourceFile: agencyFilename, tests: [] };
20
21
  }
21
- tests.tests.push({ nodeName, input, expectedOutput, evaluationCriteria });
22
+ const testCase = {
23
+ nodeName,
24
+ input,
25
+ expectedOutput,
26
+ evaluationCriteria,
27
+ };
28
+ if (interruptHandlers && interruptHandlers.length > 0) {
29
+ testCase.interruptHandlers = interruptHandlers;
30
+ }
31
+ tests.tests.push(testCase);
22
32
  fs.writeFileSync(testFilePath, JSON.stringify(tests, null, 2));
23
33
  return testFilePath;
24
34
  }
@@ -46,8 +56,58 @@ export async function fixtures(target) {
46
56
  const selectedNode = nodes.find((n) => n.nodeName === nodeName);
47
57
  let { hasArgs, argsString } = await promptForArgs(selectedNode);
48
58
  console.log("Running program from entrypoint", nodeName);
49
- const json = executeNode(filename, nodeName, hasArgs, argsString);
50
- console.log("\nOutput:");
59
+ let json = executeNode(filename, nodeName, hasArgs, argsString);
60
+ // Handle interrupt discovery
61
+ const interruptHandlers = [];
62
+ while (json.data &&
63
+ typeof json.data === "object" &&
64
+ json.data.type === "interrupt") {
65
+ console.log(`\n⚠️ Interrupt detected: "${json.data.data}"`);
66
+ const actionResponse = await prompts({
67
+ type: "select",
68
+ name: "action",
69
+ message: "How should the test handle this interrupt?",
70
+ choices: [
71
+ { title: "Approve", value: "approve" },
72
+ { title: "Reject", value: "reject" },
73
+ { title: "Modify arguments", value: "modify" },
74
+ ],
75
+ });
76
+ if (!actionResponse.action) {
77
+ console.log("Interrupt handling cancelled.");
78
+ return;
79
+ }
80
+ const handler = {
81
+ action: actionResponse.action,
82
+ expectedMessage: json.data.data, // Capture the actual message
83
+ };
84
+ if (actionResponse.action === "modify") {
85
+ let invalidJSON = true;
86
+ while (invalidJSON) {
87
+ const modifyResponse = await prompts({
88
+ type: "text",
89
+ name: "args",
90
+ message: "Enter modified arguments as JSON object:",
91
+ });
92
+ if (!modifyResponse.args) {
93
+ console.log("Interrupt handling cancelled.");
94
+ return;
95
+ }
96
+ try {
97
+ handler.modifiedArgs = JSON.parse(modifyResponse.args);
98
+ invalidJSON = false;
99
+ }
100
+ catch (e) {
101
+ console.error("Invalid JSON:", e);
102
+ return;
103
+ }
104
+ }
105
+ }
106
+ interruptHandlers.push(handler);
107
+ // Continue execution with this handler to see if there are more interrupts
108
+ json = executeNode(filename, nodeName, hasArgs, argsString, interruptHandlers);
109
+ }
110
+ console.log("\nFinal Output:");
51
111
  console.log(JSON.stringify(json.data, null, 2));
52
112
  const correctResponse = await prompts({
53
113
  type: "confirm",
@@ -103,7 +163,7 @@ export async function fixtures(target) {
103
163
  ];
104
164
  }
105
165
  const inputStr = hasArgs ? argsString : "";
106
- const testFilePath = writeTestCase(filename, nodeName, inputStr, expectedOutput, criteria);
166
+ const testFilePath = writeTestCase(filename, nodeName, inputStr, expectedOutput, criteria, interruptHandlers.length > 0 ? interruptHandlers : undefined);
107
167
  console.log(`Test case saved to ${testFilePath}`);
108
168
  }
109
169
  export async function test(testFile) {
@@ -139,24 +199,47 @@ export async function test(testFile) {
139
199
  for (let i = 0; i < total; i++) {
140
200
  const testCase = tests.tests[i];
141
201
  const hasArgs = testCase.input !== "";
142
- console.log(`\nTest ${i + 1}/${total}: node=${testCase.nodeName} input=${testCase.input || "(none)"}`);
143
- const result = executeNode(tests.sourceFile, testCase.nodeName, hasArgs, testCase.input);
202
+ const interruptInfo = testCase.interruptHandlers
203
+ ? ` interrupts=${testCase.interruptHandlers.length}`
204
+ : "";
205
+ const testNum = color.cyan(`Test ${i + 1}/${total}:`);
206
+ console.log(`\n${testNum} node=${testCase.nodeName} input=${testCase.input || "(none)"}${interruptInfo}`);
207
+ if (testCase.description) {
208
+ console.log(color.cyan("Description:", testCase.description), "\n");
209
+ }
210
+ const result = executeNode(tests.sourceFile, testCase.nodeName, hasArgs, testCase.input, testCase.interruptHandlers);
144
211
  let testPassed = true;
145
212
  for (const criterion of testCase.evaluationCriteria) {
146
213
  if (criterion.type === "exact") {
147
214
  const actual = JSON.stringify(result.data);
148
215
  if (actual === testCase.expectedOutput) {
149
- console.log(" ✓ Exact match passed");
216
+ console.log(color.green(" ✓ Exact match passed"));
150
217
  }
151
218
  else {
152
- console.log(" ✗ Exact match failed");
219
+ console.log(color.red(" ✗ Exact match failed"));
153
220
  console.log(" Expected:", testCase.expectedOutput);
154
221
  console.log(" Actual: ", actual);
155
222
  testPassed = false;
156
223
  }
157
224
  }
158
225
  else if (criterion.type === "llmJudge") {
159
- console.log(" ⚠ LLM Judge evaluation not yet supported, skipping");
226
+ const actual = JSON.stringify(result.data);
227
+ try {
228
+ const judgeResult = executeJudge(actual, testCase.expectedOutput, criterion.judgePrompt);
229
+ if (judgeResult.score >= criterion.desiredAccuracy) {
230
+ console.log(color.green(` ✓ LLM Judge passed (score: ${judgeResult.score}/${criterion.desiredAccuracy})`));
231
+ console.log(` Reasoning: ${judgeResult.reasoning}`);
232
+ }
233
+ else {
234
+ console.log(color.red(` ✗ LLM Judge failed (score: ${judgeResult.score}/${criterion.desiredAccuracy})`));
235
+ console.log(` Reasoning: ${judgeResult.reasoning}`);
236
+ testPassed = false;
237
+ }
238
+ }
239
+ catch (e) {
240
+ console.log(color.red(` ✗ LLM Judge error: ${e}`));
241
+ testPassed = false;
242
+ }
160
243
  }
161
244
  }
162
245
  if (testPassed)
@@ -12,8 +12,20 @@ export declare function promptForArgs(selectedNode: GraphNodeDefinition): Promis
12
12
  hasArgs: boolean;
13
13
  argsString: string;
14
14
  }>;
15
- export declare function executeNode(agencyFile: string, nodeName: string, hasArgs: boolean, argsString: string): {
15
+ export declare function executeNode(agencyFile: string, nodeName: string, hasArgs: boolean, argsString: string, interruptHandlers?: Array<{
16
+ action: "approve" | "reject" | "modify";
17
+ modifiedArgs?: Record<string, any>;
18
+ expectedMessage?: string;
19
+ }>): {
16
20
  data: any;
17
21
  [key: string]: any;
18
22
  };
19
23
  export declare function formatTypeHint(vt: VariableType): string;
24
+ export declare function executeJudge(actualOutput: string, expectedOutput: string, judgePrompt: string, interruptHandlers?: Array<{
25
+ action: "approve" | "reject" | "modify";
26
+ modifiedArgs?: Record<string, any>;
27
+ expectedMessage?: string;
28
+ }>): {
29
+ score: number;
30
+ reasoning: string;
31
+ };
@@ -1,7 +1,9 @@
1
1
  import prompts from "prompts";
2
2
  import fs, { readFileSync } from "fs";
3
+ import path from "path";
3
4
  import { execSync } from "child_process";
4
5
  import renderEvaluate from "../templates/cli/evaluate.js";
6
+ import renderJudgeEvaluate from "../templates/cli/judgeEvaluate.js";
5
7
  import { compile } from "./commands.js";
6
8
  export function parseTarget(target) {
7
9
  const colonIndex = target.lastIndexOf(":");
@@ -87,18 +89,22 @@ export async function promptForArgs(selectedNode) {
87
89
  }
88
90
  return { hasArgs, argsString };
89
91
  }
90
- export function executeNode(agencyFile, nodeName, hasArgs, argsString) {
91
- const outFile = agencyFile.replace(".agency", ".ts");
92
+ export function executeNode(agencyFile, nodeName, hasArgs, argsString, interruptHandlers) {
93
+ const outFile = agencyFile.replace(".agency", ".js");
92
94
  compile({}, agencyFile, outFile);
93
95
  const evaluateScript = renderEvaluate({
94
96
  filename: outFile,
95
97
  nodeName,
96
98
  hasArgs,
97
99
  args: argsString,
100
+ hasInterruptHandlers: !!interruptHandlers,
101
+ interruptHandlersJSON: interruptHandlers
102
+ ? JSON.stringify(interruptHandlers)
103
+ : undefined,
98
104
  });
99
- const evaluateFile = "__evaluate.ts";
105
+ const evaluateFile = "__evaluate.js";
100
106
  fs.writeFileSync(evaluateFile, evaluateScript);
101
- execSync(`npx tsx ${evaluateFile}`, { stdio: "inherit" });
107
+ execSync(`node ${evaluateFile}`, { stdio: "inherit" });
102
108
  const results = readFileSync("__evaluate.json", "utf-8");
103
109
  return JSON.parse(results);
104
110
  }
@@ -130,3 +136,25 @@ function serializeArgValue(value) {
130
136
  return value;
131
137
  return JSON.stringify(value);
132
138
  }
139
+ export function executeJudge(actualOutput, expectedOutput, judgePrompt, interruptHandlers) {
140
+ // Resolve the judge.agency file bundled in dist/lib/agents/
141
+ const currentDir = path.dirname(new URL(import.meta.url).pathname);
142
+ const judgeAgencyFile = path.resolve(currentDir, "../agents/judge.agency");
143
+ const judgeOutFile = "__judge.js";
144
+ compile({}, judgeAgencyFile, judgeOutFile);
145
+ const judgeScript = renderJudgeEvaluate({
146
+ judgeFilename: judgeOutFile,
147
+ actualOutput: JSON.stringify(actualOutput),
148
+ expectedOutput: JSON.stringify(expectedOutput),
149
+ judgePrompt: JSON.stringify(judgePrompt),
150
+ hasInterruptHandlers: !!interruptHandlers,
151
+ interruptHandlersJSON: interruptHandlers
152
+ ? JSON.stringify(interruptHandlers)
153
+ : undefined,
154
+ });
155
+ const judgeEvaluateFile = "__judge_evaluate.js";
156
+ fs.writeFileSync(judgeEvaluateFile, judgeScript);
157
+ execSync(`node ${judgeEvaluateFile}`, { stdio: "inherit" });
158
+ const results = readFileSync("__judge_evaluate.json", "utf-8");
159
+ return JSON.parse(results).data;
160
+ }
@@ -36,6 +36,10 @@ export interface AgencyConfig {
36
36
  * Example: ["malicious.com", "blocked.site.com"]
37
37
  */
38
38
  disallowedFetchDomains?: string[];
39
+ /**
40
+ * Optionally specify a custom host for tarsec trace collection
41
+ */
42
+ tarsecTraceHost?: string;
39
43
  /** Statelog config */
40
44
  log?: Partial<{
41
45
  host: string;
@@ -46,4 +50,9 @@ export interface AgencyConfig {
46
50
  logLevel: "error" | "warn" | "info" | "debug";
47
51
  defaultModel: string;
48
52
  }>;
53
+ /**
54
+ * If true, untyped variables are errors.
55
+ * If false (default), untyped variables are implicitly `any`.
56
+ */
57
+ strictTypes?: boolean;
49
58
  }
@@ -1,9 +1,10 @@
1
1
  import { Parser, ParserResult } from "tarsec";
2
2
  import { AgencyNode, AgencyProgram } from "./types.js";
3
+ import { AgencyConfig } from "./config.js";
3
4
  export declare const agencyNode: Parser<AgencyNode[]>;
4
5
  export declare const agencyParser: Parser<AgencyProgram>;
5
6
  export declare const _multilineCommentParser: Parser<string[]>;
6
7
  export declare const multilineCommentParser: Parser<string[][]>;
7
8
  export declare const normalizeCode: (code: string) => string;
8
- export declare function _parseAgency(input: string, verbose?: boolean): ParserResult<AgencyProgram>;
9
- export declare function parseAgency(input: string, verbose?: boolean): ParserResult<AgencyProgram>;
9
+ export declare function _parseAgency(input: string, config?: AgencyConfig): ParserResult<AgencyProgram>;
10
+ export declare function parseAgency(input: string, config?: AgencyConfig): ParserResult<AgencyProgram>;
@@ -1,4 +1,4 @@
1
- import { anyChar, between, capture, eof, many, or, search, seqC, set, str, success, trace, setInputStr, TarsecError, failure, } from "tarsec";
1
+ import { anyChar, between, capture, eof, many, or, search, seqC, set, str, success, trace, setInputStr, TarsecError, failure, setTraceHost, setTraceId, } from "tarsec";
2
2
  import { accessExpressionParser } from "./parsers/access.js";
3
3
  import { commentParser } from "./parsers/comment.js";
4
4
  import { assignmentParser, functionParser, graphNodeParser, ifParser, messageThreadParser, timeBlockParser, whileLoopParser, } from "./parsers/function.js";
@@ -11,6 +11,7 @@ import { specialVarParser } from "./parsers/specialVar.js";
11
11
  import { usesToolParser } from "./parsers/tools.js";
12
12
  import { typeAliasParser, typeHintParser } from "./parsers/typeHints.js";
13
13
  import { skillParser } from "./parsers/skill.js";
14
+ import { nanoid } from "nanoid";
14
15
  export const agencyNode = (input) => {
15
16
  const parser = many(trace("agencyParser", or(usesToolParser, importNodeStatmentParser, importToolStatmentParser, importStatmentParser, graphNodeParser, typeAliasParser, ifParser, whileLoopParser, typeHintParser, matchBlockParser, timeBlockParser, messageThreadParser, skillParser, streamingPromptLiteralParser, functionParser, returnStatementParser, specialVarParser, accessExpressionParser, assignmentParser, llmPromptFunctionCallParser, functionCallParser, commentParser, newLineParser)));
16
17
  return parser(input);
@@ -28,7 +29,7 @@ export const normalizeCode = (code) => {
28
29
  .map((line) => line.trim())
29
30
  .join("\n");
30
31
  };
31
- export function _parseAgency(input, verbose = false) {
32
+ export function _parseAgency(input, config = {}) {
32
33
  // get rid of all multiline comments
33
34
  const normalized = normalizeCode(input);
34
35
  if (normalized.trim().length === 0) {
@@ -38,12 +39,16 @@ export function _parseAgency(input, verbose = false) {
38
39
  }, "");
39
40
  }
40
41
  setInputStr(normalized);
42
+ if (config.tarsecTraceHost) {
43
+ setTraceHost("http://localhost:1465");
44
+ setTraceId(nanoid());
45
+ }
41
46
  const result = agencyParser(normalized);
42
47
  return result;
43
48
  }
44
- export function parseAgency(input, verbose = false) {
49
+ export function parseAgency(input, config = {}) {
45
50
  try {
46
- return _parseAgency(input, verbose);
51
+ return _parseAgency(input, config);
47
52
  }
48
53
  catch (error) {
49
54
  if (error instanceof TarsecError) {
@@ -1,4 +1,4 @@
1
- import { capture, captureCaptures, char, debug, many, many1, many1Till, many1WithJoin, map, optional, or, sepBy, seqC, set, space, spaces, str, succeed, trace, } from "tarsec";
1
+ import { capture, captureCaptures, char, debug, fail, many, parseError, many1, many1Till, many1WithJoin, map, optional, or, sepBy, seqC, set, space, spaces, str, succeed, trace, } from "tarsec";
2
2
  import { accessExpressionParser, indexAccessParser } from "./access.js";
3
3
  import { commentParser } from "./comment.js";
4
4
  import { functionCallParser, llmPromptFunctionCallParser, streamingPromptLiteralParser, } from "./functionCall.js";
@@ -81,8 +81,8 @@ export const ifParser = (input) => {
81
81
  export const whileLoopParser = trace("whileLoopParser", seqC(set("type", "whileLoop"), str("while"), optionalSpaces, char("("), optionalSpaces, capture(or(indexAccessParser, functionCallParser, accessExpressionParser, literalParser), "condition"), optionalSpaces, char(")"), optionalSpaces, char("{"), spaces, capture(bodyParser, "body"), optionalSpaces, char("}")));
82
82
  export const functionParameterParserWithTypeHint = trace("functionParameterParserWithTypeHint", seqC(set("type", "functionParameter"), capture(many1WithJoin(varNameChar), "name"), optionalSpaces, char(":"), optionalSpaces, capture(variableTypeParser, "typeHint")));
83
83
  export const functionParameterParser = trace("functionParameterParser", seqC(set("type", "functionParameter"), capture(many1WithJoin(varNameChar), "name")));
84
- export const functionReturnTypeParser = trace("functionReturnTypeParser", seqC(char(":"), optionalSpaces, captureCaptures(variableTypeParser)));
85
- export const _functionParser = trace("_functionParser", seqC(set("type", "function"), str("def"), many1(space), capture(many1Till(char("(")), "functionName"), char("("), optionalSpaces, capture(sepBy(comma, or(functionParameterParserWithTypeHint, functionParameterParser)), "parameters"), optionalSpaces, char(")"), optionalSpaces, capture(optional(functionReturnTypeParser), "returnType"), optionalSpaces, char("{"), optionalSpacesOrNewline, capture(or(docStringParser, succeed(undefined)), "docString"), optionalSpacesOrNewline, capture(bodyParser, "body"), optionalSpaces, char("}"), optionalSemicolon));
84
+ export const functionReturnTypeParser = trace("functionReturnTypeParser", seqC(char(":"), optionalSpaces, captureCaptures(or(variableTypeParser, parseError("Invalid return type", fail("error"))))));
85
+ export const _functionParser = trace("_functionParser", seqC(set("type", "function"), str("def"), many1(space), capture(many1Till(char("(")), "functionName"), char("("), optionalSpaces, capture(sepBy(comma, or(functionParameterParserWithTypeHint, functionParameterParser)), "parameters"), optionalSpaces, char(")"), optionalSpaces, capture(optional(functionReturnTypeParser), "returnType"), captureCaptures(parseError("Expected function body", optionalSpaces, char("{"), optionalSpacesOrNewline, capture(or(docStringParser, succeed(undefined)), "docString"), optionalSpacesOrNewline, capture(bodyParser, "body"), optionalSpaces, char("}"), optionalSemicolon))));
86
86
  export const asyncFunctionParser = (input) => {
87
87
  const parser = trace("asyncFunctionParser", seqC(str("async"), spaces, captureCaptures(_functionParser)));
88
88
  const mappedParser = map(parser, (result) => ({