@agentv/core 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -29,28 +29,28 @@ type TestMessageContent = string | readonly JsonObject[];
29
29
  * System-authored instruction message.
30
30
  */
31
31
  type SystemTestMessage = {
32
- readonly role: "system";
32
+ readonly role: 'system';
33
33
  readonly content: TestMessageContent;
34
34
  };
35
35
  /**
36
36
  * User-authored prompt message.
37
37
  */
38
38
  type UserTestMessage = {
39
- readonly role: "user";
39
+ readonly role: 'user';
40
40
  readonly content: TestMessageContent;
41
41
  };
42
42
  /**
43
43
  * Assistant response message.
44
44
  */
45
45
  type AssistantTestMessage = {
46
- readonly role: "assistant";
46
+ readonly role: 'assistant';
47
47
  readonly content: TestMessageContent;
48
48
  };
49
49
  /**
50
50
  * Tool invocation message.
51
51
  */
52
52
  type ToolTestMessage = {
53
- readonly role: "tool";
53
+ readonly role: 'tool';
54
54
  readonly content: TestMessageContent;
55
55
  };
56
56
  /**
@@ -78,7 +78,7 @@ type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
78
78
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
79
79
  type CodeEvaluatorConfig = {
80
80
  readonly name: string;
81
- readonly type: "code";
81
+ readonly type: 'code';
82
82
  readonly script: string;
83
83
  readonly resolvedScriptPath?: string;
84
84
  readonly cwd?: string;
@@ -86,7 +86,7 @@ type CodeEvaluatorConfig = {
86
86
  };
87
87
  type LlmJudgeEvaluatorConfig = {
88
88
  readonly name: string;
89
- readonly type: "llm_judge";
89
+ readonly type: 'llm_judge';
90
90
  readonly prompt?: string;
91
91
  readonly promptPath?: string;
92
92
  };
@@ -145,16 +145,16 @@ interface EvaluatorResult {
145
145
  /**
146
146
  * Convenience accessor matching the Python hit_count property.
147
147
  */
148
- declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
148
+ declare function getHitCount(result: Pick<EvaluationResult, 'hits'>): number;
149
149
 
150
- type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
150
+ type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
151
151
  interface ChatMessage {
152
152
  readonly role: ChatMessageRole;
153
153
  readonly content: string;
154
154
  readonly name?: string;
155
155
  }
156
156
  type ChatPrompt = readonly ChatMessage[];
157
- type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
157
+ type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
158
158
  interface ProviderRequest {
159
159
  readonly question: string;
160
160
  readonly systemPrompt?: string;
@@ -267,7 +267,7 @@ interface TargetDefinition {
267
267
  * - 'agent': File references only (for providers with filesystem access)
268
268
  * - 'lm': Embedded file content with XML tags (for language model providers)
269
269
  */
270
- type FormattingMode = "agent" | "lm";
270
+ type FormattingMode = 'agent' | 'lm';
271
271
  /**
272
272
  * Extract fenced code blocks from AgentV user segments.
273
273
  */
@@ -391,7 +391,7 @@ interface CodexResolvedConfig {
391
391
  readonly cwd?: string;
392
392
  readonly timeoutMs?: number;
393
393
  readonly logDir?: string;
394
- readonly logFormat?: "summary" | "json";
394
+ readonly logFormat?: 'summary' | 'json';
395
395
  }
396
396
  interface MockResolvedConfig {
397
397
  readonly response?: string;
@@ -407,11 +407,11 @@ interface VSCodeResolvedConfig {
407
407
  readonly workspaceTemplate?: string;
408
408
  }
409
409
  type CliHealthcheck = {
410
- readonly type: "http";
410
+ readonly type: 'http';
411
411
  readonly url: string;
412
412
  readonly timeoutMs?: number;
413
413
  } | {
414
- readonly type: "command";
414
+ readonly type: 'command';
415
415
  readonly commandTemplate: string;
416
416
  readonly timeoutMs?: number;
417
417
  readonly cwd?: string;
@@ -425,49 +425,49 @@ interface CliResolvedConfig {
425
425
  readonly verbose?: boolean;
426
426
  }
427
427
  type ResolvedTarget = {
428
- readonly kind: "azure";
428
+ readonly kind: 'azure';
429
429
  readonly name: string;
430
430
  readonly judgeTarget?: string;
431
431
  readonly workers?: number;
432
432
  readonly providerBatching?: boolean;
433
433
  readonly config: AzureResolvedConfig;
434
434
  } | {
435
- readonly kind: "anthropic";
435
+ readonly kind: 'anthropic';
436
436
  readonly name: string;
437
437
  readonly judgeTarget?: string;
438
438
  readonly workers?: number;
439
439
  readonly providerBatching?: boolean;
440
440
  readonly config: AnthropicResolvedConfig;
441
441
  } | {
442
- readonly kind: "gemini";
442
+ readonly kind: 'gemini';
443
443
  readonly name: string;
444
444
  readonly judgeTarget?: string;
445
445
  readonly workers?: number;
446
446
  readonly providerBatching?: boolean;
447
447
  readonly config: GeminiResolvedConfig;
448
448
  } | {
449
- readonly kind: "codex";
449
+ readonly kind: 'codex';
450
450
  readonly name: string;
451
451
  readonly judgeTarget?: string;
452
452
  readonly workers?: number;
453
453
  readonly providerBatching?: boolean;
454
454
  readonly config: CodexResolvedConfig;
455
455
  } | {
456
- readonly kind: "mock";
456
+ readonly kind: 'mock';
457
457
  readonly name: string;
458
458
  readonly judgeTarget?: string;
459
459
  readonly workers?: number;
460
460
  readonly providerBatching?: boolean;
461
461
  readonly config: MockResolvedConfig;
462
462
  } | {
463
- readonly kind: "vscode" | "vscode-insiders";
463
+ readonly kind: 'vscode' | 'vscode-insiders';
464
464
  readonly name: string;
465
465
  readonly judgeTarget?: string;
466
466
  readonly workers?: number;
467
467
  readonly providerBatching?: boolean;
468
468
  readonly config: VSCodeResolvedConfig;
469
469
  } | {
470
- readonly kind: "cli";
470
+ readonly kind: 'cli';
471
471
  readonly name: string;
472
472
  readonly judgeTarget?: string;
473
473
  readonly workers?: number;
@@ -480,7 +480,7 @@ declare function readTargetDefinitions(filePath: string): Promise<readonly Targe
480
480
  declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
481
481
 
482
482
  interface EnsureSubagentsOptions {
483
- readonly kind: "vscode" | "vscode-insiders";
483
+ readonly kind: 'vscode' | 'vscode-insiders';
484
484
  readonly count: number;
485
485
  readonly verbose?: boolean;
486
486
  }
@@ -595,7 +595,7 @@ interface RunEvalCaseOptions {
595
595
  interface ProgressEvent {
596
596
  readonly workerId: number;
597
597
  readonly evalId: string;
598
- readonly status: "pending" | "running" | "completed" | "failed";
598
+ readonly status: 'pending' | 'running' | 'completed' | 'failed';
599
599
  readonly startedAt?: number;
600
600
  readonly completedAt?: number;
601
601
  readonly error?: string;
package/dist/index.d.ts CHANGED
@@ -29,28 +29,28 @@ type TestMessageContent = string | readonly JsonObject[];
29
29
  * System-authored instruction message.
30
30
  */
31
31
  type SystemTestMessage = {
32
- readonly role: "system";
32
+ readonly role: 'system';
33
33
  readonly content: TestMessageContent;
34
34
  };
35
35
  /**
36
36
  * User-authored prompt message.
37
37
  */
38
38
  type UserTestMessage = {
39
- readonly role: "user";
39
+ readonly role: 'user';
40
40
  readonly content: TestMessageContent;
41
41
  };
42
42
  /**
43
43
  * Assistant response message.
44
44
  */
45
45
  type AssistantTestMessage = {
46
- readonly role: "assistant";
46
+ readonly role: 'assistant';
47
47
  readonly content: TestMessageContent;
48
48
  };
49
49
  /**
50
50
  * Tool invocation message.
51
51
  */
52
52
  type ToolTestMessage = {
53
- readonly role: "tool";
53
+ readonly role: 'tool';
54
54
  readonly content: TestMessageContent;
55
55
  };
56
56
  /**
@@ -78,7 +78,7 @@ type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
78
78
  declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
79
79
  type CodeEvaluatorConfig = {
80
80
  readonly name: string;
81
- readonly type: "code";
81
+ readonly type: 'code';
82
82
  readonly script: string;
83
83
  readonly resolvedScriptPath?: string;
84
84
  readonly cwd?: string;
@@ -86,7 +86,7 @@ type CodeEvaluatorConfig = {
86
86
  };
87
87
  type LlmJudgeEvaluatorConfig = {
88
88
  readonly name: string;
89
- readonly type: "llm_judge";
89
+ readonly type: 'llm_judge';
90
90
  readonly prompt?: string;
91
91
  readonly promptPath?: string;
92
92
  };
@@ -145,16 +145,16 @@ interface EvaluatorResult {
145
145
  /**
146
146
  * Convenience accessor matching the Python hit_count property.
147
147
  */
148
- declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
148
+ declare function getHitCount(result: Pick<EvaluationResult, 'hits'>): number;
149
149
 
150
- type ChatMessageRole = "system" | "user" | "assistant" | "tool" | "function";
150
+ type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
151
151
  interface ChatMessage {
152
152
  readonly role: ChatMessageRole;
153
153
  readonly content: string;
154
154
  readonly name?: string;
155
155
  }
156
156
  type ChatPrompt = readonly ChatMessage[];
157
- type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
157
+ type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
158
158
  interface ProviderRequest {
159
159
  readonly question: string;
160
160
  readonly systemPrompt?: string;
@@ -267,7 +267,7 @@ interface TargetDefinition {
267
267
  * - 'agent': File references only (for providers with filesystem access)
268
268
  * - 'lm': Embedded file content with XML tags (for language model providers)
269
269
  */
270
- type FormattingMode = "agent" | "lm";
270
+ type FormattingMode = 'agent' | 'lm';
271
271
  /**
272
272
  * Extract fenced code blocks from AgentV user segments.
273
273
  */
@@ -391,7 +391,7 @@ interface CodexResolvedConfig {
391
391
  readonly cwd?: string;
392
392
  readonly timeoutMs?: number;
393
393
  readonly logDir?: string;
394
- readonly logFormat?: "summary" | "json";
394
+ readonly logFormat?: 'summary' | 'json';
395
395
  }
396
396
  interface MockResolvedConfig {
397
397
  readonly response?: string;
@@ -407,11 +407,11 @@ interface VSCodeResolvedConfig {
407
407
  readonly workspaceTemplate?: string;
408
408
  }
409
409
  type CliHealthcheck = {
410
- readonly type: "http";
410
+ readonly type: 'http';
411
411
  readonly url: string;
412
412
  readonly timeoutMs?: number;
413
413
  } | {
414
- readonly type: "command";
414
+ readonly type: 'command';
415
415
  readonly commandTemplate: string;
416
416
  readonly timeoutMs?: number;
417
417
  readonly cwd?: string;
@@ -425,49 +425,49 @@ interface CliResolvedConfig {
425
425
  readonly verbose?: boolean;
426
426
  }
427
427
  type ResolvedTarget = {
428
- readonly kind: "azure";
428
+ readonly kind: 'azure';
429
429
  readonly name: string;
430
430
  readonly judgeTarget?: string;
431
431
  readonly workers?: number;
432
432
  readonly providerBatching?: boolean;
433
433
  readonly config: AzureResolvedConfig;
434
434
  } | {
435
- readonly kind: "anthropic";
435
+ readonly kind: 'anthropic';
436
436
  readonly name: string;
437
437
  readonly judgeTarget?: string;
438
438
  readonly workers?: number;
439
439
  readonly providerBatching?: boolean;
440
440
  readonly config: AnthropicResolvedConfig;
441
441
  } | {
442
- readonly kind: "gemini";
442
+ readonly kind: 'gemini';
443
443
  readonly name: string;
444
444
  readonly judgeTarget?: string;
445
445
  readonly workers?: number;
446
446
  readonly providerBatching?: boolean;
447
447
  readonly config: GeminiResolvedConfig;
448
448
  } | {
449
- readonly kind: "codex";
449
+ readonly kind: 'codex';
450
450
  readonly name: string;
451
451
  readonly judgeTarget?: string;
452
452
  readonly workers?: number;
453
453
  readonly providerBatching?: boolean;
454
454
  readonly config: CodexResolvedConfig;
455
455
  } | {
456
- readonly kind: "mock";
456
+ readonly kind: 'mock';
457
457
  readonly name: string;
458
458
  readonly judgeTarget?: string;
459
459
  readonly workers?: number;
460
460
  readonly providerBatching?: boolean;
461
461
  readonly config: MockResolvedConfig;
462
462
  } | {
463
- readonly kind: "vscode" | "vscode-insiders";
463
+ readonly kind: 'vscode' | 'vscode-insiders';
464
464
  readonly name: string;
465
465
  readonly judgeTarget?: string;
466
466
  readonly workers?: number;
467
467
  readonly providerBatching?: boolean;
468
468
  readonly config: VSCodeResolvedConfig;
469
469
  } | {
470
- readonly kind: "cli";
470
+ readonly kind: 'cli';
471
471
  readonly name: string;
472
472
  readonly judgeTarget?: string;
473
473
  readonly workers?: number;
@@ -480,7 +480,7 @@ declare function readTargetDefinitions(filePath: string): Promise<readonly Targe
480
480
  declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
481
481
 
482
482
  interface EnsureSubagentsOptions {
483
- readonly kind: "vscode" | "vscode-insiders";
483
+ readonly kind: 'vscode' | 'vscode-insiders';
484
484
  readonly count: number;
485
485
  readonly verbose?: boolean;
486
486
  }
@@ -595,7 +595,7 @@ interface RunEvalCaseOptions {
595
595
  interface ProgressEvent {
596
596
  readonly workerId: number;
597
597
  readonly evalId: string;
598
- readonly status: "pending" | "running" | "completed" | "failed";
598
+ readonly status: 'pending' | 'running' | 'completed' | 'failed';
599
599
  readonly startedAt?: number;
600
600
  readonly completedAt?: number;
601
601
  readonly error?: string;
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ import {
8
8
  readTextFile,
9
9
  resolveFileReference,
10
10
  resolveTargetDefinition
11
- } from "./chunk-QHEZJRTU.js";
11
+ } from "./chunk-SVY324GN.js";
12
12
 
13
13
  // src/evaluation/types.ts
14
14
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -2496,6 +2496,43 @@ import {
2496
2496
  getSubagentRoot,
2497
2497
  provisionSubagents
2498
2498
  } from "subagent";
2499
+
2500
+ // src/evaluation/providers/vscode-templates.ts
2501
+ var AGENTV_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
2502
+
2503
+ **IMPORTANT**: You are processing an evaluation request from AgentV. Follow these exact steps:
2504
+
2505
+ 1. Read and understand all guideline files and attachments provided above
2506
+ 2. Process the user query below according to the guidelines
2507
+ 3. Write your complete response to: {{responseFileTmp}}
2508
+ 4. When completely finished, run these PowerShell commands to signal completion:
2509
+ \`\`\`powershell
2510
+ Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
2511
+ if (Test-Path subagent.lock) { Remove-Item subagent.lock }
2512
+ \`\`\`
2513
+
2514
+ Do not proceed to step 4 until your response is completely written to the temporary file.
2515
+
2516
+ [[ ## task ## ]]
2517
+
2518
+ {{userQuery}}
2519
+ `;
2520
+ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
2521
+
2522
+ **IMPORTANT**: You are processing a batch evaluation request from AgentV. Follow these exact steps:
2523
+
2524
+ 1. Read and understand all guideline files and attachments provided above
2525
+ 2. Process the user query below according to the guidelines
2526
+ 3. Write your complete response to: {{responseFileTmp}}
2527
+ 4. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
2528
+ 5. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
2529
+
2530
+ [[ ## task ## ]]
2531
+
2532
+ {{userQuery}}
2533
+ `;
2534
+
2535
+ // src/evaluation/providers/vscode.ts
2499
2536
  var VSCodeProvider = class {
2500
2537
  id;
2501
2538
  kind;
@@ -2517,6 +2554,7 @@ var VSCodeProvider = class {
2517
2554
  const session = await dispatchAgentSession({
2518
2555
  userQuery: promptContent,
2519
2556
  extraAttachments: inputFiles,
2557
+ requestTemplate: AGENTV_REQUEST_TEMPLATE,
2520
2558
  wait: this.config.waitForResponse,
2521
2559
  dryRun: this.config.dryRun,
2522
2560
  vscodeCmd: this.config.command,
@@ -2563,6 +2601,7 @@ var VSCodeProvider = class {
2563
2601
  const session = await dispatchBatchAgent({
2564
2602
  userQueries,
2565
2603
  extraAttachments: combinedInputFiles,
2604
+ requestTemplate: AGENTV_BATCH_REQUEST_TEMPLATE,
2566
2605
  wait: this.config.waitForResponse,
2567
2606
  dryRun: this.config.dryRun,
2568
2607
  vscodeCmd: this.config.command,