@agentv/core 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-YCIZ33BO.js → chunk-SVY324GN.js} +1 -1
- package/dist/chunk-SVY324GN.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.d.cts +2 -2
- package/dist/evaluation/validation/index.d.ts +2 -2
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +39 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +22 -22
- package/dist/index.d.ts +22 -22
- package/dist/index.js +40 -1
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-YCIZ33BO.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -29,28 +29,28 @@ type TestMessageContent = string | readonly JsonObject[];
|
|
|
29
29
|
* System-authored instruction message.
|
|
30
30
|
*/
|
|
31
31
|
type SystemTestMessage = {
|
|
32
|
-
readonly role:
|
|
32
|
+
readonly role: 'system';
|
|
33
33
|
readonly content: TestMessageContent;
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
36
|
* User-authored prompt message.
|
|
37
37
|
*/
|
|
38
38
|
type UserTestMessage = {
|
|
39
|
-
readonly role:
|
|
39
|
+
readonly role: 'user';
|
|
40
40
|
readonly content: TestMessageContent;
|
|
41
41
|
};
|
|
42
42
|
/**
|
|
43
43
|
* Assistant response message.
|
|
44
44
|
*/
|
|
45
45
|
type AssistantTestMessage = {
|
|
46
|
-
readonly role:
|
|
46
|
+
readonly role: 'assistant';
|
|
47
47
|
readonly content: TestMessageContent;
|
|
48
48
|
};
|
|
49
49
|
/**
|
|
50
50
|
* Tool invocation message.
|
|
51
51
|
*/
|
|
52
52
|
type ToolTestMessage = {
|
|
53
|
-
readonly role:
|
|
53
|
+
readonly role: 'tool';
|
|
54
54
|
readonly content: TestMessageContent;
|
|
55
55
|
};
|
|
56
56
|
/**
|
|
@@ -78,7 +78,7 @@ type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
|
|
|
78
78
|
declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
|
|
79
79
|
type CodeEvaluatorConfig = {
|
|
80
80
|
readonly name: string;
|
|
81
|
-
readonly type:
|
|
81
|
+
readonly type: 'code';
|
|
82
82
|
readonly script: string;
|
|
83
83
|
readonly resolvedScriptPath?: string;
|
|
84
84
|
readonly cwd?: string;
|
|
@@ -86,7 +86,7 @@ type CodeEvaluatorConfig = {
|
|
|
86
86
|
};
|
|
87
87
|
type LlmJudgeEvaluatorConfig = {
|
|
88
88
|
readonly name: string;
|
|
89
|
-
readonly type:
|
|
89
|
+
readonly type: 'llm_judge';
|
|
90
90
|
readonly prompt?: string;
|
|
91
91
|
readonly promptPath?: string;
|
|
92
92
|
};
|
|
@@ -145,16 +145,16 @@ interface EvaluatorResult {
|
|
|
145
145
|
/**
|
|
146
146
|
* Convenience accessor matching the Python hit_count property.
|
|
147
147
|
*/
|
|
148
|
-
declare function getHitCount(result: Pick<EvaluationResult,
|
|
148
|
+
declare function getHitCount(result: Pick<EvaluationResult, 'hits'>): number;
|
|
149
149
|
|
|
150
|
-
type ChatMessageRole =
|
|
150
|
+
type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
|
|
151
151
|
interface ChatMessage {
|
|
152
152
|
readonly role: ChatMessageRole;
|
|
153
153
|
readonly content: string;
|
|
154
154
|
readonly name?: string;
|
|
155
155
|
}
|
|
156
156
|
type ChatPrompt = readonly ChatMessage[];
|
|
157
|
-
type ProviderKind =
|
|
157
|
+
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
158
158
|
interface ProviderRequest {
|
|
159
159
|
readonly question: string;
|
|
160
160
|
readonly systemPrompt?: string;
|
|
@@ -267,7 +267,7 @@ interface TargetDefinition {
|
|
|
267
267
|
* - 'agent': File references only (for providers with filesystem access)
|
|
268
268
|
* - 'lm': Embedded file content with XML tags (for language model providers)
|
|
269
269
|
*/
|
|
270
|
-
type FormattingMode =
|
|
270
|
+
type FormattingMode = 'agent' | 'lm';
|
|
271
271
|
/**
|
|
272
272
|
* Extract fenced code blocks from AgentV user segments.
|
|
273
273
|
*/
|
|
@@ -391,7 +391,7 @@ interface CodexResolvedConfig {
|
|
|
391
391
|
readonly cwd?: string;
|
|
392
392
|
readonly timeoutMs?: number;
|
|
393
393
|
readonly logDir?: string;
|
|
394
|
-
readonly logFormat?:
|
|
394
|
+
readonly logFormat?: 'summary' | 'json';
|
|
395
395
|
}
|
|
396
396
|
interface MockResolvedConfig {
|
|
397
397
|
readonly response?: string;
|
|
@@ -407,11 +407,11 @@ interface VSCodeResolvedConfig {
|
|
|
407
407
|
readonly workspaceTemplate?: string;
|
|
408
408
|
}
|
|
409
409
|
type CliHealthcheck = {
|
|
410
|
-
readonly type:
|
|
410
|
+
readonly type: 'http';
|
|
411
411
|
readonly url: string;
|
|
412
412
|
readonly timeoutMs?: number;
|
|
413
413
|
} | {
|
|
414
|
-
readonly type:
|
|
414
|
+
readonly type: 'command';
|
|
415
415
|
readonly commandTemplate: string;
|
|
416
416
|
readonly timeoutMs?: number;
|
|
417
417
|
readonly cwd?: string;
|
|
@@ -425,49 +425,49 @@ interface CliResolvedConfig {
|
|
|
425
425
|
readonly verbose?: boolean;
|
|
426
426
|
}
|
|
427
427
|
type ResolvedTarget = {
|
|
428
|
-
readonly kind:
|
|
428
|
+
readonly kind: 'azure';
|
|
429
429
|
readonly name: string;
|
|
430
430
|
readonly judgeTarget?: string;
|
|
431
431
|
readonly workers?: number;
|
|
432
432
|
readonly providerBatching?: boolean;
|
|
433
433
|
readonly config: AzureResolvedConfig;
|
|
434
434
|
} | {
|
|
435
|
-
readonly kind:
|
|
435
|
+
readonly kind: 'anthropic';
|
|
436
436
|
readonly name: string;
|
|
437
437
|
readonly judgeTarget?: string;
|
|
438
438
|
readonly workers?: number;
|
|
439
439
|
readonly providerBatching?: boolean;
|
|
440
440
|
readonly config: AnthropicResolvedConfig;
|
|
441
441
|
} | {
|
|
442
|
-
readonly kind:
|
|
442
|
+
readonly kind: 'gemini';
|
|
443
443
|
readonly name: string;
|
|
444
444
|
readonly judgeTarget?: string;
|
|
445
445
|
readonly workers?: number;
|
|
446
446
|
readonly providerBatching?: boolean;
|
|
447
447
|
readonly config: GeminiResolvedConfig;
|
|
448
448
|
} | {
|
|
449
|
-
readonly kind:
|
|
449
|
+
readonly kind: 'codex';
|
|
450
450
|
readonly name: string;
|
|
451
451
|
readonly judgeTarget?: string;
|
|
452
452
|
readonly workers?: number;
|
|
453
453
|
readonly providerBatching?: boolean;
|
|
454
454
|
readonly config: CodexResolvedConfig;
|
|
455
455
|
} | {
|
|
456
|
-
readonly kind:
|
|
456
|
+
readonly kind: 'mock';
|
|
457
457
|
readonly name: string;
|
|
458
458
|
readonly judgeTarget?: string;
|
|
459
459
|
readonly workers?: number;
|
|
460
460
|
readonly providerBatching?: boolean;
|
|
461
461
|
readonly config: MockResolvedConfig;
|
|
462
462
|
} | {
|
|
463
|
-
readonly kind:
|
|
463
|
+
readonly kind: 'vscode' | 'vscode-insiders';
|
|
464
464
|
readonly name: string;
|
|
465
465
|
readonly judgeTarget?: string;
|
|
466
466
|
readonly workers?: number;
|
|
467
467
|
readonly providerBatching?: boolean;
|
|
468
468
|
readonly config: VSCodeResolvedConfig;
|
|
469
469
|
} | {
|
|
470
|
-
readonly kind:
|
|
470
|
+
readonly kind: 'cli';
|
|
471
471
|
readonly name: string;
|
|
472
472
|
readonly judgeTarget?: string;
|
|
473
473
|
readonly workers?: number;
|
|
@@ -480,7 +480,7 @@ declare function readTargetDefinitions(filePath: string): Promise<readonly Targe
|
|
|
480
480
|
declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
|
|
481
481
|
|
|
482
482
|
interface EnsureSubagentsOptions {
|
|
483
|
-
readonly kind:
|
|
483
|
+
readonly kind: 'vscode' | 'vscode-insiders';
|
|
484
484
|
readonly count: number;
|
|
485
485
|
readonly verbose?: boolean;
|
|
486
486
|
}
|
|
@@ -595,7 +595,7 @@ interface RunEvalCaseOptions {
|
|
|
595
595
|
interface ProgressEvent {
|
|
596
596
|
readonly workerId: number;
|
|
597
597
|
readonly evalId: string;
|
|
598
|
-
readonly status:
|
|
598
|
+
readonly status: 'pending' | 'running' | 'completed' | 'failed';
|
|
599
599
|
readonly startedAt?: number;
|
|
600
600
|
readonly completedAt?: number;
|
|
601
601
|
readonly error?: string;
|
package/dist/index.d.ts
CHANGED
|
@@ -29,28 +29,28 @@ type TestMessageContent = string | readonly JsonObject[];
|
|
|
29
29
|
* System-authored instruction message.
|
|
30
30
|
*/
|
|
31
31
|
type SystemTestMessage = {
|
|
32
|
-
readonly role:
|
|
32
|
+
readonly role: 'system';
|
|
33
33
|
readonly content: TestMessageContent;
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
36
|
* User-authored prompt message.
|
|
37
37
|
*/
|
|
38
38
|
type UserTestMessage = {
|
|
39
|
-
readonly role:
|
|
39
|
+
readonly role: 'user';
|
|
40
40
|
readonly content: TestMessageContent;
|
|
41
41
|
};
|
|
42
42
|
/**
|
|
43
43
|
* Assistant response message.
|
|
44
44
|
*/
|
|
45
45
|
type AssistantTestMessage = {
|
|
46
|
-
readonly role:
|
|
46
|
+
readonly role: 'assistant';
|
|
47
47
|
readonly content: TestMessageContent;
|
|
48
48
|
};
|
|
49
49
|
/**
|
|
50
50
|
* Tool invocation message.
|
|
51
51
|
*/
|
|
52
52
|
type ToolTestMessage = {
|
|
53
|
-
readonly role:
|
|
53
|
+
readonly role: 'tool';
|
|
54
54
|
readonly content: TestMessageContent;
|
|
55
55
|
};
|
|
56
56
|
/**
|
|
@@ -78,7 +78,7 @@ type EvaluatorKind = (typeof EVALUATOR_KIND_VALUES)[number];
|
|
|
78
78
|
declare function isEvaluatorKind(value: unknown): value is EvaluatorKind;
|
|
79
79
|
type CodeEvaluatorConfig = {
|
|
80
80
|
readonly name: string;
|
|
81
|
-
readonly type:
|
|
81
|
+
readonly type: 'code';
|
|
82
82
|
readonly script: string;
|
|
83
83
|
readonly resolvedScriptPath?: string;
|
|
84
84
|
readonly cwd?: string;
|
|
@@ -86,7 +86,7 @@ type CodeEvaluatorConfig = {
|
|
|
86
86
|
};
|
|
87
87
|
type LlmJudgeEvaluatorConfig = {
|
|
88
88
|
readonly name: string;
|
|
89
|
-
readonly type:
|
|
89
|
+
readonly type: 'llm_judge';
|
|
90
90
|
readonly prompt?: string;
|
|
91
91
|
readonly promptPath?: string;
|
|
92
92
|
};
|
|
@@ -145,16 +145,16 @@ interface EvaluatorResult {
|
|
|
145
145
|
/**
|
|
146
146
|
* Convenience accessor matching the Python hit_count property.
|
|
147
147
|
*/
|
|
148
|
-
declare function getHitCount(result: Pick<EvaluationResult,
|
|
148
|
+
declare function getHitCount(result: Pick<EvaluationResult, 'hits'>): number;
|
|
149
149
|
|
|
150
|
-
type ChatMessageRole =
|
|
150
|
+
type ChatMessageRole = 'system' | 'user' | 'assistant' | 'tool' | 'function';
|
|
151
151
|
interface ChatMessage {
|
|
152
152
|
readonly role: ChatMessageRole;
|
|
153
153
|
readonly content: string;
|
|
154
154
|
readonly name?: string;
|
|
155
155
|
}
|
|
156
156
|
type ChatPrompt = readonly ChatMessage[];
|
|
157
|
-
type ProviderKind =
|
|
157
|
+
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
158
158
|
interface ProviderRequest {
|
|
159
159
|
readonly question: string;
|
|
160
160
|
readonly systemPrompt?: string;
|
|
@@ -267,7 +267,7 @@ interface TargetDefinition {
|
|
|
267
267
|
* - 'agent': File references only (for providers with filesystem access)
|
|
268
268
|
* - 'lm': Embedded file content with XML tags (for language model providers)
|
|
269
269
|
*/
|
|
270
|
-
type FormattingMode =
|
|
270
|
+
type FormattingMode = 'agent' | 'lm';
|
|
271
271
|
/**
|
|
272
272
|
* Extract fenced code blocks from AgentV user segments.
|
|
273
273
|
*/
|
|
@@ -391,7 +391,7 @@ interface CodexResolvedConfig {
|
|
|
391
391
|
readonly cwd?: string;
|
|
392
392
|
readonly timeoutMs?: number;
|
|
393
393
|
readonly logDir?: string;
|
|
394
|
-
readonly logFormat?:
|
|
394
|
+
readonly logFormat?: 'summary' | 'json';
|
|
395
395
|
}
|
|
396
396
|
interface MockResolvedConfig {
|
|
397
397
|
readonly response?: string;
|
|
@@ -407,11 +407,11 @@ interface VSCodeResolvedConfig {
|
|
|
407
407
|
readonly workspaceTemplate?: string;
|
|
408
408
|
}
|
|
409
409
|
type CliHealthcheck = {
|
|
410
|
-
readonly type:
|
|
410
|
+
readonly type: 'http';
|
|
411
411
|
readonly url: string;
|
|
412
412
|
readonly timeoutMs?: number;
|
|
413
413
|
} | {
|
|
414
|
-
readonly type:
|
|
414
|
+
readonly type: 'command';
|
|
415
415
|
readonly commandTemplate: string;
|
|
416
416
|
readonly timeoutMs?: number;
|
|
417
417
|
readonly cwd?: string;
|
|
@@ -425,49 +425,49 @@ interface CliResolvedConfig {
|
|
|
425
425
|
readonly verbose?: boolean;
|
|
426
426
|
}
|
|
427
427
|
type ResolvedTarget = {
|
|
428
|
-
readonly kind:
|
|
428
|
+
readonly kind: 'azure';
|
|
429
429
|
readonly name: string;
|
|
430
430
|
readonly judgeTarget?: string;
|
|
431
431
|
readonly workers?: number;
|
|
432
432
|
readonly providerBatching?: boolean;
|
|
433
433
|
readonly config: AzureResolvedConfig;
|
|
434
434
|
} | {
|
|
435
|
-
readonly kind:
|
|
435
|
+
readonly kind: 'anthropic';
|
|
436
436
|
readonly name: string;
|
|
437
437
|
readonly judgeTarget?: string;
|
|
438
438
|
readonly workers?: number;
|
|
439
439
|
readonly providerBatching?: boolean;
|
|
440
440
|
readonly config: AnthropicResolvedConfig;
|
|
441
441
|
} | {
|
|
442
|
-
readonly kind:
|
|
442
|
+
readonly kind: 'gemini';
|
|
443
443
|
readonly name: string;
|
|
444
444
|
readonly judgeTarget?: string;
|
|
445
445
|
readonly workers?: number;
|
|
446
446
|
readonly providerBatching?: boolean;
|
|
447
447
|
readonly config: GeminiResolvedConfig;
|
|
448
448
|
} | {
|
|
449
|
-
readonly kind:
|
|
449
|
+
readonly kind: 'codex';
|
|
450
450
|
readonly name: string;
|
|
451
451
|
readonly judgeTarget?: string;
|
|
452
452
|
readonly workers?: number;
|
|
453
453
|
readonly providerBatching?: boolean;
|
|
454
454
|
readonly config: CodexResolvedConfig;
|
|
455
455
|
} | {
|
|
456
|
-
readonly kind:
|
|
456
|
+
readonly kind: 'mock';
|
|
457
457
|
readonly name: string;
|
|
458
458
|
readonly judgeTarget?: string;
|
|
459
459
|
readonly workers?: number;
|
|
460
460
|
readonly providerBatching?: boolean;
|
|
461
461
|
readonly config: MockResolvedConfig;
|
|
462
462
|
} | {
|
|
463
|
-
readonly kind:
|
|
463
|
+
readonly kind: 'vscode' | 'vscode-insiders';
|
|
464
464
|
readonly name: string;
|
|
465
465
|
readonly judgeTarget?: string;
|
|
466
466
|
readonly workers?: number;
|
|
467
467
|
readonly providerBatching?: boolean;
|
|
468
468
|
readonly config: VSCodeResolvedConfig;
|
|
469
469
|
} | {
|
|
470
|
-
readonly kind:
|
|
470
|
+
readonly kind: 'cli';
|
|
471
471
|
readonly name: string;
|
|
472
472
|
readonly judgeTarget?: string;
|
|
473
473
|
readonly workers?: number;
|
|
@@ -480,7 +480,7 @@ declare function readTargetDefinitions(filePath: string): Promise<readonly Targe
|
|
|
480
480
|
declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
|
|
481
481
|
|
|
482
482
|
interface EnsureSubagentsOptions {
|
|
483
|
-
readonly kind:
|
|
483
|
+
readonly kind: 'vscode' | 'vscode-insiders';
|
|
484
484
|
readonly count: number;
|
|
485
485
|
readonly verbose?: boolean;
|
|
486
486
|
}
|
|
@@ -595,7 +595,7 @@ interface RunEvalCaseOptions {
|
|
|
595
595
|
interface ProgressEvent {
|
|
596
596
|
readonly workerId: number;
|
|
597
597
|
readonly evalId: string;
|
|
598
|
-
readonly status:
|
|
598
|
+
readonly status: 'pending' | 'running' | 'completed' | 'failed';
|
|
599
599
|
readonly startedAt?: number;
|
|
600
600
|
readonly completedAt?: number;
|
|
601
601
|
readonly error?: string;
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
readTextFile,
|
|
9
9
|
resolveFileReference,
|
|
10
10
|
resolveTargetDefinition
|
|
11
|
-
} from "./chunk-YCIZ33BO.js";
|
|
11
|
+
} from "./chunk-SVY324GN.js";
|
|
12
12
|
|
|
13
13
|
// src/evaluation/types.ts
|
|
14
14
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -2496,6 +2496,43 @@ import {
|
|
|
2496
2496
|
getSubagentRoot,
|
|
2497
2497
|
provisionSubagents
|
|
2498
2498
|
} from "subagent";
|
|
2499
|
+
|
|
2500
|
+
// src/evaluation/providers/vscode-templates.ts
|
|
2501
|
+
var AGENTV_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
|
|
2502
|
+
|
|
2503
|
+
**IMPORTANT**: You are processing an evaluation request from AgentV. Follow these exact steps:
|
|
2504
|
+
|
|
2505
|
+
1. Read and understand all guideline files and attachments provided above
|
|
2506
|
+
2. Process the user query below according to the guidelines
|
|
2507
|
+
3. Write your complete response to: {{responseFileTmp}}
|
|
2508
|
+
4. When completely finished, run these PowerShell commands to signal completion:
|
|
2509
|
+
\`\`\`powershell
|
|
2510
|
+
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
2511
|
+
if (Test-Path subagent.lock) { Remove-Item subagent.lock }
|
|
2512
|
+
\`\`\`
|
|
2513
|
+
|
|
2514
|
+
Do not proceed to step 4 until your response is completely written to the temporary file.
|
|
2515
|
+
|
|
2516
|
+
[[ ## task ## ]]
|
|
2517
|
+
|
|
2518
|
+
{{userQuery}}
|
|
2519
|
+
`;
|
|
2520
|
+
var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
|
|
2521
|
+
|
|
2522
|
+
**IMPORTANT**: You are processing a batch evaluation request from AgentV. Follow these exact steps:
|
|
2523
|
+
|
|
2524
|
+
1. Read and understand all guideline files and attachments provided above
|
|
2525
|
+
2. Process the user query below according to the guidelines
|
|
2526
|
+
3. Write your complete response to: {{responseFileTmp}}
|
|
2527
|
+
4. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
2528
|
+
5. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
2529
|
+
|
|
2530
|
+
[[ ## task ## ]]
|
|
2531
|
+
|
|
2532
|
+
{{userQuery}}
|
|
2533
|
+
`;
|
|
2534
|
+
|
|
2535
|
+
// src/evaluation/providers/vscode.ts
|
|
2499
2536
|
var VSCodeProvider = class {
|
|
2500
2537
|
id;
|
|
2501
2538
|
kind;
|
|
@@ -2517,6 +2554,7 @@ var VSCodeProvider = class {
|
|
|
2517
2554
|
const session = await dispatchAgentSession({
|
|
2518
2555
|
userQuery: promptContent,
|
|
2519
2556
|
extraAttachments: inputFiles,
|
|
2557
|
+
requestTemplate: AGENTV_REQUEST_TEMPLATE,
|
|
2520
2558
|
wait: this.config.waitForResponse,
|
|
2521
2559
|
dryRun: this.config.dryRun,
|
|
2522
2560
|
vscodeCmd: this.config.command,
|
|
@@ -2563,6 +2601,7 @@ var VSCodeProvider = class {
|
|
|
2563
2601
|
const session = await dispatchBatchAgent({
|
|
2564
2602
|
userQueries,
|
|
2565
2603
|
extraAttachments: combinedInputFiles,
|
|
2604
|
+
requestTemplate: AGENTV_BATCH_REQUEST_TEMPLATE,
|
|
2566
2605
|
wait: this.config.waitForResponse,
|
|
2567
2606
|
dryRun: this.config.dryRun,
|
|
2568
2607
|
vscodeCmd: this.config.command,
|