@agentv/core 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-IBTKEEOT.js → chunk-KDEP4I7G.js} +44 -1
- package/dist/chunk-KDEP4I7G.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +209 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -30
- package/dist/index.d.ts +16 -30
- package/dist/index.js +168 -41
- package/dist/index.js.map +1 -1
- package/package.json +4 -1
- package/dist/chunk-IBTKEEOT.js.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -396,7 +396,7 @@ interface ChatMessage {
|
|
|
396
396
|
readonly name?: string;
|
|
397
397
|
}
|
|
398
398
|
type ChatPrompt = readonly ChatMessage[];
|
|
399
|
-
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
399
|
+
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'pi-agent-sdk' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
400
400
|
interface ProviderRequest {
|
|
401
401
|
readonly question: string;
|
|
402
402
|
readonly systemPrompt?: string;
|
|
@@ -805,6 +805,13 @@ interface PiCodingAgentResolvedConfig {
|
|
|
805
805
|
readonly logFormat?: 'summary' | 'json';
|
|
806
806
|
readonly systemPrompt?: string;
|
|
807
807
|
}
|
|
808
|
+
interface PiAgentSdkResolvedConfig {
|
|
809
|
+
readonly provider?: string;
|
|
810
|
+
readonly model?: string;
|
|
811
|
+
readonly apiKey?: string;
|
|
812
|
+
readonly timeoutMs?: number;
|
|
813
|
+
readonly systemPrompt?: string;
|
|
814
|
+
}
|
|
808
815
|
interface ClaudeCodeResolvedConfig {
|
|
809
816
|
readonly executable: string;
|
|
810
817
|
readonly model?: string;
|
|
@@ -863,6 +870,13 @@ type ResolvedTarget = {
|
|
|
863
870
|
readonly workers?: number;
|
|
864
871
|
readonly providerBatching?: boolean;
|
|
865
872
|
readonly config: PiCodingAgentResolvedConfig;
|
|
873
|
+
} | {
|
|
874
|
+
readonly kind: 'pi-agent-sdk';
|
|
875
|
+
readonly name: string;
|
|
876
|
+
readonly judgeTarget?: string;
|
|
877
|
+
readonly workers?: number;
|
|
878
|
+
readonly providerBatching?: boolean;
|
|
879
|
+
readonly config: PiAgentSdkResolvedConfig;
|
|
866
880
|
} | {
|
|
867
881
|
readonly kind: 'claude-code';
|
|
868
882
|
readonly name: string;
|
|
@@ -1211,37 +1225,9 @@ interface GenerateRubricsOptions {
|
|
|
1211
1225
|
*/
|
|
1212
1226
|
declare function generateRubrics(options: GenerateRubricsOptions): Promise<readonly RubricItem[]>;
|
|
1213
1227
|
|
|
1214
|
-
/**
|
|
1215
|
-
* Payload received by code judges via stdin.
|
|
1216
|
-
* All properties use camelCase for TypeScript ergonomics.
|
|
1217
|
-
*/
|
|
1218
|
-
interface CodeJudgePayload {
|
|
1219
|
-
readonly question: string;
|
|
1220
|
-
readonly expectedOutcome: string;
|
|
1221
|
-
readonly expectedMessages: readonly JsonObject[];
|
|
1222
|
-
readonly referenceAnswer?: string;
|
|
1223
|
-
readonly candidateAnswer: string;
|
|
1224
|
-
readonly outputMessages?: readonly OutputMessage[] | null;
|
|
1225
|
-
readonly guidelineFiles: readonly string[];
|
|
1226
|
-
readonly inputFiles: readonly string[];
|
|
1227
|
-
readonly inputMessages: readonly TestMessage[];
|
|
1228
|
-
readonly traceSummary?: TraceSummary | null;
|
|
1229
|
-
readonly config?: JsonObject | null;
|
|
1230
|
-
}
|
|
1231
|
-
/**
|
|
1232
|
-
* Parse stdin JSON (snake_case) into typed camelCase object.
|
|
1233
|
-
* Use this in TypeScript code judges to get type-safe, idiomatic input.
|
|
1234
|
-
*/
|
|
1235
|
-
declare function parseCodeJudgePayload(payload: string): CodeJudgePayload;
|
|
1236
|
-
/**
|
|
1237
|
-
* Convenience helper that reads stdin and parses it.
|
|
1238
|
-
* Equivalent to: parseCodeJudgePayload(readFileSync(0, 'utf8'))
|
|
1239
|
-
*/
|
|
1240
|
-
declare function readCodeJudgePayload(): CodeJudgePayload;
|
|
1241
|
-
|
|
1242
1228
|
type AgentKernel = {
|
|
1243
1229
|
status: string;
|
|
1244
1230
|
};
|
|
1245
1231
|
declare function createAgentKernel(): AgentKernel;
|
|
1246
1232
|
|
|
1247
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type
|
|
1233
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type 
VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, explorationRatio, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|
package/dist/index.d.ts
CHANGED
|
@@ -396,7 +396,7 @@ interface ChatMessage {
|
|
|
396
396
|
readonly name?: string;
|
|
397
397
|
}
|
|
398
398
|
type ChatPrompt = readonly ChatMessage[];
|
|
399
|
-
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
399
|
+
type ProviderKind = 'azure' | 'anthropic' | 'gemini' | 'codex' | 'pi-coding-agent' | 'pi-agent-sdk' | 'claude-code' | 'cli' | 'mock' | 'vscode' | 'vscode-insiders';
|
|
400
400
|
interface ProviderRequest {
|
|
401
401
|
readonly question: string;
|
|
402
402
|
readonly systemPrompt?: string;
|
|
@@ -805,6 +805,13 @@ interface PiCodingAgentResolvedConfig {
|
|
|
805
805
|
readonly logFormat?: 'summary' | 'json';
|
|
806
806
|
readonly systemPrompt?: string;
|
|
807
807
|
}
|
|
808
|
+
interface PiAgentSdkResolvedConfig {
|
|
809
|
+
readonly provider?: string;
|
|
810
|
+
readonly model?: string;
|
|
811
|
+
readonly apiKey?: string;
|
|
812
|
+
readonly timeoutMs?: number;
|
|
813
|
+
readonly systemPrompt?: string;
|
|
814
|
+
}
|
|
808
815
|
interface ClaudeCodeResolvedConfig {
|
|
809
816
|
readonly executable: string;
|
|
810
817
|
readonly model?: string;
|
|
@@ -863,6 +870,13 @@ type ResolvedTarget = {
|
|
|
863
870
|
readonly workers?: number;
|
|
864
871
|
readonly providerBatching?: boolean;
|
|
865
872
|
readonly config: PiCodingAgentResolvedConfig;
|
|
873
|
+
} | {
|
|
874
|
+
readonly kind: 'pi-agent-sdk';
|
|
875
|
+
readonly name: string;
|
|
876
|
+
readonly judgeTarget?: string;
|
|
877
|
+
readonly workers?: number;
|
|
878
|
+
readonly providerBatching?: boolean;
|
|
879
|
+
readonly config: PiAgentSdkResolvedConfig;
|
|
866
880
|
} | {
|
|
867
881
|
readonly kind: 'claude-code';
|
|
868
882
|
readonly name: string;
|
|
@@ -1211,37 +1225,9 @@ interface GenerateRubricsOptions {
|
|
|
1211
1225
|
*/
|
|
1212
1226
|
declare function generateRubrics(options: GenerateRubricsOptions): Promise<readonly RubricItem[]>;
|
|
1213
1227
|
|
|
1214
|
-
/**
|
|
1215
|
-
* Payload received by code judges via stdin.
|
|
1216
|
-
* All properties use camelCase for TypeScript ergonomics.
|
|
1217
|
-
*/
|
|
1218
|
-
interface CodeJudgePayload {
|
|
1219
|
-
readonly question: string;
|
|
1220
|
-
readonly expectedOutcome: string;
|
|
1221
|
-
readonly expectedMessages: readonly JsonObject[];
|
|
1222
|
-
readonly referenceAnswer?: string;
|
|
1223
|
-
readonly candidateAnswer: string;
|
|
1224
|
-
readonly outputMessages?: readonly OutputMessage[] | null;
|
|
1225
|
-
readonly guidelineFiles: readonly string[];
|
|
1226
|
-
readonly inputFiles: readonly string[];
|
|
1227
|
-
readonly inputMessages: readonly TestMessage[];
|
|
1228
|
-
readonly traceSummary?: TraceSummary | null;
|
|
1229
|
-
readonly config?: JsonObject | null;
|
|
1230
|
-
}
|
|
1231
|
-
/**
|
|
1232
|
-
* Parse stdin JSON (snake_case) into typed camelCase object.
|
|
1233
|
-
* Use this in TypeScript code judges to get type-safe, idiomatic input.
|
|
1234
|
-
*/
|
|
1235
|
-
declare function parseCodeJudgePayload(payload: string): CodeJudgePayload;
|
|
1236
|
-
/**
|
|
1237
|
-
* Convenience helper that reads stdin and parses it.
|
|
1238
|
-
* Equivalent to: parseCodeJudgePayload(readFileSync(0, 'utf8'))
|
|
1239
|
-
*/
|
|
1240
|
-
declare function readCodeJudgePayload(): CodeJudgePayload;
|
|
1241
|
-
|
|
1242
1228
|
type AgentKernel = {
|
|
1243
1229
|
status: string;
|
|
1244
1230
|
};
|
|
1245
1231
|
declare function createAgentKernel(): AgentKernel;
|
|
1246
1232
|
|
|
1247
|
-
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type
|
|
1233
|
+
export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type ChildEvaluatorResult, type ClaudeCodeResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type CompositeAggregatorConfig, CompositeEvaluator, type CompositeEvaluatorConfig, type CompositeEvaluatorOptions, CostEvaluator, type CostEvaluatorConfig, type CostEvaluatorOptions, DEFAULT_EXPLORATION_TOOLS, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type Evaluator, type EvaluatorConfig, type EvaluatorFactory, type EvaluatorKind, type EvaluatorResult, type ExecutionMetrics, FieldAccuracyEvaluator, type FieldAccuracyEvaluatorConfig, type FieldAccuracyEvaluatorOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type JsonObject, type JsonPrimitive, type JsonValue, LatencyEvaluator, type LatencyEvaluatorConfig, type LatencyEvaluatorOptions, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type PiAgentSdkResolvedConfig, type PiCodingAgentResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ProviderTokenUsage, type ResolvedTarget, type RubricItem, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageEvaluator, type TokenUsageEvaluatorConfig, type TokenUsageEvaluatorOptions, type ToolTestMessage, ToolTrajectoryEvaluator, type ToolTrajectoryEvaluatorConfig, type ToolTrajectoryEvaluatorOptions, type ToolTrajectoryExpectedItem, type TraceSummary, type UserTestMessage, type 
VSCodeResolvedConfig, avgToolDurationMs, buildDirectoryChain, buildPromptInputs, buildSearchRoots, computeTraceSummary, consumeClaudeCodeLogEntries, consumeCodexLogEntries, consumePiLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, explorationRatio, extractCodeBlocks, fileExists, findGitRoot, generateRubrics, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, mergeExecutionMetrics, normalizeLineEndings, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToClaudeCodeLogEntries, subscribeToCodexLogEntries, subscribeToPiLogEntries, tokensPerTool };
|
package/dist/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
readTextFile,
|
|
11
11
|
resolveFileReference,
|
|
12
12
|
resolveTargetDefinition
|
|
13
|
-
} from "./chunk-IBTKEEOT.js";
|
|
13
|
+
} from "./chunk-KDEP4I7G.js";
|
|
14
14
|
|
|
15
15
|
// src/evaluation/types.ts
|
|
16
16
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -4084,6 +4084,167 @@ var MockProvider = class {
|
|
|
4084
4084
|
}
|
|
4085
4085
|
};
|
|
4086
4086
|
|
|
4087
|
+
// src/evaluation/providers/pi-agent-sdk.ts
|
|
4088
|
+
var piAgentModule = null;
|
|
4089
|
+
var piAiModule = null;
|
|
4090
|
+
async function loadPiModules() {
|
|
4091
|
+
if (!piAgentModule || !piAiModule) {
|
|
4092
|
+
try {
|
|
4093
|
+
[piAgentModule, piAiModule] = await Promise.all([
|
|
4094
|
+
import("@mariozechner/pi-agent"),
|
|
4095
|
+
import("@mariozechner/pi-ai")
|
|
4096
|
+
]);
|
|
4097
|
+
} catch (error) {
|
|
4098
|
+
throw new Error(
|
|
4099
|
+
`Failed to load pi-agent-sdk dependencies. Please install them:
|
|
4100
|
+
npm install @mariozechner/pi-agent @mariozechner/pi-ai
|
|
4101
|
+
|
|
4102
|
+
Original error: ${error instanceof Error ? error.message : String(error)}`
|
|
4103
|
+
);
|
|
4104
|
+
}
|
|
4105
|
+
}
|
|
4106
|
+
return {
|
|
4107
|
+
Agent: piAgentModule.Agent,
|
|
4108
|
+
ProviderTransport: piAgentModule.ProviderTransport,
|
|
4109
|
+
getModel: piAiModule.getModel,
|
|
4110
|
+
getEnvApiKey: piAiModule.getEnvApiKey
|
|
4111
|
+
};
|
|
4112
|
+
}
|
|
4113
|
+
var PiAgentSdkProvider = class {
|
|
4114
|
+
id;
|
|
4115
|
+
kind = "pi-agent-sdk";
|
|
4116
|
+
targetName;
|
|
4117
|
+
supportsBatch = false;
|
|
4118
|
+
config;
|
|
4119
|
+
constructor(targetName, config) {
|
|
4120
|
+
this.id = `pi-agent-sdk:${targetName}`;
|
|
4121
|
+
this.targetName = targetName;
|
|
4122
|
+
this.config = config;
|
|
4123
|
+
}
|
|
4124
|
+
async invoke(request) {
|
|
4125
|
+
if (request.signal?.aborted) {
|
|
4126
|
+
throw new Error("Pi agent SDK request was aborted before execution");
|
|
4127
|
+
}
|
|
4128
|
+
const { Agent, ProviderTransport, getModel, getEnvApiKey } = await loadPiModules();
|
|
4129
|
+
const startTime = Date.now();
|
|
4130
|
+
const providerName = this.config.provider ?? "anthropic";
|
|
4131
|
+
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
4132
|
+
const model = getModel(providerName, modelId);
|
|
4133
|
+
const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
|
|
4134
|
+
const transport = new ProviderTransport({
|
|
4135
|
+
getApiKey: async (provider) => {
|
|
4136
|
+
return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
|
|
4137
|
+
}
|
|
4138
|
+
});
|
|
4139
|
+
const agent = new Agent({
|
|
4140
|
+
initialState: {
|
|
4141
|
+
systemPrompt,
|
|
4142
|
+
model,
|
|
4143
|
+
tools: [],
|
|
4144
|
+
// No tools for simple Q&A
|
|
4145
|
+
messages: []
|
|
4146
|
+
},
|
|
4147
|
+
transport
|
|
4148
|
+
});
|
|
4149
|
+
const outputMessages = [];
|
|
4150
|
+
let finalAssistantContent = "";
|
|
4151
|
+
const unsubscribe = agent.subscribe((event) => {
|
|
4152
|
+
if (event.type === "message_end") {
|
|
4153
|
+
const msg = event.message;
|
|
4154
|
+
if (msg.role === "assistant") {
|
|
4155
|
+
const content = extractTextContent2(msg.content);
|
|
4156
|
+
if (content) {
|
|
4157
|
+
finalAssistantContent = content;
|
|
4158
|
+
}
|
|
4159
|
+
}
|
|
4160
|
+
}
|
|
4161
|
+
});
|
|
4162
|
+
try {
|
|
4163
|
+
const timeoutMs = this.config.timeoutMs ?? 12e4;
|
|
4164
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
4165
|
+
setTimeout(
|
|
4166
|
+
() => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
|
|
4167
|
+
timeoutMs
|
|
4168
|
+
);
|
|
4169
|
+
});
|
|
4170
|
+
await Promise.race([agent.prompt(request.question), timeoutPromise]);
|
|
4171
|
+
await agent.waitForIdle();
|
|
4172
|
+
const agentMessages = agent.state.messages;
|
|
4173
|
+
for (const msg of agentMessages) {
|
|
4174
|
+
outputMessages.push(convertAgentMessage(msg));
|
|
4175
|
+
}
|
|
4176
|
+
const durationMs = Date.now() - startTime;
|
|
4177
|
+
return {
|
|
4178
|
+
raw: {
|
|
4179
|
+
messages: agentMessages,
|
|
4180
|
+
systemPrompt,
|
|
4181
|
+
model: this.config.model,
|
|
4182
|
+
provider: this.config.provider
|
|
4183
|
+
},
|
|
4184
|
+
outputMessages,
|
|
4185
|
+
durationMs
|
|
4186
|
+
};
|
|
4187
|
+
} finally {
|
|
4188
|
+
unsubscribe();
|
|
4189
|
+
}
|
|
4190
|
+
}
|
|
4191
|
+
};
|
|
4192
|
+
function extractTextContent2(content) {
|
|
4193
|
+
if (typeof content === "string") {
|
|
4194
|
+
return content;
|
|
4195
|
+
}
|
|
4196
|
+
if (!Array.isArray(content)) {
|
|
4197
|
+
return void 0;
|
|
4198
|
+
}
|
|
4199
|
+
const textParts = [];
|
|
4200
|
+
for (const part of content) {
|
|
4201
|
+
if (!part || typeof part !== "object") {
|
|
4202
|
+
continue;
|
|
4203
|
+
}
|
|
4204
|
+
const p = part;
|
|
4205
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
4206
|
+
textParts.push(p.text);
|
|
4207
|
+
}
|
|
4208
|
+
}
|
|
4209
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4210
|
+
}
|
|
4211
|
+
function convertAgentMessage(message) {
|
|
4212
|
+
if (!message || typeof message !== "object") {
|
|
4213
|
+
return { role: "unknown", content: String(message) };
|
|
4214
|
+
}
|
|
4215
|
+
const msg = message;
|
|
4216
|
+
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
4217
|
+
const content = extractTextContent2(msg.content);
|
|
4218
|
+
const toolCalls = extractToolCalls2(msg.content);
|
|
4219
|
+
const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
4220
|
+
return {
|
|
4221
|
+
role,
|
|
4222
|
+
content,
|
|
4223
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
4224
|
+
timestamp
|
|
4225
|
+
};
|
|
4226
|
+
}
|
|
4227
|
+
function extractToolCalls2(content) {
|
|
4228
|
+
if (!Array.isArray(content)) {
|
|
4229
|
+
return [];
|
|
4230
|
+
}
|
|
4231
|
+
const toolCalls = [];
|
|
4232
|
+
for (const part of content) {
|
|
4233
|
+
if (!part || typeof part !== "object") {
|
|
4234
|
+
continue;
|
|
4235
|
+
}
|
|
4236
|
+
const p = part;
|
|
4237
|
+
if (p.type === "tool_use" && typeof p.name === "string") {
|
|
4238
|
+
toolCalls.push({
|
|
4239
|
+
tool: p.name,
|
|
4240
|
+
input: p.input,
|
|
4241
|
+
id: typeof p.id === "string" ? p.id : void 0
|
|
4242
|
+
});
|
|
4243
|
+
}
|
|
4244
|
+
}
|
|
4245
|
+
return toolCalls;
|
|
4246
|
+
}
|
|
4247
|
+
|
|
4087
4248
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
4088
4249
|
import { spawn as spawn3 } from "node:child_process";
|
|
4089
4250
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
@@ -4599,8 +4760,8 @@ function convertPiMessage(message) {
|
|
|
4599
4760
|
if (typeof role !== "string") {
|
|
4600
4761
|
return void 0;
|
|
4601
4762
|
}
|
|
4602
|
-
const content = extractTextContent2(msg.content);
|
|
4603
|
-
const toolCalls = extractToolCalls2(msg.content);
|
|
4763
|
+
const content = extractTextContent3(msg.content);
|
|
4764
|
+
const toolCalls = extractToolCalls3(msg.content);
|
|
4604
4765
|
const timestamp = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
4605
4766
|
const metadata = {};
|
|
4606
4767
|
if (msg.api) metadata.api = msg.api;
|
|
@@ -4616,7 +4777,7 @@ function convertPiMessage(message) {
|
|
|
4616
4777
|
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
4617
4778
|
};
|
|
4618
4779
|
}
|
|
4619
|
-
function extractTextContent2(content) {
|
|
4780
|
+
function extractTextContent3(content) {
|
|
4620
4781
|
if (typeof content === "string") {
|
|
4621
4782
|
return content;
|
|
4622
4783
|
}
|
|
@@ -4635,7 +4796,7 @@ function extractTextContent2(content) {
|
|
|
4635
4796
|
}
|
|
4636
4797
|
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
4637
4798
|
}
|
|
4638
|
-
function extractToolCalls2(content) {
|
|
4799
|
+
function extractToolCalls3(content) {
|
|
4639
4800
|
if (!Array.isArray(content)) {
|
|
4640
4801
|
return [];
|
|
4641
4802
|
}
|
|
@@ -5130,6 +5291,8 @@ function createProvider(target) {
|
|
|
5130
5291
|
return new CodexProvider(target.name, target.config);
|
|
5131
5292
|
case "pi-coding-agent":
|
|
5132
5293
|
return new PiCodingAgentProvider(target.name, target.config);
|
|
5294
|
+
case "pi-agent-sdk":
|
|
5295
|
+
return new PiAgentSdkProvider(target.name, target.config);
|
|
5133
5296
|
case "claude-code":
|
|
5134
5297
|
return new ClaudeCodeProvider(target.name, target.config);
|
|
5135
5298
|
case "mock":
|
|
@@ -5297,12 +5460,6 @@ function toSnakeCase(str) {
|
|
|
5297
5460
|
}
|
|
5298
5461
|
return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
5299
5462
|
}
|
|
5300
|
-
function toCamelCase(str) {
|
|
5301
|
-
if (/^[A-Z]/.test(str)) {
|
|
5302
|
-
return str;
|
|
5303
|
-
}
|
|
5304
|
-
return str.replace(/_([a-z0-9])/g, (_, letter) => letter.toUpperCase());
|
|
5305
|
-
}
|
|
5306
5463
|
function toSnakeCaseDeep(obj) {
|
|
5307
5464
|
if (obj === null || obj === void 0) {
|
|
5308
5465
|
return obj;
|
|
@@ -5320,23 +5477,6 @@ function toSnakeCaseDeep(obj) {
|
|
|
5320
5477
|
}
|
|
5321
5478
|
return obj;
|
|
5322
5479
|
}
|
|
5323
|
-
function toCamelCaseDeep(obj) {
|
|
5324
|
-
if (obj === null || obj === void 0) {
|
|
5325
|
-
return obj;
|
|
5326
|
-
}
|
|
5327
|
-
if (Array.isArray(obj)) {
|
|
5328
|
-
return obj.map((item) => toCamelCaseDeep(item));
|
|
5329
|
-
}
|
|
5330
|
-
if (typeof obj === "object") {
|
|
5331
|
-
const result = {};
|
|
5332
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
5333
|
-
const camelKey = toCamelCase(key);
|
|
5334
|
-
result[camelKey] = toCamelCaseDeep(value);
|
|
5335
|
-
}
|
|
5336
|
-
return result;
|
|
5337
|
-
}
|
|
5338
|
-
return obj;
|
|
5339
|
-
}
|
|
5340
5480
|
|
|
5341
5481
|
// src/evaluation/evaluators.ts
|
|
5342
5482
|
var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
|
|
@@ -8081,17 +8221,6 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
|
8081
8221
|
return parts.join("\n");
|
|
8082
8222
|
}
|
|
8083
8223
|
|
|
8084
|
-
// src/evaluation/code-judge-sdk.ts
|
|
8085
|
-
import { readFileSync } from "node:fs";
|
|
8086
|
-
function parseCodeJudgePayload(payload) {
|
|
8087
|
-
const parsed = JSON.parse(payload);
|
|
8088
|
-
return toCamelCaseDeep(parsed);
|
|
8089
|
-
}
|
|
8090
|
-
function readCodeJudgePayload() {
|
|
8091
|
-
const stdin = readFileSync(0, "utf8");
|
|
8092
|
-
return parseCodeJudgePayload(stdin);
|
|
8093
|
-
}
|
|
8094
|
-
|
|
8095
8224
|
// src/index.ts
|
|
8096
8225
|
function createAgentKernel() {
|
|
8097
8226
|
return { status: "stub" };
|
|
@@ -8134,8 +8263,6 @@ export {
|
|
|
8134
8263
|
loadEvalCases,
|
|
8135
8264
|
mergeExecutionMetrics,
|
|
8136
8265
|
normalizeLineEndings,
|
|
8137
|
-
parseCodeJudgePayload,
|
|
8138
|
-
readCodeJudgePayload,
|
|
8139
8266
|
readJsonFile,
|
|
8140
8267
|
readTargetDefinitions,
|
|
8141
8268
|
readTestSuiteMetadata,
|