@wix/evalforge-evaluator 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +85 -36
- package/build/index.js.map +2 -2
- package/build/index.mjs +85 -36
- package/build/index.mjs.map +2 -2
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6268,45 +6268,94 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6268
6268
|
if (options.maxTokens !== void 0) {
|
|
6269
6269
|
queryOptions.maxTokens = options.maxTokens;
|
|
6270
6270
|
}
|
|
6271
|
-
|
|
6272
|
-
|
|
6273
|
-
|
|
6274
|
-
|
|
6275
|
-
|
|
6276
|
-
|
|
6277
|
-
|
|
6278
|
-
|
|
6279
|
-
|
|
6280
|
-
|
|
6281
|
-
|
|
6282
|
-
|
|
6283
|
-
|
|
6284
|
-
|
|
6285
|
-
|
|
6286
|
-
|
|
6271
|
+
try {
|
|
6272
|
+
for await (const message of query({
|
|
6273
|
+
prompt: scenario.triggerPrompt,
|
|
6274
|
+
options: queryOptions
|
|
6275
|
+
})) {
|
|
6276
|
+
messageCount++;
|
|
6277
|
+
console.log("[SDK Message]", JSON.stringify(message, null, 2));
|
|
6278
|
+
allMessages.push(message);
|
|
6279
|
+
if (messageCount <= 3) {
|
|
6280
|
+
console.error(
|
|
6281
|
+
"[DEBUG-H5] SDK message received",
|
|
6282
|
+
JSON.stringify({
|
|
6283
|
+
messageCount,
|
|
6284
|
+
type: message.type,
|
|
6285
|
+
timestamp: Date.now()
|
|
6286
|
+
})
|
|
6287
|
+
);
|
|
6288
|
+
}
|
|
6289
|
+
if (traceContext && isAssistantMessage(message)) {
|
|
6290
|
+
traceStepNumber++;
|
|
6291
|
+
const traceEvent = createTraceEventFromMessage(
|
|
6292
|
+
message,
|
|
6293
|
+
traceContext,
|
|
6294
|
+
traceStepNumber,
|
|
6295
|
+
false
|
|
6296
|
+
// Not complete yet
|
|
6297
|
+
);
|
|
6298
|
+
emitTraceEvent(
|
|
6299
|
+
traceEvent,
|
|
6300
|
+
traceContext.tracePushUrl,
|
|
6301
|
+
traceContext.routeHeader,
|
|
6302
|
+
traceContext.authToken
|
|
6303
|
+
);
|
|
6304
|
+
}
|
|
6287
6305
|
}
|
|
6288
|
-
|
|
6289
|
-
|
|
6290
|
-
|
|
6291
|
-
|
|
6292
|
-
|
|
6293
|
-
|
|
6294
|
-
|
|
6295
|
-
|
|
6296
|
-
|
|
6297
|
-
|
|
6298
|
-
|
|
6299
|
-
|
|
6300
|
-
|
|
6301
|
-
|
|
6302
|
-
|
|
6306
|
+
console.log(
|
|
6307
|
+
"[executeWithClaudeCode] Claude Agent SDK query completed, received",
|
|
6308
|
+
allMessages.length,
|
|
6309
|
+
"messages"
|
|
6310
|
+
);
|
|
6311
|
+
} catch (sdkError) {
|
|
6312
|
+
const errorMessage = sdkError instanceof Error ? sdkError.message : String(sdkError);
|
|
6313
|
+
const errorStack = sdkError instanceof Error ? sdkError.stack : void 0;
|
|
6314
|
+
console.error("[executeWithClaudeCode] Claude SDK execution FAILED");
|
|
6315
|
+
console.error("[executeWithClaudeCode] Error message:", errorMessage);
|
|
6316
|
+
if (errorStack) {
|
|
6317
|
+
console.error("[executeWithClaudeCode] Stack trace:", errorStack);
|
|
6318
|
+
}
|
|
6319
|
+
if (sdkError && typeof sdkError === "object") {
|
|
6320
|
+
const errObj = sdkError;
|
|
6321
|
+
const extraInfo = {};
|
|
6322
|
+
for (const key of [
|
|
6323
|
+
"code",
|
|
6324
|
+
"status",
|
|
6325
|
+
"stderr",
|
|
6326
|
+
"stdout",
|
|
6327
|
+
"exitCode",
|
|
6328
|
+
"signal",
|
|
6329
|
+
"cause"
|
|
6330
|
+
]) {
|
|
6331
|
+
if (key in errObj && errObj[key] !== void 0) {
|
|
6332
|
+
extraInfo[key] = errObj[key];
|
|
6333
|
+
}
|
|
6334
|
+
}
|
|
6335
|
+
if (Object.keys(extraInfo).length > 0) {
|
|
6336
|
+
console.error(
|
|
6337
|
+
"[executeWithClaudeCode] Additional error info:",
|
|
6338
|
+
JSON.stringify(extraInfo)
|
|
6339
|
+
);
|
|
6340
|
+
}
|
|
6303
6341
|
}
|
|
6342
|
+
console.error(
|
|
6343
|
+
"[executeWithClaudeCode] Context:",
|
|
6344
|
+
JSON.stringify({
|
|
6345
|
+
skillId: skill.id,
|
|
6346
|
+
skillName: skill.name,
|
|
6347
|
+
scenarioId: scenario.id,
|
|
6348
|
+
scenarioName: scenario.name,
|
|
6349
|
+
messagesReceived: messageCount,
|
|
6350
|
+
cwd: options.cwd,
|
|
6351
|
+
model: options.model || DEFAULT_MODEL
|
|
6352
|
+
})
|
|
6353
|
+
);
|
|
6354
|
+
throw new Error(
|
|
6355
|
+
`Claude SDK execution failed after ${messageCount} messages: ${errorMessage}` + (errorStack ? `
|
|
6356
|
+
Stack: ${errorStack.split("\n").slice(0, 3).join("\n")}` : "")
|
|
6357
|
+
);
|
|
6304
6358
|
}
|
|
6305
|
-
console.log(
|
|
6306
|
-
"[executeWithClaudeCode] Claude Agent SDK query completed, received",
|
|
6307
|
-
allMessages.length,
|
|
6308
|
-
"messages"
|
|
6309
|
-
);
|
|
6310
6359
|
if (traceContext) {
|
|
6311
6360
|
emitTraceEvent(
|
|
6312
6361
|
{
|