@wix/evalforge-evaluator 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +309 -78
- package/build/index.js.map +4 -4
- package/build/index.mjs +309 -78
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/file-diff.d.ts +30 -0
- package/package.json +3 -3
package/build/index.js
CHANGED
|
@@ -6360,40 +6360,59 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6360
6360
|
traceContext.authToken
|
|
6361
6361
|
);
|
|
6362
6362
|
}
|
|
6363
|
+
const SDK_TIMEOUT_MS = Math.max(3e5, maxTurns * 6e4);
|
|
6364
|
+
let timeoutHandle;
|
|
6365
|
+
let timedOut = false;
|
|
6363
6366
|
try {
|
|
6364
|
-
|
|
6365
|
-
|
|
6366
|
-
|
|
6367
|
-
|
|
6368
|
-
|
|
6369
|
-
|
|
6370
|
-
|
|
6371
|
-
if (messageCount <= 3) {
|
|
6372
|
-
console.error(
|
|
6373
|
-
"[DEBUG-H5] SDK message received",
|
|
6374
|
-
JSON.stringify({
|
|
6375
|
-
messageCount,
|
|
6376
|
-
type: message.type,
|
|
6377
|
-
timestamp: Date.now()
|
|
6378
|
-
})
|
|
6379
|
-
);
|
|
6380
|
-
}
|
|
6381
|
-
if (traceContext && isAssistantMessage(message)) {
|
|
6382
|
-
traceStepNumber++;
|
|
6383
|
-
const traceEvent = createTraceEventFromMessage(
|
|
6384
|
-
message,
|
|
6385
|
-
traceContext,
|
|
6386
|
-
traceStepNumber,
|
|
6387
|
-
false
|
|
6388
|
-
// Not complete yet
|
|
6389
|
-
);
|
|
6390
|
-
emitTraceEvent(
|
|
6391
|
-
traceEvent,
|
|
6392
|
-
traceContext.tracePushUrl,
|
|
6393
|
-
traceContext.routeHeader,
|
|
6394
|
-
traceContext.authToken
|
|
6367
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
6368
|
+
timeoutHandle = setTimeout(() => {
|
|
6369
|
+
timedOut = true;
|
|
6370
|
+
reject(
|
|
6371
|
+
new Error(
|
|
6372
|
+
`SDK execution timed out after ${SDK_TIMEOUT_MS}ms. Skill: ${skill.name}, Scenario: ${scenario.name}, Messages received: ${messageCount}, MaxTurns: ${maxTurns}`
|
|
6373
|
+
)
|
|
6395
6374
|
);
|
|
6375
|
+
}, SDK_TIMEOUT_MS);
|
|
6376
|
+
});
|
|
6377
|
+
const sdkPromise = (async () => {
|
|
6378
|
+
for await (const message of query({
|
|
6379
|
+
prompt: scenario.triggerPrompt,
|
|
6380
|
+
options: queryOptions
|
|
6381
|
+
})) {
|
|
6382
|
+
messageCount++;
|
|
6383
|
+
console.log("[SDK Message]", JSON.stringify(message, null, 2));
|
|
6384
|
+
allMessages.push(message);
|
|
6385
|
+
if (messageCount <= 3) {
|
|
6386
|
+
console.error(
|
|
6387
|
+
"[DEBUG-H5] SDK message received",
|
|
6388
|
+
JSON.stringify({
|
|
6389
|
+
messageCount,
|
|
6390
|
+
type: message.type,
|
|
6391
|
+
timestamp: Date.now()
|
|
6392
|
+
})
|
|
6393
|
+
);
|
|
6394
|
+
}
|
|
6395
|
+
if (traceContext && isAssistantMessage(message)) {
|
|
6396
|
+
traceStepNumber++;
|
|
6397
|
+
const traceEvent = createTraceEventFromMessage(
|
|
6398
|
+
message,
|
|
6399
|
+
traceContext,
|
|
6400
|
+
traceStepNumber,
|
|
6401
|
+
false
|
|
6402
|
+
// Not complete yet
|
|
6403
|
+
);
|
|
6404
|
+
emitTraceEvent(
|
|
6405
|
+
traceEvent,
|
|
6406
|
+
traceContext.tracePushUrl,
|
|
6407
|
+
traceContext.routeHeader,
|
|
6408
|
+
traceContext.authToken
|
|
6409
|
+
);
|
|
6410
|
+
}
|
|
6396
6411
|
}
|
|
6412
|
+
})();
|
|
6413
|
+
await Promise.race([sdkPromise, timeoutPromise]);
|
|
6414
|
+
if (timeoutHandle) {
|
|
6415
|
+
clearTimeout(timeoutHandle);
|
|
6397
6416
|
}
|
|
6398
6417
|
console.log(
|
|
6399
6418
|
"[executeWithClaudeCode] Claude Agent SDK query completed, received",
|
|
@@ -6401,6 +6420,12 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6401
6420
|
"messages"
|
|
6402
6421
|
);
|
|
6403
6422
|
} catch (sdkError) {
|
|
6423
|
+
if (timeoutHandle) {
|
|
6424
|
+
clearTimeout(timeoutHandle);
|
|
6425
|
+
}
|
|
6426
|
+
if (timedOut) {
|
|
6427
|
+
console.error("[SDK-TIMEOUT] Execution timed out:", sdkError);
|
|
6428
|
+
}
|
|
6404
6429
|
console.error("[SDK-ERROR] ====== CLAUDE SDK EXECUTION FAILED ======");
|
|
6405
6430
|
console.error("[SDK-ERROR] Timestamp:", (/* @__PURE__ */ new Date()).toISOString());
|
|
6406
6431
|
console.error(
|
|
@@ -6794,6 +6819,154 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
6794
6819
|
};
|
|
6795
6820
|
}
|
|
6796
6821
|
|
|
6822
|
+
// src/run-scenario/file-diff.ts
|
|
6823
|
+
var import_fs6 = require("fs");
|
|
6824
|
+
var import_path6 = require("path");
|
|
6825
|
+
// Entry names that shouldIgnore() rejects: dependency folders, VCS and tool
// metadata, build output, Python caches/virtualenvs, and macOS Finder files.
var IGNORED_PATTERNS = [
  "node_modules",
  ".git",
  ".claude",
  ".cursor",
  "dist",
  "build",
  ".next",
  ".turbo",
  "__pycache__",
  ".pytest_cache",
  ".venv",
  "venv",
  ".DS_Store"
];

// File-name suffixes that isBinaryFile() matches (compared against the
// lower-cased file name, so matching is case-insensitive).
var BINARY_EXTENSIONS = [
  // images
  ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".svg",
  // fonts
  ".woff", ".woff2", ".ttf", ".eot",
  // audio / video
  ".mp3", ".mp4", ".wav", ".avi", ".mov",
  // documents and archives
  ".pdf", ".zip", ".tar", ".gz", ".rar", ".7z",
  // native binaries
  ".exe", ".dll", ".so", ".dylib"
];

// Size threshold of 100 KiB (102400).
var MAX_FILE_SIZE = 100 * 1024;

/**
 * Tell whether an entry name matches one of IGNORED_PATTERNS.
 *
 * @param {string} name2 - Entry name (or path) to test.
 * @returns {boolean} true when the name equals a pattern or begins with
 *   "<pattern>/".
 */
function shouldIgnore(name2) {
  for (const ignored of IGNORED_PATTERNS) {
    if (name2 === ignored || name2.startsWith(`${ignored}/`)) {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether a file name carries one of the BINARY_EXTENSIONS suffixes.
 *
 * @param {string} filename - File name to test; case is ignored.
 * @returns {boolean} true when the lower-cased name ends with a listed suffix.
 */
function isBinaryFile(filename) {
  const lowered = filename.toLowerCase();
  for (const extension of BINARY_EXTENSIONS) {
    if (lowered.endsWith(extension)) {
      return true;
    }
  }
  return false;
}
|
|
6878
|
+
/**
 * Recursively capture the text files under `dir` as a flat
 * `{ relativePath: content }` map.
 *
 * Entries rejected by shouldIgnore(), files matched by isBinaryFile(), files
 * larger than MAX_FILE_SIZE, and entries that are neither a directory nor a
 * regular file are left out. The snapshot is best-effort: a file that cannot
 * be stat'ed or read, and a directory that cannot be listed, is skipped
 * rather than aborting the whole snapshot.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} [baseDir] - Root that keys are made relative to; defaults
 *   to `dir` (recursive calls pass the original root).
 * @returns {Object<string, string>} Map from path relative to the base
 *   directory to the file's UTF-8 content; empty when `dir` does not exist.
 */
function snapshotDirectory(dir, baseDir) {
  const snapshot = {};
  const base = baseDir || dir;
  if (!(0, import_fs6.existsSync)(dir)) {
    return snapshot;
  }
  let entries;
  try {
    entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
  } catch (err) {
    // The directory may be unreadable or may have vanished since the
    // existsSync check; treat it like an unreadable file and skip it.
    console.warn(
      "[file-diff] Skipping unreadable directory:",
      dir,
      err instanceof Error ? err.message : String(err)
    );
    return snapshot;
  }
  for (const entry of entries) {
    if (shouldIgnore(entry.name)) {
      continue;
    }
    const fullPath = (0, import_path6.join)(dir, entry.name);
    if (entry.isDirectory()) {
      // Recurse with the original base so nested keys stay root-relative.
      Object.assign(snapshot, snapshotDirectory(fullPath, base));
      continue;
    }
    if (!entry.isFile() || isBinaryFile(entry.name)) {
      continue;
    }
    try {
      const stats = (0, import_fs6.statSync)(fullPath);
      if (stats.size > MAX_FILE_SIZE) {
        continue;
      }
      const relativePath = (0, import_path6.relative)(base, fullPath);
      snapshot[relativePath] = (0, import_fs6.readFileSync)(fullPath, "utf-8");
    } catch {
      // Deliberately best-effort: a file that disappears or cannot be read
      // is simply absent from the snapshot.
      continue;
    }
  }
  return snapshot;
}
|
|
6912
|
+
/**
 * Build a positional, line-by-line diff between two text contents.
 *
 * Lines are compared index by index (line i of `before` against line i of
 * `after`); this is not an alignment-based diff, so an insertion shifts every
 * following line into the "changed" category.
 *
 * Empty content is treated as having zero lines, so diffing against a created
 * or deleted file yields only "added" or only "removed" entries instead of a
 * phantom empty line.
 *
 * @param {string} before - Original content.
 * @param {string} after - New content.
 * @returns {Array<{type: "unchanged"|"removed"|"added", content: string, lineNumber: number}>}
 *   One entry per unchanged line, and a removed and/or added entry per
 *   differing position; entries for the same position share a lineNumber.
 */
function generateDiffLines(before, after) {
  // "".split("\n") would give [""], i.e. one empty line, which is wrong for
  // content that has no lines at all.
  const toLines = (text) => (text === "" ? [] : text.split("\n"));
  const beforeLines = toLines(before);
  const afterLines = toLines(after);
  const result = [];
  let lineNumber = 1;
  const maxLines = Math.max(beforeLines.length, afterLines.length);
  for (let i = 0; i < maxLines; i++) {
    const beforeLine = beforeLines[i];
    const afterLine = afterLines[i];
    if (beforeLine === afterLine) {
      // Within the loop bound at least one side is defined, so equal lines
      // are always real strings here.
      result.push({ type: "unchanged", content: beforeLine, lineNumber });
    } else {
      // `undefined` means that side has run out of lines.
      if (beforeLine !== void 0) {
        result.push({ type: "removed", content: beforeLine, lineNumber });
      }
      if (afterLine !== void 0) {
        result.push({ type: "added", content: afterLine, lineNumber });
      }
    }
    lineNumber++;
  }
  return result;
}
|
|
6949
|
+
/**
 * Compare two directory snapshots and describe every path whose content
 * differs.
 *
 * A path missing from one snapshot is treated as having empty content there.
 * Paths are looked up as own properties only, so file names that collide with
 * Object.prototype members (for example "constructor" or "toString") are
 * handled like any other path instead of yielding an inherited function.
 *
 * @param {Object<string, string>} before - Snapshot taken first.
 * @param {Object<string, string>} after - Snapshot taken afterwards.
 * @returns {Array<{path: string, expected: string, actual: string, diffLines: Array}>}
 *   One entry per changed path, sorted by path with localeCompare;
 *   `expected` is the before content and `actual` the after content.
 */
function diffSnapshots(before, after) {
  const contentOf = (snapshot, key) => Object.prototype.hasOwnProperty.call(snapshot, key) ? snapshot[key] ?? "" : "";
  const diffs = [];
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
  for (const path10 of allPaths) {
    const beforeContent = contentOf(before, path10);
    const afterContent = contentOf(after, path10);
    if (beforeContent === afterContent) {
      continue;
    }
    diffs.push({
      path: path10,
      expected: beforeContent,
      actual: afterContent,
      diffLines: generateDiffLines(beforeContent, afterContent)
    });
  }
  diffs.sort((a, b) => a.path.localeCompare(b.path));
  return diffs;
}
|
|
6969
|
+
|
|
6797
6970
|
// src/run-scenario/callSkill.ts
|
|
6798
6971
|
async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
6799
6972
|
if (agent && agent.runCommand !== "claude") {
|
|
@@ -6802,6 +6975,7 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6802
6975
|
);
|
|
6803
6976
|
}
|
|
6804
6977
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6978
|
+
const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
|
|
6805
6979
|
const { result, llmTrace } = await executeWithClaudeCode(skill, scenario, {
|
|
6806
6980
|
cwd: workDir || process.cwd(),
|
|
6807
6981
|
systemPrompt: skill.skillMd,
|
|
@@ -6822,6 +6996,8 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6822
6996
|
}
|
|
6823
6997
|
});
|
|
6824
6998
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6999
|
+
const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
|
|
7000
|
+
const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
|
|
6825
7001
|
return {
|
|
6826
7002
|
id: (0, import_crypto2.randomUUID)(),
|
|
6827
7003
|
targetId: skill.id,
|
|
@@ -6831,6 +7007,7 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6831
7007
|
modelConfig: agent?.modelConfig,
|
|
6832
7008
|
duration: result.durationMs,
|
|
6833
7009
|
outputText: result.outputText,
|
|
7010
|
+
fileDiffs: fileDiffs.length > 0 ? fileDiffs : void 0,
|
|
6834
7011
|
startedAt,
|
|
6835
7012
|
completedAt,
|
|
6836
7013
|
llmTrace
|
|
@@ -7442,33 +7619,14 @@ async function testClaudeDirectExecution(config) {
|
|
|
7442
7619
|
const versionResult = await runAndLog(
|
|
7443
7620
|
"claude --version",
|
|
7444
7621
|
`${envExports} && "${claudePath}" --version 2>&1`,
|
|
7445
|
-
|
|
7446
|
-
|
|
7447
|
-
const fullCmdResult = await runAndLog(
|
|
7448
|
-
"claude -p (with json output)",
|
|
7449
|
-
`${envExports} && "${claudePath}" -p "Say hello" --output-format json 2>&1`,
|
|
7450
|
-
45e3
|
|
7451
|
-
);
|
|
7452
|
-
const simpleCmdResult = await runAndLog(
|
|
7453
|
-
"claude -p (simple)",
|
|
7454
|
-
`${envExports} && "${claudePath}" -p "Hello" 2>&1`,
|
|
7455
|
-
45e3
|
|
7456
|
-
);
|
|
7457
|
-
const printFlagResult = await runAndLog(
|
|
7458
|
-
"claude --print (long flag)",
|
|
7459
|
-
`${envExports} && "${claudePath}" --print "Hi" 2>&1`,
|
|
7460
|
-
45e3
|
|
7461
|
-
);
|
|
7462
|
-
const positionalResult = await runAndLog(
|
|
7463
|
-
'claude "prompt" (positional)',
|
|
7464
|
-
`${envExports} && "${claudePath}" "Hello world" 2>&1`,
|
|
7465
|
-
45e3
|
|
7622
|
+
1e4
|
|
7623
|
+
// Short timeout - should complete in <2s
|
|
7466
7624
|
);
|
|
7467
|
-
await runAndLog("claude --help", `"${claudePath}" --help 2>&1`,
|
|
7625
|
+
await runAndLog("claude --help", `"${claudePath}" --help 2>&1`, 1e4);
|
|
7468
7626
|
await runAndLog(
|
|
7469
7627
|
"claude --version (no custom env)",
|
|
7470
7628
|
`"${claudePath}" --version 2>&1`,
|
|
7471
|
-
|
|
7629
|
+
1e4
|
|
7472
7630
|
);
|
|
7473
7631
|
const homeDir = process.env.HOME || "/tmp";
|
|
7474
7632
|
const claudeConfigDir = path9.join(homeDir, ".claude");
|
|
@@ -7478,35 +7636,110 @@ async function testClaudeDirectExecution(config) {
|
|
|
7478
7636
|
try {
|
|
7479
7637
|
const configContents = fs11.readdirSync(claudeConfigDir);
|
|
7480
7638
|
details.claudeConfigContents = configContents;
|
|
7481
|
-
for (const file of configContents) {
|
|
7482
|
-
if (file.includes("log") || file.includes("error")) {
|
|
7483
|
-
const logPath = path9.join(claudeConfigDir, file);
|
|
7484
|
-
const catCmd = `cat "${logPath}" 2>&1 | tail -50`;
|
|
7485
|
-
const logContent = await execCommand(catCmd);
|
|
7486
|
-
details[`claudeLogFile_${file}`] = logContent.stdout.slice(0, 1e3);
|
|
7487
|
-
}
|
|
7488
|
-
}
|
|
7489
7639
|
} catch (e) {
|
|
7490
7640
|
details.claudeConfigError = e instanceof Error ? e.message : String(e);
|
|
7491
7641
|
}
|
|
7492
7642
|
}
|
|
7493
7643
|
details.commandResults = commandResults;
|
|
7494
|
-
const anyPromptWorked = fullCmdResult.exitCode === 0 || simpleCmdResult.exitCode === 0 || printFlagResult.exitCode === 0 || positionalResult.exitCode === 0;
|
|
7495
7644
|
const versionWorked = versionResult.exitCode === 0;
|
|
7496
|
-
const passed =
|
|
7497
|
-
let errorMsg;
|
|
7498
|
-
if (!passed) {
|
|
7499
|
-
const failedCmds = commandResults.filter((r) => r.exitCode !== 0).map((r) => `${r.name}: exit=${r.exitCode}`).join(", ");
|
|
7500
|
-
errorMsg = `All Claude CLI commands failed. ${failedCmds}. Version works: ${versionWorked}`;
|
|
7501
|
-
}
|
|
7645
|
+
const passed = versionWorked;
|
|
7502
7646
|
return {
|
|
7503
|
-
name: "claude-
|
|
7647
|
+
name: "claude-cli-basic",
|
|
7504
7648
|
passed,
|
|
7505
7649
|
details,
|
|
7506
|
-
error:
|
|
7650
|
+
error: passed ? void 0 : `Claude CLI --version failed with exit code ${versionResult.exitCode}`,
|
|
7507
7651
|
durationMs: Date.now() - start
|
|
7508
7652
|
};
|
|
7509
7653
|
}
|
|
7654
|
+
/**
 * Diagnostic test: import the Claude Agent SDK and run one single-turn query
 * through it, racing the query against a 30-second timeout.
 *
 * The query environment is a copy of process.env with placeholder Anthropic
 * credentials, plus ANTHROPIC_BASE_URL / ANTHROPIC_CUSTOM_HEADERS when the
 * config supplies a gateway URL / headers. Progress is recorded in `details`
 * so a failure shows how far the test got.
 *
 * @param {object} config - Evaluator config; reads `aiGatewayUrl` and
 *   `aiGatewayHeaders` (an object of header name → value) when present.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   Test result. Failures (import error, SDK error, timeout) are caught and
 *   reported through `passed: false` and `error` instead of being thrown.
 */
async function testClaudeSdkExecution(config) {
  const start = Date.now();
  const details = {};
  const SDK_TIMEOUT_MS = 3e4;
  let timeoutHandle;
  try {
    console.error("[SDK-DIAG] Importing Claude Agent SDK...");
    const sdk = await import("@anthropic-ai/claude-agent-sdk");
    details.sdkImported = true;
    const env = { ...process.env };
    const placeholderApiKey = "sk-ant-api03-placeholder-auth-handled-by-gateway-000000000000000000000000";
    env.ANTHROPIC_API_KEY = placeholderApiKey;
    env.ANTHROPIC_AUTH_TOKEN = placeholderApiKey;
    if (config.aiGatewayUrl) {
      env.ANTHROPIC_BASE_URL = config.aiGatewayUrl;
    }
    if (config.aiGatewayHeaders) {
      // One "name:value" pair per line.
      const headerLines = Object.entries(config.aiGatewayHeaders).map(([key, value]) => `${key}:${value}`).join("\n");
      env.ANTHROPIC_CUSTOM_HEADERS = headerLines;
    }
    details.envConfigured = true;
    details.hasBaseUrl = !!env.ANTHROPIC_BASE_URL;
    details.hasCustomHeaders = !!env.ANTHROPIC_CUSTOM_HEADERS;
    const canUseTool = async () => {
      console.error("[SDK-DIAG] canUseTool callback called - returning allow");
      return { behavior: "allow" };
    };
    const queryOptions = {
      env,
      cwd: "/tmp",
      settingSources: ["project"],
      allowedTools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
      model: "claude-3-5-sonnet-latest",
      maxTurns: 1,
      // Just one turn for this test
      permissionMode: "default",
      canUseTool
    };
    details.queryOptionsConfigured = true;
    console.error("[SDK-DIAG] Starting SDK query with canUseTool...");
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(
        () => reject(new Error(`SDK test timed out after ${SDK_TIMEOUT_MS}ms`)),
        SDK_TIMEOUT_MS
      );
    });
    const sdkPromise = (async () => {
      let messageCount2 = 0;
      let resultMessage2 = null;
      // Options must be nested under `options` (the same call shape
      // executeWithClaudeCode uses); spreading them next to `prompt` would
      // not deliver env/cwd/model/maxTurns/canUseTool as query options.
      for await (const message of sdk.query({
        prompt: 'Say "SDK test successful" and nothing else.',
        options: queryOptions
      })) {
        messageCount2++;
        console.error(`[SDK-DIAG] Received message ${messageCount2}:`, message);
        // Keep the most recent assistant/result message for the preview.
        if (message.type === "result" || message.type === "assistant") {
          resultMessage2 = message;
        }
      }
      return { messageCount: messageCount2, resultMessage: resultMessage2 };
    })();
    const { messageCount, resultMessage } = await Promise.race([
      sdkPromise,
      timeoutPromise
    ]);
    details.messageCount = messageCount;
    details.hasResultMessage = !!resultMessage;
    details.resultPreview = resultMessage && typeof resultMessage === "object" ? JSON.stringify(resultMessage).slice(0, 500) : null;
    const passed = messageCount > 0;
    return {
      name: "claude-sdk-execution",
      passed,
      details,
      error: passed ? void 0 : "SDK query completed but returned no messages",
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    details.error = error;
    details.errorStack = err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0;
    console.error("[SDK-DIAG] SDK test failed:", error);
    return {
      name: "claude-sdk-execution",
      passed: false,
      details,
      error: `SDK execution failed: ${error}`,
      durationMs: Date.now() - start
    };
  } finally {
    // Always release the timer so it cannot keep the event loop alive (or
    // fire) after the test has already finished.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
}
|
|
7510
7743
|
async function testChildProcessSpawning() {
|
|
7511
7744
|
const start = Date.now();
|
|
7512
7745
|
const details = {};
|
|
@@ -7711,15 +7944,13 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7711
7944
|
await runTest("claude-cli-execution", testClaudeExecution);
|
|
7712
7945
|
await runTest("environment-dump", testEnvironmentDump);
|
|
7713
7946
|
await runTest("file-system-structure", testFileSystemStructure);
|
|
7714
|
-
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7715
|
-
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7716
|
-
await runTest(
|
|
7717
|
-
"claude-direct-execution",
|
|
7718
|
-
() => testClaudeDirectExecution(config)
|
|
7719
|
-
);
|
|
7720
7947
|
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7721
|
-
await runTest("sdk-import", testSdkImport);
|
|
7722
7948
|
await runTest("file-system-write", testFileSystemWrite);
|
|
7949
|
+
await runTest("sdk-import", testSdkImport);
|
|
7950
|
+
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7951
|
+
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7952
|
+
await runTest("claude-cli-basic", () => testClaudeDirectExecution(config));
|
|
7953
|
+
await runTest("claude-sdk-execution", () => testClaudeSdkExecution(config));
|
|
7723
7954
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7724
7955
|
const totalDurationMs = Date.now() - startTime;
|
|
7725
7956
|
const report = {
|