@wix/evalforge-evaluator 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +309 -78
- package/build/index.js.map +4 -4
- package/build/index.mjs +309 -78
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/file-diff.d.ts +30 -0
- package/package.json +3 -3
package/build/index.mjs
CHANGED
|
@@ -6343,40 +6343,59 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6343
6343
|
traceContext.authToken
|
|
6344
6344
|
);
|
|
6345
6345
|
}
|
|
6346
|
+
const SDK_TIMEOUT_MS = Math.max(3e5, maxTurns * 6e4);
|
|
6347
|
+
let timeoutHandle;
|
|
6348
|
+
let timedOut = false;
|
|
6346
6349
|
try {
|
|
6347
|
-
|
|
6348
|
-
|
|
6349
|
-
|
|
6350
|
-
|
|
6351
|
-
|
|
6352
|
-
|
|
6353
|
-
|
|
6354
|
-
if (messageCount <= 3) {
|
|
6355
|
-
console.error(
|
|
6356
|
-
"[DEBUG-H5] SDK message received",
|
|
6357
|
-
JSON.stringify({
|
|
6358
|
-
messageCount,
|
|
6359
|
-
type: message.type,
|
|
6360
|
-
timestamp: Date.now()
|
|
6361
|
-
})
|
|
6362
|
-
);
|
|
6363
|
-
}
|
|
6364
|
-
if (traceContext && isAssistantMessage(message)) {
|
|
6365
|
-
traceStepNumber++;
|
|
6366
|
-
const traceEvent = createTraceEventFromMessage(
|
|
6367
|
-
message,
|
|
6368
|
-
traceContext,
|
|
6369
|
-
traceStepNumber,
|
|
6370
|
-
false
|
|
6371
|
-
// Not complete yet
|
|
6372
|
-
);
|
|
6373
|
-
emitTraceEvent(
|
|
6374
|
-
traceEvent,
|
|
6375
|
-
traceContext.tracePushUrl,
|
|
6376
|
-
traceContext.routeHeader,
|
|
6377
|
-
traceContext.authToken
|
|
6350
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
6351
|
+
timeoutHandle = setTimeout(() => {
|
|
6352
|
+
timedOut = true;
|
|
6353
|
+
reject(
|
|
6354
|
+
new Error(
|
|
6355
|
+
`SDK execution timed out after ${SDK_TIMEOUT_MS}ms. Skill: ${skill.name}, Scenario: ${scenario.name}, Messages received: ${messageCount}, MaxTurns: ${maxTurns}`
|
|
6356
|
+
)
|
|
6378
6357
|
);
|
|
6358
|
+
}, SDK_TIMEOUT_MS);
|
|
6359
|
+
});
|
|
6360
|
+
const sdkPromise = (async () => {
|
|
6361
|
+
for await (const message of query({
|
|
6362
|
+
prompt: scenario.triggerPrompt,
|
|
6363
|
+
options: queryOptions
|
|
6364
|
+
})) {
|
|
6365
|
+
messageCount++;
|
|
6366
|
+
console.log("[SDK Message]", JSON.stringify(message, null, 2));
|
|
6367
|
+
allMessages.push(message);
|
|
6368
|
+
if (messageCount <= 3) {
|
|
6369
|
+
console.error(
|
|
6370
|
+
"[DEBUG-H5] SDK message received",
|
|
6371
|
+
JSON.stringify({
|
|
6372
|
+
messageCount,
|
|
6373
|
+
type: message.type,
|
|
6374
|
+
timestamp: Date.now()
|
|
6375
|
+
})
|
|
6376
|
+
);
|
|
6377
|
+
}
|
|
6378
|
+
if (traceContext && isAssistantMessage(message)) {
|
|
6379
|
+
traceStepNumber++;
|
|
6380
|
+
const traceEvent = createTraceEventFromMessage(
|
|
6381
|
+
message,
|
|
6382
|
+
traceContext,
|
|
6383
|
+
traceStepNumber,
|
|
6384
|
+
false
|
|
6385
|
+
// Not complete yet
|
|
6386
|
+
);
|
|
6387
|
+
emitTraceEvent(
|
|
6388
|
+
traceEvent,
|
|
6389
|
+
traceContext.tracePushUrl,
|
|
6390
|
+
traceContext.routeHeader,
|
|
6391
|
+
traceContext.authToken
|
|
6392
|
+
);
|
|
6393
|
+
}
|
|
6379
6394
|
}
|
|
6395
|
+
})();
|
|
6396
|
+
await Promise.race([sdkPromise, timeoutPromise]);
|
|
6397
|
+
if (timeoutHandle) {
|
|
6398
|
+
clearTimeout(timeoutHandle);
|
|
6380
6399
|
}
|
|
6381
6400
|
console.log(
|
|
6382
6401
|
"[executeWithClaudeCode] Claude Agent SDK query completed, received",
|
|
@@ -6384,6 +6403,12 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6384
6403
|
"messages"
|
|
6385
6404
|
);
|
|
6386
6405
|
} catch (sdkError) {
|
|
6406
|
+
if (timeoutHandle) {
|
|
6407
|
+
clearTimeout(timeoutHandle);
|
|
6408
|
+
}
|
|
6409
|
+
if (timedOut) {
|
|
6410
|
+
console.error("[SDK-TIMEOUT] Execution timed out:", sdkError);
|
|
6411
|
+
}
|
|
6387
6412
|
console.error("[SDK-ERROR] ====== CLAUDE SDK EXECUTION FAILED ======");
|
|
6388
6413
|
console.error("[SDK-ERROR] Timestamp:", (/* @__PURE__ */ new Date()).toISOString());
|
|
6389
6414
|
console.error(
|
|
@@ -6777,6 +6802,154 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
6777
6802
|
};
|
|
6778
6803
|
}
|
|
6779
6804
|
|
|
6805
|
+
// src/run-scenario/file-diff.ts
|
|
6806
|
+
import { readdirSync as readdirSync2, readFileSync, statSync, existsSync as existsSync2 } from "fs";
|
|
6807
|
+
import { join as join3, relative } from "path";
|
|
6808
|
+
// Directory/file names that are never walked or recorded in a snapshot.
var IGNORED_PATTERNS = [
  "node_modules",
  ".git",
  ".claude",
  ".cursor",
  "dist",
  "build",
  ".next",
  ".turbo",
  "__pycache__",
  ".pytest_cache",
  ".venv",
  "venv",
  ".DS_Store"
];

// Lower-case filename suffixes treated as binary; such files are not read.
var BINARY_EXTENSIONS = [
  // images
  ".png", ".jpg", ".jpeg", ".gif", ".webp", ".ico", ".svg",
  // fonts
  ".woff", ".woff2", ".ttf", ".eot",
  // audio / video
  ".mp3", ".mp4", ".wav", ".avi", ".mov",
  // documents and archives
  ".pdf", ".zip", ".tar", ".gz", ".rar", ".7z",
  // native binaries
  ".exe", ".dll", ".so", ".dylib"
];

// Size cap in bytes (100 KiB); the snapshot code compares file sizes against it.
var MAX_FILE_SIZE = 1024 * 100;

/**
 * Reports whether a name is on the ignore list.
 *
 * @param {string} name2 - Entry name (or slash-separated path) to check.
 * @returns {boolean} True when the name equals an ignored pattern or begins
 *   with an ignored pattern followed by "/".
 */
function shouldIgnore(name2) {
  for (const ignored of IGNORED_PATTERNS) {
    if (name2 === ignored) {
      return true;
    }
    if (name2.startsWith(`${ignored}/`)) {
      return true;
    }
  }
  return false;
}

/**
 * Reports whether a filename ends with one of the known binary extensions.
 * The comparison is case-insensitive.
 *
 * @param {string} filename - File name to check.
 * @returns {boolean} True when the lower-cased name ends with a listed suffix.
 */
function isBinaryFile(filename) {
  const lowered = filename.toLowerCase();
  for (const suffix of BINARY_EXTENSIONS) {
    if (lowered.endsWith(suffix)) {
      return true;
    }
  }
  return false;
}
|
|
6861
|
+
/**
 * Recursively reads the text files under `dir` into a plain object that maps
 * each file's path (relative to the snapshot root) to its UTF-8 content.
 *
 * Entries rejected by `shouldIgnore`, files matched by `isBinaryFile`, and
 * files larger than `MAX_FILE_SIZE` bytes are left out. The walk is
 * best-effort: anything that cannot be listed, stat'ed or read is skipped
 * rather than aborting the snapshot.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseDir] - Root used to compute relative keys; defaults to
 *   `dir`. Recursive calls pass the original root through.
 * @returns {Record<string, string>} Relative path -> file content. Empty when
 *   `dir` does not exist or cannot be listed.
 */
function snapshotDirectory(dir, baseDir) {
  const snapshot = {};
  const base = baseDir || dir;
  if (!existsSync2(dir)) {
    return snapshot;
  }
  let entries;
  try {
    entries = readdirSync2(dir, { withFileTypes: true });
  } catch {
    // `dir` is not a directory, is unreadable, or vanished after the existence
    // check. Treat it like an unreadable file: skip it, keep the rest.
    return snapshot;
  }
  for (const entry of entries) {
    if (shouldIgnore(entry.name)) {
      continue;
    }
    const fullPath = join3(dir, entry.name);
    if (entry.isDirectory()) {
      Object.assign(snapshot, snapshotDirectory(fullPath, base));
      continue;
    }
    // Anything that is neither a directory nor a regular file is not recorded.
    if (!entry.isFile() || isBinaryFile(entry.name)) {
      continue;
    }
    try {
      if (statSync(fullPath).size > MAX_FILE_SIZE) {
        continue;
      }
      snapshot[relative(base, fullPath)] = readFileSync(fullPath, "utf-8");
    } catch {
      // Deliberate best-effort: a file that disappears or cannot be read is
      // simply absent from the snapshot.
      continue;
    }
  }
  return snapshot;
}
|
|
6895
|
+
/**
 * Builds a line-by-line diff between two text blobs.
 *
 * Both inputs are split on "\n". Lines shared at the start and at the end are
 * emitted as "unchanged"; the region in between is aligned with a
 * longest-common-subsequence table, so inserting or deleting a line does not
 * mark every following line as changed. If that region is too large for the
 * table (more than MAX_LCS_CELLS cells) it is compared position by position
 * instead.
 *
 * Line numbers are 1-based: "removed" entries carry the line's number in
 * `before`; "added" and "unchanged" entries carry its number in `after`.
 * Within a changed run, removals are emitted before additions.
 *
 * @param {string} before - Original text.
 * @param {string} after - Updated text.
 * @returns {Array<{type: "unchanged"|"removed"|"added", content: string, lineNumber: number}>}
 */
function generateDiffLines(before, after) {
  // Upper bound on LCS table cells (Uint32 each, i.e. about 16 MB at the cap).
  const MAX_LCS_CELLS = 4e6;
  const beforeLines = before.split("\n");
  const afterLines = after.split("\n");
  const result = [];
  const emit = (type, content, lineNumber) => {
    result.push({ type, content, lineNumber });
  };

  // Trim the shared head and tail so the table only covers what changed.
  const shorter = Math.min(beforeLines.length, afterLines.length);
  let prefix = 0;
  while (prefix < shorter && beforeLines[prefix] === afterLines[prefix]) {
    prefix++;
  }
  let suffix = 0;
  while (
    suffix < shorter - prefix &&
    beforeLines[beforeLines.length - 1 - suffix] === afterLines[afterLines.length - 1 - suffix]
  ) {
    suffix++;
  }

  for (let i = 0; i < prefix; i++) {
    emit("unchanged", afterLines[i], i + 1);
  }

  const rows = beforeLines.length - prefix - suffix;
  const cols = afterLines.length - prefix - suffix;
  if (rows > 0 && cols > 0 && rows * cols <= MAX_LCS_CELLS) {
    // lcs[i * width + j] = LCS length of before[prefix+i..] vs after[prefix+j..]
    const width = cols + 1;
    const lcs = new Uint32Array((rows + 1) * width);
    for (let i = rows - 1; i >= 0; i--) {
      for (let j = cols - 1; j >= 0; j--) {
        lcs[i * width + j] = beforeLines[prefix + i] === afterLines[prefix + j]
          ? lcs[(i + 1) * width + j + 1] + 1
          : Math.max(lcs[(i + 1) * width + j], lcs[i * width + j + 1]);
      }
    }
    let i = 0;
    let j = 0;
    while (i < rows || j < cols) {
      if (i < rows && j < cols && beforeLines[prefix + i] === afterLines[prefix + j]) {
        emit("unchanged", afterLines[prefix + j], prefix + j + 1);
        i++;
        j++;
      } else if (j >= cols || (i < rows && lcs[(i + 1) * width + j] >= lcs[i * width + j + 1])) {
        // Ties prefer removal so a replaced run reads "old lines, then new lines".
        emit("removed", beforeLines[prefix + i], prefix + i + 1);
        i++;
      } else {
        emit("added", afterLines[prefix + j], prefix + j + 1);
        j++;
      }
    }
  } else {
    // One side of the changed region is empty, or the region is too large to
    // align: pair lines by position.
    const span = Math.max(rows, cols);
    for (let k = 0; k < span; k++) {
      const index = prefix + k;
      const beforeLine = k < rows ? beforeLines[index] : void 0;
      const afterLine = k < cols ? afterLines[index] : void 0;
      if (beforeLine !== void 0 && beforeLine === afterLine) {
        emit("unchanged", beforeLine, index + 1);
        continue;
      }
      if (beforeLine !== void 0) {
        emit("removed", beforeLine, index + 1);
      }
      if (afterLine !== void 0) {
        emit("added", afterLine, index + 1);
      }
    }
  }

  for (let k = 0; k < suffix; k++) {
    const index = afterLines.length - suffix + k;
    emit("unchanged", afterLines[index], index + 1);
  }
  return result;
}
|
|
6932
|
+
/**
 * Compares two directory snapshots (path -> content maps) and returns one
 * entry per path whose content differs, sorted by path.
 *
 * A path missing from one side is treated as having empty content, so created
 * files have `expected === ""` and deleted files have `actual === ""`. As a
 * consequence, an empty file that is created or deleted is not reported.
 *
 * @param {Record<string, string>} before - Snapshot taken before the run.
 * @param {Record<string, string>} after - Snapshot taken after the run.
 * @returns {Array<{path: string, expected: string, actual: string, diffLines: Array}>}
 *   `diffLines` is whatever `generateDiffLines(expected, actual)` returns.
 */
function diffSnapshots(before, after) {
  // Look up own keys only: a file named "toString" or "constructor" would
  // otherwise resolve to a function inherited from Object.prototype and crash
  // the line diff.
  const hasOwn = Object.prototype.hasOwnProperty;
  const contentOf = (snapshot, path10) => hasOwn.call(snapshot, path10) ? snapshot[path10] ?? "" : "";

  const diffs = [];
  const allPaths = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
  for (const path10 of allPaths) {
    const beforeContent = contentOf(before, path10);
    const afterContent = contentOf(after, path10);
    if (beforeContent === afterContent) {
      continue;
    }
    diffs.push({
      path: path10,
      expected: beforeContent,
      actual: afterContent,
      diffLines: generateDiffLines(beforeContent, afterContent)
    });
  }
  diffs.sort((a, b) => a.path.localeCompare(b.path));
  return diffs;
}
|
|
6952
|
+
|
|
6780
6953
|
// src/run-scenario/callSkill.ts
|
|
6781
6954
|
async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
6782
6955
|
if (agent && agent.runCommand !== "claude") {
|
|
@@ -6785,6 +6958,7 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6785
6958
|
);
|
|
6786
6959
|
}
|
|
6787
6960
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6961
|
+
const beforeSnapshot = workDir ? snapshotDirectory(workDir) : {};
|
|
6788
6962
|
const { result, llmTrace } = await executeWithClaudeCode(skill, scenario, {
|
|
6789
6963
|
cwd: workDir || process.cwd(),
|
|
6790
6964
|
systemPrompt: skill.skillMd,
|
|
@@ -6805,6 +6979,8 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6805
6979
|
}
|
|
6806
6980
|
});
|
|
6807
6981
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6982
|
+
const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
|
|
6983
|
+
const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
|
|
6808
6984
|
return {
|
|
6809
6985
|
id: randomUUID2(),
|
|
6810
6986
|
targetId: skill.id,
|
|
@@ -6814,6 +6990,7 @@ async function callSkill(config, evalRunId2, scenario, skill, agent, workDir) {
|
|
|
6814
6990
|
modelConfig: agent?.modelConfig,
|
|
6815
6991
|
duration: result.durationMs,
|
|
6816
6992
|
outputText: result.outputText,
|
|
6993
|
+
fileDiffs: fileDiffs.length > 0 ? fileDiffs : void 0,
|
|
6817
6994
|
startedAt,
|
|
6818
6995
|
completedAt,
|
|
6819
6996
|
llmTrace
|
|
@@ -7425,33 +7602,14 @@ async function testClaudeDirectExecution(config) {
|
|
|
7425
7602
|
const versionResult = await runAndLog(
|
|
7426
7603
|
"claude --version",
|
|
7427
7604
|
`${envExports} && "${claudePath}" --version 2>&1`,
|
|
7428
|
-
|
|
7429
|
-
|
|
7430
|
-
const fullCmdResult = await runAndLog(
|
|
7431
|
-
"claude -p (with json output)",
|
|
7432
|
-
`${envExports} && "${claudePath}" -p "Say hello" --output-format json 2>&1`,
|
|
7433
|
-
45e3
|
|
7434
|
-
);
|
|
7435
|
-
const simpleCmdResult = await runAndLog(
|
|
7436
|
-
"claude -p (simple)",
|
|
7437
|
-
`${envExports} && "${claudePath}" -p "Hello" 2>&1`,
|
|
7438
|
-
45e3
|
|
7439
|
-
);
|
|
7440
|
-
const printFlagResult = await runAndLog(
|
|
7441
|
-
"claude --print (long flag)",
|
|
7442
|
-
`${envExports} && "${claudePath}" --print "Hi" 2>&1`,
|
|
7443
|
-
45e3
|
|
7444
|
-
);
|
|
7445
|
-
const positionalResult = await runAndLog(
|
|
7446
|
-
'claude "prompt" (positional)',
|
|
7447
|
-
`${envExports} && "${claudePath}" "Hello world" 2>&1`,
|
|
7448
|
-
45e3
|
|
7605
|
+
1e4
|
|
7606
|
+
// Short timeout - should complete in <2s
|
|
7449
7607
|
);
|
|
7450
|
-
await runAndLog("claude --help", `"${claudePath}" --help 2>&1`,
|
|
7608
|
+
await runAndLog("claude --help", `"${claudePath}" --help 2>&1`, 1e4);
|
|
7451
7609
|
await runAndLog(
|
|
7452
7610
|
"claude --version (no custom env)",
|
|
7453
7611
|
`"${claudePath}" --version 2>&1`,
|
|
7454
|
-
|
|
7612
|
+
1e4
|
|
7455
7613
|
);
|
|
7456
7614
|
const homeDir = process.env.HOME || "/tmp";
|
|
7457
7615
|
const claudeConfigDir = path9.join(homeDir, ".claude");
|
|
@@ -7461,35 +7619,110 @@ async function testClaudeDirectExecution(config) {
|
|
|
7461
7619
|
try {
|
|
7462
7620
|
const configContents = fs11.readdirSync(claudeConfigDir);
|
|
7463
7621
|
details.claudeConfigContents = configContents;
|
|
7464
|
-
for (const file of configContents) {
|
|
7465
|
-
if (file.includes("log") || file.includes("error")) {
|
|
7466
|
-
const logPath = path9.join(claudeConfigDir, file);
|
|
7467
|
-
const catCmd = `cat "${logPath}" 2>&1 | tail -50`;
|
|
7468
|
-
const logContent = await execCommand(catCmd);
|
|
7469
|
-
details[`claudeLogFile_${file}`] = logContent.stdout.slice(0, 1e3);
|
|
7470
|
-
}
|
|
7471
|
-
}
|
|
7472
7622
|
} catch (e) {
|
|
7473
7623
|
details.claudeConfigError = e instanceof Error ? e.message : String(e);
|
|
7474
7624
|
}
|
|
7475
7625
|
}
|
|
7476
7626
|
details.commandResults = commandResults;
|
|
7477
|
-
const anyPromptWorked = fullCmdResult.exitCode === 0 || simpleCmdResult.exitCode === 0 || printFlagResult.exitCode === 0 || positionalResult.exitCode === 0;
|
|
7478
7627
|
const versionWorked = versionResult.exitCode === 0;
|
|
7479
|
-
const passed =
|
|
7480
|
-
let errorMsg;
|
|
7481
|
-
if (!passed) {
|
|
7482
|
-
const failedCmds = commandResults.filter((r) => r.exitCode !== 0).map((r) => `${r.name}: exit=${r.exitCode}`).join(", ");
|
|
7483
|
-
errorMsg = `All Claude CLI commands failed. ${failedCmds}. Version works: ${versionWorked}`;
|
|
7484
|
-
}
|
|
7628
|
+
const passed = versionWorked;
|
|
7485
7629
|
return {
|
|
7486
|
-
name: "claude-
|
|
7630
|
+
name: "claude-cli-basic",
|
|
7487
7631
|
passed,
|
|
7488
7632
|
details,
|
|
7489
|
-
error:
|
|
7633
|
+
error: passed ? void 0 : `Claude CLI --version failed with exit code ${versionResult.exitCode}`,
|
|
7490
7634
|
durationMs: Date.now() - start
|
|
7491
7635
|
};
|
|
7492
7636
|
}
|
|
7637
|
+
/**
 * Diagnostic: runs a one-turn query through the Claude Agent SDK and reports
 * whether any message came back.
 *
 * The SDK is imported dynamically, the environment is pointed at the AI
 * gateway from `config` (with a placeholder API key), and the query is raced
 * against a 30 second timeout. This function never throws: every failure is
 * returned as a `passed: false` result.
 *
 * @param {{aiGatewayUrl?: string, aiGatewayHeaders?: Record<string, string>}} config
 *   Only these two fields are read here.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testClaudeSdkExecution(config) {
  const start = Date.now();
  const details = {};
  const SDK_TIMEOUT_MS = 3e4;
  // Lets the timeout cancel a query that is still running.
  const abortController = new AbortController();
  let timeoutHandle;
  try {
    console.error("[SDK-DIAG] Importing Claude Agent SDK...");
    const sdk = await import("@anthropic-ai/claude-agent-sdk");
    details.sdkImported = true;
    const env = { ...process.env };
    const placeholderApiKey = "sk-ant-api03-placeholder-auth-handled-by-gateway-000000000000000000000000";
    env.ANTHROPIC_API_KEY = placeholderApiKey;
    env.ANTHROPIC_AUTH_TOKEN = placeholderApiKey;
    if (config.aiGatewayUrl) {
      env.ANTHROPIC_BASE_URL = config.aiGatewayUrl;
    }
    if (config.aiGatewayHeaders) {
      // One "key:value" header per line.
      const headerLines = Object.entries(config.aiGatewayHeaders).map(([key, value]) => `${key}:${value}`).join("\n");
      env.ANTHROPIC_CUSTOM_HEADERS = headerLines;
    }
    details.envConfigured = true;
    details.hasBaseUrl = !!env.ANTHROPIC_BASE_URL;
    details.hasCustomHeaders = !!env.ANTHROPIC_CUSTOM_HEADERS;
    const canUseTool = async () => {
      console.error("[SDK-DIAG] canUseTool callback called - returning allow");
      return { behavior: "allow" };
    };
    const queryOptions = {
      env,
      cwd: "/tmp",
      settingSources: ["project"],
      allowedTools: ["Read", "Write", "Edit", "Bash", "Glob", "Grep"],
      model: "claude-3-5-sonnet-latest",
      maxTurns: 1,
      // Just one turn for this test
      permissionMode: "default",
      canUseTool,
      abortController
    };
    details.queryOptionsConfigured = true;
    console.error("[SDK-DIAG] Starting SDK query with canUseTool...");
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => {
        // Reject first so the timeout error wins the race, then cancel the
        // in-flight query so it does not keep running in the background.
        reject(new Error(`SDK test timed out after ${SDK_TIMEOUT_MS}ms`));
        abortController.abort();
      }, SDK_TIMEOUT_MS);
    });
    const sdkPromise = (async () => {
      let messageCount2 = 0;
      let resultMessage2 = null;
      // Options go under `options`, as in executeWithClaudeCode; spreading
      // them next to `prompt` leaves env/cwd/model/maxTurns unapplied.
      for await (const message of sdk.query({
        prompt: 'Say "SDK test successful" and nothing else.',
        options: queryOptions
      })) {
        messageCount2++;
        console.error(`[SDK-DIAG] Received message ${messageCount2}:`, message);
        // Keep the most recent assistant/result message for the preview.
        if (message.type === "result" || message.type === "assistant") {
          resultMessage2 = message;
        }
      }
      return { messageCount: messageCount2, resultMessage: resultMessage2 };
    })();
    const { messageCount, resultMessage } = await Promise.race([
      sdkPromise,
      timeoutPromise
    ]);
    details.messageCount = messageCount;
    details.hasResultMessage = !!resultMessage;
    details.resultPreview = resultMessage && typeof resultMessage === "object" ? JSON.stringify(resultMessage).slice(0, 500) : null;
    const passed = messageCount > 0;
    return {
      name: "claude-sdk-execution",
      passed,
      details,
      error: passed ? void 0 : "SDK query completed but returned no messages",
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    details.error = error;
    details.errorStack = err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0;
    console.error("[SDK-DIAG] SDK test failed:", error);
    return {
      name: "claude-sdk-execution",
      passed: false,
      details,
      error: `SDK execution failed: ${error}`,
      durationMs: Date.now() - start
    };
  } finally {
    // Always release the timer so it cannot keep the event loop alive for the
    // rest of the 30 seconds after the test has finished.
    clearTimeout(timeoutHandle);
  }
}
|
|
7493
7726
|
async function testChildProcessSpawning() {
|
|
7494
7727
|
const start = Date.now();
|
|
7495
7728
|
const details = {};
|
|
@@ -7694,15 +7927,13 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7694
7927
|
await runTest("claude-cli-execution", testClaudeExecution);
|
|
7695
7928
|
await runTest("environment-dump", testEnvironmentDump);
|
|
7696
7929
|
await runTest("file-system-structure", testFileSystemStructure);
|
|
7697
|
-
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7698
|
-
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7699
|
-
await runTest(
|
|
7700
|
-
"claude-direct-execution",
|
|
7701
|
-
() => testClaudeDirectExecution(config)
|
|
7702
|
-
);
|
|
7703
7930
|
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7704
|
-
await runTest("sdk-import", testSdkImport);
|
|
7705
7931
|
await runTest("file-system-write", testFileSystemWrite);
|
|
7932
|
+
await runTest("sdk-import", testSdkImport);
|
|
7933
|
+
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7934
|
+
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7935
|
+
await runTest("claude-cli-basic", () => testClaudeDirectExecution(config));
|
|
7936
|
+
await runTest("claude-sdk-execution", () => testClaudeSdkExecution(config));
|
|
7706
7937
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7707
7938
|
const totalDurationMs = Date.now() - startTime;
|
|
7708
7939
|
const report = {
|