@workbench-ai/workbench 0.0.88 → 0.0.90

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAiEA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAuTD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CA2MlB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAiEA,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC,cAAc,CAAC;CAC/B;AAuTD,wBAAsB,MAAM,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,EAAE,EAAE,GAAE,KAGzD,GAAG,OAAO,CAAC,MAAM,CAAC,CAmNlB"}
package/dist/index.js CHANGED
@@ -415,11 +415,15 @@ export async function runCli(argv, io = {
415
415
  return await handleAgent(parsed, io);
416
416
  }
417
417
  if (command === "sync") {
418
+ const beforeRuns = parsed.flags["dry-run"] === true
419
+ ? undefined
420
+ : await runEvidenceFingerprints(core).catch(() => undefined);
418
421
  const result = await syncWorkbenchRemote({
419
422
  ...core,
420
423
  remote: optionalPositional(parsed, 1),
421
424
  dryRun: parsed.flags["dry-run"] === true,
422
425
  });
426
+ const next = result.dryRun ? null : await syncNextCommand(core, beforeRuns);
423
427
  return emitResult("workbench.cli.sync.v1", {
424
428
  remote: result.remote,
425
429
  status: result.dryRun ? "dry_run" : "synced",
@@ -427,8 +431,12 @@ export async function runCli(argv, io = {
427
431
  pulled: result.pulled,
428
432
  changed: syncChanged(result),
429
433
  publication: result.publication,
434
+ next: next,
430
435
  ...(result.dryRun ? { dryRun: true } : {}),
431
- }, parsed, io, () => `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`);
436
+ }, parsed, io, () => [
437
+ `${result.dryRun ? "Would sync" : "Synced"} ${result.remote.name}: pushed ${result.pushed}, pulled ${result.pulled}${result.upToDate ? " (up to date)" : ""}.`,
438
+ ...(next ? [`next: ${next}`] : []),
439
+ ].join("\n"));
432
440
  }
433
441
  if (command === "publish") {
434
442
  const preview = parsed.flags["dry-run"] === true
@@ -614,12 +622,14 @@ async function handleAgent(parsed, io) {
614
622
  if (!adapter) {
615
623
  throw new WorkbenchUserError("workbench agent add requires --adapter ADAPTER.");
616
624
  }
625
+ const config = parseWithFlags(parsed);
626
+ validateAgentCommandConfig(config);
617
627
  const agent = await addWorkbenchAgent({
618
628
  ...(await coreOptions(parsed)),
619
629
  name,
620
630
  adapter,
621
631
  model: stringFlag(parsed, "model"),
622
- config: parseWithFlags(parsed),
632
+ config,
623
633
  });
624
634
  return output(agent, parsed, io, () => `Added agent ${formatAgent(agent)}.`);
625
635
  }
@@ -656,7 +666,17 @@ async function handleAdapterLogin(provider, parsed, io) {
656
666
  async function handleAdapterLogout(provider, parsed, io) {
657
667
  const target = parseAuthTarget(provider, authProfileFlag(parsed));
658
668
  await localWorkbenchAdapterAuthStore(adapterAuthStoreRoot()).disconnect(target);
659
- const remote = await deleteAdapterConnectionRemote(target, parsed);
669
+ const remote = await deleteAdapterConnectionRemote(target, parsed).catch((error) => {
670
+ if (error instanceof WorkbenchCodedError && error.code === "auth_required") {
671
+ return {
672
+ status: "not_authenticated",
673
+ sync: "skipped",
674
+ reason: "not_authenticated",
675
+ remediation: "Run workbench login.",
676
+ };
677
+ }
678
+ throw error;
679
+ });
660
680
  return emitResult("workbench.cli.logout.v1", {
661
681
  provider: target.adapterId,
662
682
  localAdapter: {
@@ -1061,7 +1081,6 @@ async function startCloudExecution(command, parsed, io) {
1061
1081
  const showProgress = true;
1062
1082
  const interrupt = createCloudInterruptController(command, io, showProgress);
1063
1083
  try {
1064
- writeCloudProgress(io, `workbench cloud: preparing hosted ${command}.`, showProgress);
1065
1084
  const remote = await cloudPreScheduleStep(command, interrupt, ensureCloudRemoteForExecution(root, parsed));
1066
1085
  const source = parseWorkbenchInstallSource(remote.url);
1067
1086
  if (!source) {
@@ -1079,6 +1098,7 @@ async function startCloudExecution(command, parsed, io) {
1079
1098
  });
1080
1099
  }
1081
1100
  const core = { dir: root, authToken: token };
1101
+ writeCloudProgress(io, `workbench cloud: preparing hosted ${command}.`, showProgress);
1082
1102
  writeCloudProgress(io, "workbench cloud: preparing current source.", showProgress);
1083
1103
  const request = command === "eval"
1084
1104
  ? await cloudPreScheduleStep(command, interrupt, prepareWorkbenchCloudEvalRequest({
@@ -1506,7 +1526,6 @@ function hostedImproveResult(started, artifactIds, switchedVersion) {
1506
1526
  const runs = started.runs.map((run) => runSummary(run, artifactIds.get(run.id) ?? []));
1507
1527
  return {
1508
1528
  run: runs[0] ?? null,
1509
- runs,
1510
1529
  switched: Boolean(switchedVersion),
1511
1530
  promoted: Boolean(switchedVersion),
1512
1531
  ...(switchedVersion ? { version: versionSummary(switchedVersion) } : {}),
@@ -1524,6 +1543,49 @@ function cloudSyncSummary(sync) {
1524
1543
  function syncChanged(sync) {
1525
1544
  return sync.pushed > 0 || sync.pulled > 0;
1526
1545
  }
1546
/**
 * Chooses the follow-up command to suggest after a sync.
 *
 * When a pre-sync fingerprint map is supplied and some run's evidence differs
 * from it, the suggestion is `workbench show <ref>` for the run returned by
 * `latestChangedRunAfterSync`. Otherwise the suggestion is whatever
 * `statusWithCausalNext` reports as `next`.
 *
 * @param {object} core - Core options forwarded to the snapshot/status helpers.
 * @param {Map<string, string>|undefined} beforeRuns - Run id → fingerprint map
 *   captured before the sync; a falsy value skips the changed-run lookup.
 * @returns {Promise<*>} The suggested command, or `null` when the status has
 *   no `next` value.
 */
async function syncNextCommand(core, beforeRuns) {
  const changedRun = beforeRuns
    ? await latestChangedRunAfterSync(core, beforeRuns)
    : null;
  if (changedRun) {
    return `workbench show ${displayRef(changedRun.id)}`;
  }

  // No changed run to point at: fall back to the status-derived suggestion.
  const snapshot = await workbenchStatusSnapshot(core);
  const authStatus = await workbenchCliAuthStatus();
  const emptyInventory = {
    installedSkillCount: 0,
    stores: [],
    connectedProviders: [],
  };
  const { next } = await statusWithCausalNext(snapshot, authStatus, core, emptyInventory);
  return next ?? null;
}
1562
/**
 * Finds the run whose evidence changed across a sync and whose evidence time
 * is the latest.
 *
 * A run counts as changed when its current fingerprint differs from the one
 * recorded in `beforeRuns`; runs absent from `beforeRuns` therefore count as
 * changed too. Among changed runs, the one with the greatest evidence time
 * (compared with `localeCompare`) wins; on ties the run that appears first in
 * the snapshot wins.
 *
 * This lookup is best-effort: a snapshot that cannot be read, a snapshot with
 * no `runs` array, or a run without any timestamp must not turn an already
 * completed sync into a failure.
 *
 * @param {object} core - Core options forwarded to the snapshot helper.
 * @param {Map<string, string>} beforeRuns - Run id → fingerprint map captured
 *   before the sync.
 * @returns {Promise<object|null>} The most recent changed run, or `null` when
 *   nothing changed or the snapshot is unavailable.
 */
async function latestChangedRunAfterSync(core, beforeRuns) {
  const snapshot = await createWorkbenchReadOnlyInspectionSnapshot(core).catch(() => null);
  const runs = Array.isArray(snapshot?.runs) ? snapshot.runs : [];

  // A missing timestamp sorts as the empty string (i.e. oldest) instead of
  // throwing on `undefined.localeCompare`.
  const evidenceTime = (run) => String(runEvidenceTime(run) ?? "");

  let latest = null;
  for (const run of runs) {
    if (beforeRuns.get(run.id) === runEvidenceFingerprint(run)) {
      continue;
    }
    // Strictly-greater keeps the first run on ties, matching a stable
    // descending sort followed by taking element 0.
    if (latest === null || evidenceTime(run).localeCompare(evidenceTime(latest)) > 0) {
      latest = run;
    }
  }
  return latest;
}
1569
/**
 * Captures a fingerprint for every run in the current inspection snapshot.
 *
 * @param {object} core - Core options forwarded to the snapshot helper.
 * @returns {Promise<Map<*, string>>} Map from run id to the string produced by
 *   `runEvidenceFingerprint` for that run. Rejects if the snapshot cannot be
 *   created.
 */
async function runEvidenceFingerprints(core) {
  const { runs } = await createWorkbenchReadOnlyInspectionSnapshot(core);
  const fingerprintsById = new Map();
  for (const run of runs) {
    fingerprintsById.set(run.id, runEvidenceFingerprint(run));
  }
  return fingerprintsById;
}
1573
/**
 * Serializes the evidence-bearing fields of a run into a comparable string.
 *
 * Two fingerprints are equal exactly when the JSON encoding of these fields,
 * in this fixed order, is equal. `jobIds` falls back to an empty array when
 * nullish; fields whose value is `undefined` are omitted by `JSON.stringify`.
 *
 * @param {object} run - Run record to fingerprint.
 * @returns {string} JSON text of the selected fields.
 */
function runEvidenceFingerprint(run) {
  const {
    status,
    score,
    costUsd,
    latencyMs,
    traceIds,
    finishedAt,
    outputVersionId,
    error,
  } = run;
  const jobIds = run.jobIds ?? [];
  // Property order below is part of the fingerprint; keep it stable.
  const evidence = {
    status,
    score,
    costUsd,
    latencyMs,
    jobIds,
    traceIds,
    finishedAt,
    outputVersionId,
    error,
  };
  return JSON.stringify(evidence);
}
1586
/**
 * Returns the timestamp used to order runs by evidence recency.
 *
 * @param {object} run - Run record.
 * @returns {*} `run.finishedAt` unless it is `null`/`undefined`, in which case
 *   `run.createdAt`.
 */
function runEvidenceTime(run) {
  const finished = run.finishedAt;
  if (finished === null || finished === undefined) {
    return run.createdAt;
  }
  return finished;
}
1527
1589
  function writeCloudProgress(io, message, enabled = true) {
1528
1590
  if (!enabled) {
1529
1591
  return;
@@ -2741,6 +2803,33 @@ function parseWithFlags(parsed) {
2741
2803
  return [entry.slice(0, eq), parseScalar(entry.slice(eq + 1))];
2742
2804
  }));
2743
2805
  }
2806
/**
 * Rejects agent command settings that look like the shell already expanded a
 * Workbench runtime variable before the CLI received the value.
 *
 * Only the `command` and `improveCommand` entries are inspected, in that
 * order, and only when they are strings. The first entry for which
 * `expandedRuntimeEnvPath` reports a match aborts validation.
 *
 * @param {object} config - Parsed `--with` key/value pairs.
 * @returns {void}
 * @throws {WorkbenchCodedError} Code `"usage"`, exit code 2, with a
 *   remediation hint suggesting single quotes around the assignment.
 */
function validateAgentCommandConfig(config) {
  ["command", "improveCommand"].forEach((key) => {
    const candidate = config[key];
    const expanded = typeof candidate === "string"
      ? expandedRuntimeEnvPath(candidate)
      : null;
    if (expanded) {
      throw new WorkbenchCodedError("usage", `--with ${key}=... contains ${expanded.path}, which usually means the shell expanded a Workbench runtime variable before Workbench received it.`, {
        remediation: `Wrap the assignment in single quotes, for example --with '${key}=... >> "${expanded.replacement}"'.`,
        exitCode: 2,
      });
    }
  });
}
2822
/**
 * Detects a root-level path that is the tell-tale residue of an unset
 * Workbench runtime variable having been expanded by the user's shell
 * (e.g. `$OUTPUT_DIR/result.json` arriving as `/result.json`).
 *
 * A path only counts when it stands on its own: it must be preceded by the
 * start of the string, whitespace, a quote, `=`, or a shell operator
 * (`<`, `>`, `|`, `;`, `&`, `(`), and followed by the end of the string,
 * whitespace, a quote, or a shell operator (`<`, `>`, `|`, `;`, `&`, `)`).
 * Including the operators catches unspaced forms such as `>>/result.json`
 * and `cat /SKILL.md;`. Longer paths like `./skills/SKILL.md` or
 * `/result.json.bak` do not match.
 *
 * @param {string} value - Command text to inspect.
 * @returns {{ path: string, replacement: string, pattern: RegExp } | null}
 *   The first matching entry (`/SKILL.md` is checked before `/result.json`),
 *   or `null` when neither path appears.
 */
function expandedRuntimeEnvPath(value) {
  const candidates = [
    {
      path: "/SKILL.md",
      replacement: "$SKILL_DIR/SKILL.md",
      pattern: /(^|[\s"'=<>|;&(])\/SKILL\.md(?=$|[\s"'<>|;&)])/u,
    },
    {
      path: "/result.json",
      replacement: "$OUTPUT_DIR/result.json",
      pattern: /(^|[\s"'=<>|;&(])\/result\.json(?=$|[\s"'<>|;&)])/u,
    },
  ];
  // The patterns carry no `g`/`y` flag, so `.test` is stateless here.
  return candidates.find((entry) => entry.pattern.test(value)) ?? null;
}
2744
2833
  function parseScalar(value) {
2745
2834
  if (value === "true") {
2746
2835
  return true;
@@ -3692,7 +3781,7 @@ function formatTraceDetail(detail, refs = {}) {
3692
3781
  return detail.executions.map((execution) => {
3693
3782
  const sessionLabels = execution.sessions.map((session) => session.label).join(",");
3694
3783
  return [
3695
- `${execution.id}\trun=${refs.runRefs?.get(detail.runId) ?? displayRef(detail.runId)}\tjobs=${execution.jobIds.map((id) => refs.jobRefs?.get(id) ?? displayRef(id)).join(",")}\tstatus=${execution.status}`,
3784
+ `${formatExecutionEvidenceLabel(detail, execution)}\trun=${refs.runRefs?.get(detail.runId) ?? displayRef(detail.runId)}\tjobs=${execution.jobIds.map((id) => refs.jobRefs?.get(id) ?? displayRef(id)).join(",")}\tstatus=${execution.status}`,
3696
3785
  `events=${execution.trace.events.length}`,
3697
3786
  `spans=${execution.trace.spans.length}`,
3698
3787
  `summaries=${execution.trace.summaries.length}`,
@@ -3700,6 +3789,11 @@ function formatTraceDetail(detail, refs = {}) {
3700
3789
  ].filter(Boolean).join("\t");
3701
3790
  }).join("\n");
3702
3791
  }
3792
/**
 * Chooses the leading label for one execution row in trace detail output.
 *
 * An execution with exactly one job whose id is `job:<runId>:<jobId>` is
 * shown as the literal label `evidence`; every other execution is shown by
 * its own id.
 *
 * @param {{ runId: * }} detail - Trace detail owning the execution.
 * @param {{ id: *, jobIds: Array }} execution - Execution being labelled.
 * @returns {*} `"evidence"` or `execution.id`.
 */
function formatExecutionEvidenceLabel(detail, execution) {
  const { id, jobIds } = execution;
  if (jobIds.length !== 1) {
    return id;
  }
  const soleJobExecutionId = `job:${detail.runId}:${jobIds[0]}`;
  if (id === soleJobExecutionId) {
    return "evidence";
  }
  return id;
}
3703
3797
  function formatArtifact(artifact) {
3704
3798
  return `${displayRef(artifact.id)}\trun=${displayRef(artifact.runId)}\tjob=${displayRef(artifact.jobId)}\t${artifact.kind}\tfiles=${artifact.files.length}`;
3705
3799
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@workbench-ai/workbench",
3
- "version": "0.0.88",
3
+ "version": "0.0.90",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/workbench-ai/workbench.git",
@@ -21,10 +21,10 @@
21
21
  ],
22
22
  "dependencies": {
23
23
  "yaml": "^2.8.2",
24
- "@workbench-ai/workbench-contract": "0.0.88",
25
- "@workbench-ai/workbench-built-in-adapters": "0.0.88",
26
- "@workbench-ai/workbench-protocol": "0.0.88",
27
- "@workbench-ai/workbench-core": "0.0.88"
24
+ "@workbench-ai/workbench-built-in-adapters": "0.0.90",
25
+ "@workbench-ai/workbench-contract": "0.0.90",
26
+ "@workbench-ai/workbench-core": "0.0.90",
27
+ "@workbench-ai/workbench-protocol": "0.0.90"
28
28
  },
29
29
  "devDependencies": {
30
30
  "@tailwindcss/postcss": "^4.2.2",
@@ -35,7 +35,7 @@
35
35
  "react-dom": "^19.2.0",
36
36
  "typescript": "^5.9.2",
37
37
  "vitest": "^3.2.4",
38
- "@workbench-ai/workbench-ui": "0.0.88"
38
+ "@workbench-ai/workbench-ui": "0.0.90"
39
39
  },
40
40
  "scripts": {
41
41
  "build": "rm -rf dist && tsc -p tsconfig.json && chmod 755 dist/workbench.js && node ./scripts/build-dev-open-assets.mjs",