@tangle-network/agent-eval 0.72.3 → 0.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1059,6 +1059,39 @@ interface ScoreboardRow {
1059
1059
  * evidence behind the verdict.
1060
1060
  */
1061
1061
  declare function userStoryScoreboard(verdicts: readonly UserStoryVerdict[]): ScoreboardRow[];
1062
+ /** Launch-readiness headline counts rolled up from the per-requirement rows (see `scoreboardSummary`). */
1063
+ interface ScoreboardSummary {
1064
+ /** Number of distinct `storyId` values among the rows. */
1065
+ stories: number;
1066
+ /** Stories for which every one of their rows has status PASS. */
1067
+ storiesFullyComplete: number;
1068
+ /** Total (story, requirement) rows, i.e. the length of the input. */
1069
+ requirements: number;
1070
+ /** Rows with status PASS. */
1071
+ passed: number;
1072
+ /** Rows that did not pass; computed as `requirements - passed`, so any non-PASS row counts here. */
1073
+ failed: number;
1074
+ /** Fraction in [0, 1]: passed / requirements; 0 when there are no rows. */
1075
+ passRate: number;
1076
+ }
1077
+ /** Roll the per-requirement rows up into the launch headline counts. Rows are grouped by `storyId`; an empty input yields all-zero counts. */
1078
+ declare function scoreboardSummary(rows: readonly ScoreboardRow[]): ScoreboardSummary;
1079
+ /** Optional presentation settings accepted by `renderScoreboardMarkdown`. */
+ interface ScoreboardRenderOptions {
1080
+ /** Document H1. Defaults to "Product-flow playback scoreboard". */
1081
+ title?: string;
1082
+ /** Key/value run metadata (runId, backend, model, date), rendered as one bullet per entry directly under the H1 and above the roll-up line. */
1083
+ meta?: Record<string, string>;
1084
+ /** Max chars of evidence shown per row, applied after the evidence items are joined with "; ". Default 160. */
1085
+ maxEvidenceChars?: number;
1086
+ }
1087
+ /**
1088
+ * Render the scoreboard as a launch-readiness Markdown document — the literal
1089
+ * "tick off every user story" artifact: a headline roll-up, the open tickets
1090
+ * (FAIL rows) up top as the launch blockers, then a per-story table of
1091
+ * requirement → PASS/FAIL with the evidence behind each verdict. Pure: same
1092
+ * rows in, same bytes out (no clock/random), so it is safe to snapshot.
+ * Stories are listed in the order their first row appears in `rows`. When no
+ * row is FAIL, the open-tickets table is replaced by a one-line note.
1093
+ */
1094
+ declare function renderScoreboardMarkdown(rows: readonly ScoreboardRow[], opts?: ScoreboardRenderOptions): string;
1062
1095
 
1063
1096
  /**
1064
1097
  * @experimental
@@ -1239,4 +1272,4 @@ declare function gitWorktreeAdapter(opts: GitWorktreeAdapterOptions): WorktreeAd
1239
1272
  * as a ref under the adapter's worktree dir. */
1240
1273
  declare function resolveWorktreePath(surface: CodeSurface, worktreeDir?: string): string;
1241
1274
 
1242
- export { type AcceptedEdit, type AceDriverOptions, type AnalystArtifact, type AnalystScenario, type ApplySkillPatchResult, type BuildAnalystSurfaceDispatchOptions, type CampaignBreakdown, CampaignResult, CampaignStorage, CodeSurface, type CompareDriversOptions, DRIVER_GUIDE, type DimensionRegression, DispatchContext, type DriverComparison, type DriverEntry, type DriverGoal, type DriverGuideEntry, type DriverName, type DriverPairwise, type DriverRecommendation, type DriverScore, type DriverStrategy, type DriverSurface, type FailureModeRecallJudgeOptions, FsLabeledScenarioStore, type FsLabeledScenarioStoreOptions, type GitWorktreeAdapterOptions, type HaloDriverOptions, type HeldoutSignificance, type HeldoutSignificanceOptions, ImprovementDriver, JudgeConfig, JudgeScore, LabelTrust, LabeledScenarioRecord, LabeledScenarioSampleArgs, LabeledScenarioSource, LabeledScenarioStore, LabeledScenarioStoreError, LabeledScenarioWrite, type MemoryCurationDriverOptions, MutableSurface, type OptimizerEntryConfig, type PairedHoldout, type PlaybackContext, type PlaybackDriver, type PlaybackStep, type ProfileDispatchFn, ProfileMatrixError, type ProfileSummary, ProposeContext, type ProposePatchesArgs, type RejectedEdit, RunCampaignOptions, RunImprovementLoopOptions, type RunProfileMatrixOptions, type RunProfileMatrixResult, type RunSkillOptOptions, type RunSkillOptResult, Scenario, type ScenarioRollup, type ScoreboardRow, type SelectDriverCriteria, type SkillOptDriver, type SkillOptDriverOptions, type SkillOptEpochRecord, type SkillOptEvidence, type SkillPatch, type SkillPatchOp, SkillPatchParseError, type SkillPatchRejection, type TraceAnalystDriverOptions, type UserStory, type UserStoryVerdict, type Worktree, type WorktreeAdapter, WorktreeAdapterError, aceDriver, applySkillPatch, buildAnalystSurfaceDispatch, campaignBreakdown, campaignMeanComposite, compareDrivers, detectScale, dimensionRegressions, failureModeRecallJudge, gepaParetoEntry, gepaReflectionEntry, gitWorktreeAdapter, 
haloDriver, heldoutSignificance, makePlaybackDispatch, memoryCurationDriver, pairHoldout, parseSkillPatchResponse, patchEditCount, resolveWorktreePath, runProfileMatrix, runSkillOpt, scoreUserStory, selectDriver, skillOptDriver, skillOptEntry, traceAnalystDriver, userStoryScoreboard };
1275
+ export { type AcceptedEdit, type AceDriverOptions, type AnalystArtifact, type AnalystScenario, type ApplySkillPatchResult, type BuildAnalystSurfaceDispatchOptions, type CampaignBreakdown, CampaignResult, CampaignStorage, CodeSurface, type CompareDriversOptions, DRIVER_GUIDE, type DimensionRegression, DispatchContext, type DriverComparison, type DriverEntry, type DriverGoal, type DriverGuideEntry, type DriverName, type DriverPairwise, type DriverRecommendation, type DriverScore, type DriverStrategy, type DriverSurface, type FailureModeRecallJudgeOptions, FsLabeledScenarioStore, type FsLabeledScenarioStoreOptions, type GitWorktreeAdapterOptions, type HaloDriverOptions, type HeldoutSignificance, type HeldoutSignificanceOptions, ImprovementDriver, JudgeConfig, JudgeScore, LabelTrust, LabeledScenarioRecord, LabeledScenarioSampleArgs, LabeledScenarioSource, LabeledScenarioStore, LabeledScenarioStoreError, LabeledScenarioWrite, type MemoryCurationDriverOptions, MutableSurface, type OptimizerEntryConfig, type PairedHoldout, type PlaybackContext, type PlaybackDriver, type PlaybackStep, type ProfileDispatchFn, ProfileMatrixError, type ProfileSummary, ProposeContext, type ProposePatchesArgs, type RejectedEdit, RunCampaignOptions, RunImprovementLoopOptions, type RunProfileMatrixOptions, type RunProfileMatrixResult, type RunSkillOptOptions, type RunSkillOptResult, Scenario, type ScenarioRollup, type ScoreboardRenderOptions, type ScoreboardRow, type ScoreboardSummary, type SelectDriverCriteria, type SkillOptDriver, type SkillOptDriverOptions, type SkillOptEpochRecord, type SkillOptEvidence, type SkillPatch, type SkillPatchOp, SkillPatchParseError, type SkillPatchRejection, type TraceAnalystDriverOptions, type UserStory, type UserStoryVerdict, type Worktree, type WorktreeAdapter, WorktreeAdapterError, aceDriver, applySkillPatch, buildAnalystSurfaceDispatch, campaignBreakdown, campaignMeanComposite, compareDrivers, detectScale, dimensionRegressions, failureModeRecallJudge, 
gepaParetoEntry, gepaReflectionEntry, gitWorktreeAdapter, haloDriver, heldoutSignificance, makePlaybackDispatch, memoryCurationDriver, pairHoldout, parseSkillPatchResponse, patchEditCount, renderScoreboardMarkdown, resolveWorktreePath, runProfileMatrix, runSkillOpt, scoreUserStory, scoreboardSummary, selectDriver, skillOptDriver, skillOptEntry, traceAnalystDriver, userStoryScoreboard };
@@ -1396,6 +1396,79 @@ function userStoryScoreboard(verdicts) {
1396
1396
  }
1397
1397
  return rows;
1398
1398
  }
1399
/**
 * Collapse per-requirement scoreboard rows into headline counts.
 *
 * Rows are bucketed by `storyId`. A story counts as fully complete when all
 * of its rows have status "PASS". `failed` is every row that is not a PASS,
 * and `passRate` is passed / total rows (0 for an empty input).
 *
 * @param rows Scoreboard rows; only `storyId` and `status` are read.
 * @returns Counts: stories, storiesFullyComplete, requirements, passed, failed, passRate.
 */
function scoreboardSummary(rows) {
  const perStory = /* @__PURE__ */ new Map();
  let passCount = 0;
  for (const row of rows) {
    // Get-or-create the tally for this story.
    let tally = perStory.get(row.storyId);
    if (tally == null) {
      tally = { total: 0, passed: 0 };
      perStory.set(row.storyId, tally);
    }
    tally.total += 1;
    if (row.status === "PASS") {
      tally.passed += 1;
      passCount += 1;
    }
  }
  const storiesFullyComplete = [...perStory.values()].filter(
    (tally) => tally.total > 0 && tally.passed === tally.total
  ).length;
  const requirements = rows.length;
  return {
    stories: perStory.size,
    storiesFullyComplete,
    requirements,
    passed: passCount,
    failed: requirements - passCount,
    passRate: requirements === 0 ? 0 : passCount / requirements
  };
}
1422
/**
 * Make a string safe to place inside a single Markdown table cell.
 *
 * Pipes are backslash-escaped so they are not read as column separators, and
 * every line ending is collapsed to one space so the cell stays on one row.
 * A lone carriage return is a line ending in Markdown too, so it is replaced
 * along with "\r\n" and "\n".
 *
 * @param s Raw cell text.
 * @returns The text with pipes escaped and line endings replaced by spaces.
 */
function escapeCell(s) {
  // "\r\n" is listed first so a CRLF pair becomes one space, not two.
  return s.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}
1425
/**
 * Shorten a string to at most `max` UTF-16 code units, ending with an
 * ellipsis character when anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the leading half is dropped as well, so the result does not contain
 * a broken character (for example half of an emoji).
 *
 * @param s Text to shorten.
 * @param max Maximum length; for `max <= 1` a truncated result is just the ellipsis.
 * @returns `s` unchanged when it already fits, otherwise the shortened text plus "\u2026".
 */
function truncate2(s, max) {
  if (s.length <= max) return s;
  let cut = Math.max(0, max - 1);
  if (cut > 0) {
    const before = s.charCodeAt(cut - 1);
    const after = s.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Back off one unit rather than keep a lone high surrogate.
    if (splitsPair) cut -= 1;
  }
  return `${s.slice(0, cut)}\u2026`;
}
1428
/**
 * Render scoreboard rows as a launch-readiness Markdown document.
 *
 * Layout, in order: an H1 title, an optional bullet list of run metadata, a
 * one-line roll-up of the counts from `scoreboardSummary`, an "Open tickets"
 * table of the FAIL rows (or a note that nothing is open), then one
 * requirement table per story. Stories appear in the order their first row
 * appears in `rows`. No clock or randomness is consulted, so identical input
 * produces identical output.
 *
 * @param rows Per-requirement rows; reads `storyId`, `storyTitle`, `reqTitle`, `status` and `evidence`.
 * @param opts Optional `title`, `meta` and `maxEvidenceChars` (default 160).
 * @returns The Markdown document, with lines joined by a newline.
 */
function renderScoreboardMarkdown(rows, opts = {}) {
  const maxEv = opts.maxEvidenceChars ?? 160;
  const sum = scoreboardSummary(rows);
  const pct = (n) => `${Math.round(n * 100)}%`;
  // Join, truncate, then escape; an empty result is shown as an em dash.
  const ev = (e) => escapeCell(truncate2(e.join("; "), maxEv)) || "\u2014";
  const out = [`# ${opts.title ?? "Product-flow playback scoreboard"}`, ""];
  if (opts.meta) {
    for (const [k, v] of Object.entries(opts.meta)) out.push(`- **${k}:** ${v}`);
    out.push("");
  }
  out.push(
    `**${sum.storiesFullyComplete}/${sum.stories}** user stories fully shipped \xB7 **${sum.passed}/${sum.requirements}** requirements passing (${pct(sum.passRate)}) \xB7 **${sum.failed}** open`,
    ""
  );
  // One pass over the rows collects the FAIL rows and buckets every row by
  // story. A Map keeps first-seen order, so stories render in input order
  // without scanning the whole row list again for each story.
  const fails = [];
  const byStory = /* @__PURE__ */ new Map();
  for (const r of rows) {
    if (r.status === "FAIL") fails.push(r);
    const bucket = byStory.get(r.storyId);
    if (bucket) bucket.push(r);
    else byStory.set(r.storyId, [r]);
  }
  if (fails.length > 0) {
    out.push("## Open tickets", "", "| Story | Requirement | Evidence |", "| --- | --- | --- |");
    for (const r of fails) {
      out.push(`| ${escapeCell(r.storyTitle)} | ${escapeCell(r.reqTitle)} | ${ev(r.evidence)} |`);
    }
    out.push("");
  } else {
    out.push("_All requirements passing \u2014 no open tickets._", "");
  }
  out.push("## Per-story tick-off", "");
  for (const storyRows of byStory.values()) {
    const passed = storyRows.filter((r) => r.status === "PASS").length;
    const mark = passed === storyRows.length ? "\u2705" : "\u26A0\uFE0F";
    // Every bucket has at least one row, so the first row supplies the title.
    out.push(
      `### ${escapeCell(storyRows[0].storyTitle)} \u2014 ${passed}/${storyRows.length} ${mark}`,
      "",
      "| Requirement | Status | Evidence |",
      "| --- | --- | --- |"
    );
    for (const r of storyRows) {
      out.push(
        `| ${escapeCell(r.reqTitle)} | ${r.status === "PASS" ? "\u2705 PASS" : "\u274C FAIL"} | ${ev(r.evidence)} |`
      );
    }
    out.push("");
  }
  return out.join("\n");
}
1399
1472
 
1400
1473
  // src/campaign/presets/run-profile-matrix.ts
1401
1474
  import { createHash as createHash2 } from "crypto";
@@ -1725,6 +1798,7 @@ export {
1725
1798
  patchEditCount,
1726
1799
  provenanceRecordPath,
1727
1800
  provenanceSpansPath,
1801
+ renderScoreboardMarkdown,
1728
1802
  resolveWorktreePath,
1729
1803
  runCampaign,
1730
1804
  runEval,
@@ -1733,6 +1807,7 @@ export {
1733
1807
  runProfileMatrix,
1734
1808
  runSkillOpt,
1735
1809
  scoreUserStory,
1810
+ scoreboardSummary,
1736
1811
  selectDriver,
1737
1812
  skillOptDriver,
1738
1813
  skillOptEntry,