@sanity/ailf 3.3.1 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -320,6 +320,32 @@ const graderPromptPreviewSchema = z.object({
320
320
  rubricName: z.string().max(60).optional(),
321
321
  snippet: z.string().max(120),
322
322
  });
323
/**
 * Manifest-preview shape for the run-scoped `pipelineContext` bulk artifact
 * (W0063 / D0033 M7).
 *
 * The preview exists so the Studio Overview tab can draw its Pipeline
 * Execution header row — step count, wall-clock, failed-step badge,
 * quality-gate badge, cache-hit count — without downloading the full context
 * payload. `config` and per-step detail are only fetched when the panel is
 * expanded.
 *
 * Sizing: the bounds below are picked so that the worst-case preview stays
 * comfortably below the 384-byte preview cap.
 *   - `failedSteps`: at most 5 names, each at most 40 characters. Real step
 *     names ("fetch-docs", "calculate-scores", "gap-analysis") run 10–25
 *     characters, so 40 is a defensive ceiling. The array needs its own cap
 *     because `fitPreviewToCap` only shortens string fields; an unbounded
 *     array could push the preview over the cap and cause it to be dropped
 *     entirely. 5 is a triage ceiling — the complete per-step list lives in
 *     the drilldown payload.
 *   - `belowCritical` / `cacheHits`: optional, because they are absent on old
 *     runs, skipped pipelines, and runs without remote-cache telemetry.
 *
 * NOTE(review): the earlier wording said the panel shows "showed 5 of N
 * failed steps" when `failedSteps.length < stepCount - successCount`. This
 * preview carries no `successCount`, and producer steps may also have status
 * "skipped", so `stepCount - successCount` would count skipped steps as
 * failed. Confirm where the panel gets N before relying on that formula.
 */
const pipelineContextPreviewShape = {
  // Number of step records seen in the captured context.
  stepCount: z.number().int().nonnegative(),
  // Sum of the per-step durations, in milliseconds.
  totalDurationMs: z.number().nonnegative(),
  // Names of failed steps: at most 5 entries of at most 40 characters each.
  failedSteps: z.array(z.string().max(40)).max(5),
  // Quality-gate flag; omitted when the run did not record it.
  belowCritical: z.boolean().optional(),
  // Remote-cache hit count; omitted when no cache telemetry was recorded.
  cacheHits: z.number().int().nonnegative().optional(),
};
const pipelineContextPreviewSchema = z.object(pipelineContextPreviewShape);
323
349
  // Aspirational: most payload shapes are still loose. Tightening per-type as
324
350
  // consumers stabilize is explicitly a W0050/W0051 concern — W0049 fixes the
325
351
  // structural shape around them without changing the payload contracts.
@@ -495,6 +521,56 @@ export const ARTIFACT_REGISTRY = {
495
521
  entrySchema: unknownEntry,
496
522
  mime: "application/json",
497
523
  capBytes: 64_000,
524
+ manifestPreview: {
525
+ schema: pipelineContextPreviewSchema,
526
+ extract: (entry) => {
527
+ // Producer shape from `capturePipelineContext` in
528
+ // packages/eval/src/orchestration/pipeline-orchestrator.ts:
529
+ // { config, state: { belowCritical, remoteCacheHits, ... },
530
+ // steps: [{ name, status: "success"|"failed"|"skipped",
531
+ // durationMs? }] }
532
+ //
533
+ // `config` and everything else on `state` are drilldown-only and
534
+ // intentionally absent from the preview — they're what the panel
535
+ // fetches lazily when expanded.
536
+ const e = entry;
537
+ const stepsRaw = Array.isArray(e.steps) ? e.steps : [];
538
+ let totalDurationMs = 0;
539
+ const failedSteps = [];
540
+ let stepCount = 0;
541
+ for (const raw of stepsRaw) {
542
+ if (raw === null || typeof raw !== "object")
543
+ continue;
544
+ stepCount += 1;
545
+ const s = raw;
546
+ if (typeof s.durationMs === "number" &&
547
+ Number.isFinite(s.durationMs) &&
548
+ s.durationMs >= 0) {
549
+ totalDurationMs += s.durationMs;
550
+ }
551
+ if (s.status === "failed" &&
552
+ typeof s.name === "string" &&
553
+ failedSteps.length < 5) {
554
+ failedSteps.push(truncateString(s.name, 40));
555
+ }
556
+ }
557
+ const belowCritical = typeof e.state?.belowCritical === "boolean"
558
+ ? e.state.belowCritical
559
+ : undefined;
560
+ const cacheHitsRaw = e.state?.remoteCacheHits;
561
+ const cacheHits = Array.isArray(cacheHitsRaw)
562
+ ? cacheHitsRaw.length
563
+ : undefined;
564
+ return {
565
+ stepCount,
566
+ totalDurationMs,
567
+ failedSteps,
568
+ ...(belowCritical === undefined ? {} : { belowCritical }),
569
+ ...(cacheHits === undefined ? {} : { cacheHits }),
570
+ };
571
+ },
572
+ capBytes: 384,
573
+ },
498
574
  }),
499
575
  documentManifest: buildDescriptor({
500
576
  type: "documentManifest",
@@ -81,9 +81,11 @@ export function validateCanonicalTasks(tasks) {
81
81
  }
82
82
  }
83
83
  }
84
- // Check task has at least one llm-rubric assertion (recommended but not required)
84
+ // Check task has at least one llm-rubric assertion (recommended but not required).
85
+ // agent-harness tasks grade side-effects (file-exists, command-succeeds, etc.),
86
+ // not text output, so an llm-rubric is not expected.
85
87
  const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
86
- if (!hasLlmRubric) {
88
+ if (!hasLlmRubric && task.mode !== "agent-harness") {
87
89
  warnings.push({
88
90
  taskId: task.id,
89
91
  field: "assertions",
@@ -258,6 +258,11 @@ async function runInit(opts) {
258
258
  console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
259
259
  console.log(" slugs and prompts for your documentation");
260
260
  console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
261
+ console.log();
262
+ console.log(' Note: tasks with status: "draft" are skipped on normal runs.');
263
+ console.log(" To run one anyway, target it explicitly with --task <id>, e.g.:");
264
+ console.log(" npx @sanity/ailf@latest pipeline --task example-agent-add-schema");
265
+ console.log();
261
266
  console.log(" 3. Add a GitHub Actions secret");
262
267
  console.log(" (Settings → Secrets and variables → Actions):");
263
268
  console.log(" • AILF_API_KEY — your API key");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "3.3.1",
3
+ "version": "3.4.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"