@sanity/ailf 3.3.1 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -320,6 +320,32 @@ const graderPromptPreviewSchema = z.object({
|
|
|
320
320
|
rubricName: z.string().max(60).optional(),
|
|
321
321
|
snippet: z.string().max(120),
|
|
322
322
|
});
|
|
323
|
+
/**
 * Manifest-preview schema for the run-scoped `pipelineContext` bulk artifact
 * (W0063 / D0033 M7).
 *
 * The preview carries just enough for the Studio Overview tab to draw the
 * Pipeline Execution header row — step count, wall-clock, failed-step badge,
 * quality-gate badge, cache-hit count — without downloading the full context
 * payload. `config` and per-step detail are only fetched once the panel is
 * expanded.
 *
 * Field bounds are picked so the worst-case preview stays comfortably below
 * the 384-byte preview cap:
 *
 * - `failedSteps`: at most 5 names, each at most 40 characters. Real step
 *   names ("fetch-docs", "calculate-scores", "gap-analysis") run 10–25
 *   characters, so 40 is a defensive ceiling. The array itself is capped
 *   because `fitPreviewToCap` only shortens string fields; an unbounded
 *   array could push the preview over the cap and force it to be dropped
 *   entirely. 5 is a triage ceiling: the panel shows "showed 5 of N failed
 *   steps" when `failedSteps.length < stepCount - successCount`, and the
 *   complete per-step list lives in the drilldown payload.
 *   NOTE(review): `successCount` is not a field of this preview schema —
 *   confirm where the panel obtains it.
 * - `belowCritical`, `cacheHits`: optional. They are absent on old runs,
 *   skipped pipelines, and runs without remote-cache telemetry.
 */
const pipelineContextPreviewSchema = (() => {
    // Shared building blocks for the two count fields and the step names.
    const nonNegativeInt = z.number().int().nonnegative();
    const failedStepName = z.string().max(40);
    return z.object({
        stepCount: nonNegativeInt,
        totalDurationMs: z.number().nonnegative(),
        failedSteps: z.array(failedStepName).max(5),
        belowCritical: z.boolean().optional(),
        cacheHits: nonNegativeInt.optional(),
    });
})();
|
|
323
349
|
// Aspirational: most payload shapes are still loose. Tightening per-type as
|
|
324
350
|
// consumers stabilize is explicitly a W0050/W0051 concern — W0049 fixes the
|
|
325
351
|
// structural shape around them without changing the payload contracts.
|
|
@@ -495,6 +521,56 @@ export const ARTIFACT_REGISTRY = {
|
|
|
495
521
|
entrySchema: unknownEntry,
|
|
496
522
|
mime: "application/json",
|
|
497
523
|
capBytes: 64_000,
|
|
524
|
+
manifestPreview: {
|
|
525
|
+
schema: pipelineContextPreviewSchema,
|
|
526
|
+
extract: (entry) => {
|
|
527
|
+
// Producer shape from `capturePipelineContext` in
|
|
528
|
+
// packages/eval/src/orchestration/pipeline-orchestrator.ts:
|
|
529
|
+
// { config, state: { belowCritical, remoteCacheHits, ... },
|
|
530
|
+
// steps: [{ name, status: "success"|"failed"|"skipped",
|
|
531
|
+
// durationMs? }] }
|
|
532
|
+
//
|
|
533
|
+
// `config` and everything else on `state` are drilldown-only and
|
|
534
|
+
// intentionally absent from the preview — they're what the panel
|
|
535
|
+
// fetches lazily when expanded.
|
|
536
|
+
const e = entry;
|
|
537
|
+
const stepsRaw = Array.isArray(e.steps) ? e.steps : [];
|
|
538
|
+
let totalDurationMs = 0;
|
|
539
|
+
const failedSteps = [];
|
|
540
|
+
let stepCount = 0;
|
|
541
|
+
for (const raw of stepsRaw) {
|
|
542
|
+
if (raw === null || typeof raw !== "object")
|
|
543
|
+
continue;
|
|
544
|
+
stepCount += 1;
|
|
545
|
+
const s = raw;
|
|
546
|
+
if (typeof s.durationMs === "number" &&
|
|
547
|
+
Number.isFinite(s.durationMs) &&
|
|
548
|
+
s.durationMs >= 0) {
|
|
549
|
+
totalDurationMs += s.durationMs;
|
|
550
|
+
}
|
|
551
|
+
if (s.status === "failed" &&
|
|
552
|
+
typeof s.name === "string" &&
|
|
553
|
+
failedSteps.length < 5) {
|
|
554
|
+
failedSteps.push(truncateString(s.name, 40));
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
const belowCritical = typeof e.state?.belowCritical === "boolean"
|
|
558
|
+
? e.state.belowCritical
|
|
559
|
+
: undefined;
|
|
560
|
+
const cacheHitsRaw = e.state?.remoteCacheHits;
|
|
561
|
+
const cacheHits = Array.isArray(cacheHitsRaw)
|
|
562
|
+
? cacheHitsRaw.length
|
|
563
|
+
: undefined;
|
|
564
|
+
return {
|
|
565
|
+
stepCount,
|
|
566
|
+
totalDurationMs,
|
|
567
|
+
failedSteps,
|
|
568
|
+
...(belowCritical === undefined ? {} : { belowCritical }),
|
|
569
|
+
...(cacheHits === undefined ? {} : { cacheHits }),
|
|
570
|
+
};
|
|
571
|
+
},
|
|
572
|
+
capBytes: 384,
|
|
573
|
+
},
|
|
498
574
|
}),
|
|
499
575
|
documentManifest: buildDescriptor({
|
|
500
576
|
type: "documentManifest",
|
|
@@ -81,9 +81,11 @@ export function validateCanonicalTasks(tasks) {
|
|
|
81
81
|
}
|
|
82
82
|
}
|
|
83
83
|
}
|
|
84
|
-
// Check task has at least one llm-rubric assertion (recommended but not required)
|
|
84
|
+
// Check task has at least one llm-rubric assertion (recommended but not required).
|
|
85
|
+
// agent-harness tasks grade side-effects (file-exists, command-succeeds, etc.),
|
|
86
|
+
// not text output, so an llm-rubric is not expected.
|
|
85
87
|
const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
|
|
86
|
-
if (!hasLlmRubric) {
|
|
88
|
+
if (!hasLlmRubric && task.mode !== "agent-harness") {
|
|
87
89
|
warnings.push({
|
|
88
90
|
taskId: task.id,
|
|
89
91
|
field: "assertions",
|
package/dist/commands/init.js
CHANGED
|
@@ -258,6 +258,11 @@ async function runInit(opts) {
|
|
|
258
258
|
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
259
259
|
console.log(" slugs and prompts for your documentation");
|
|
260
260
|
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
261
|
+
console.log();
|
|
262
|
+
console.log(' Note: tasks with status: "draft" are skipped on normal runs.');
|
|
263
|
+
console.log(" To run one anyway, target it explicitly with --task <id>, e.g.:");
|
|
264
|
+
console.log(" npx @sanity/ailf@latest pipeline --task example-agent-add-schema");
|
|
265
|
+
console.log();
|
|
261
266
|
console.log(" 3. Add a GitHub Actions secret");
|
|
262
267
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
263
268
|
console.log(" • AILF_API_KEY — your API key");
|