@maintainabilityai/research-runner 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runner/skills.js +112 -0
- package/package.json +1 -1
package/dist/runner/skills.js
CHANGED
|
@@ -576,6 +576,116 @@ const handleContextQuality = async (input) => {
|
|
|
576
576
|
return { ok: true, scope: parsed.data, bars };
|
|
577
577
|
};
|
|
578
578
|
// ─────────────────────────────────────────────────────────────────────
|
|
579
|
+
// Self-review provenance skills (B29) — pure-data attempt-tracking for
|
|
580
|
+
// prd-agent's persona-switch self-critique loop.
|
|
581
|
+
//
|
|
582
|
+
// Why these exist (PR #112 forensic):
|
|
583
|
+
// The persona-switch self-critique is a prompt-level reasoning step;
|
|
584
|
+
// pre-B29 it emitted ZERO skill_call events. So the audit chain had
|
|
585
|
+
// no proof that the agent entered round N of Architect or Security
|
|
586
|
+
// review. On PR #112 the prd-agent hallucinated `tier=restricted` and
|
|
587
|
+
// skipped the loop entirely, claiming `SKIPPED_RESTRICTED_TIER` in
|
|
588
|
+
// the PRD frontmatter — when the OKR action's actual governanceTier
|
|
589
|
+
// was `supervised`. The chain showed nothing wrong because nothing
|
|
590
|
+
// in the chain referenced self-critique at all.
|
|
591
|
+
//
|
|
592
|
+
// These skills don't "do" the review (the LLM still does that). They
|
|
593
|
+
// hand the agent the AUTHORITATIVE inputs: the OKR action's frozen
|
|
594
|
+
// tier, the resulting max_auto_rounds, a should_proceed gate, and
|
|
595
|
+
// the contents of `.caterpillar/prompts/prd/<persona>-review.md`.
|
|
596
|
+
// Because every runSkill() auto-emits, the chain proves: "agent
|
|
597
|
+
// entered persona X, round N, was told tier=Y, max_rounds=Z,
|
|
598
|
+
// should_proceed=W." If a subsequent `### Self-review — <persona>
|
|
599
|
+
// (round N)` block doesn't appear in the PR body, that's a clear
|
|
600
|
+
// contract violation visible in the audit comment.
|
|
601
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
602
|
+
// Input contract shared by the self-review skills: two non-empty string
// identifiers (`okrId`, `runId`) plus the 1-based review `round`, which
// must be a positive integer.
const SelfReviewInput = (() => {
    const { z } = zod_1;
    const nonEmptyString = () => z.string().min(1);
    return z.object({
        okrId: nonEmptyString(),
        runId: nonEmptyString(),
        round: z.number().int().positive(),
    });
})();
|
|
607
|
+
/**
 * Tier → MAX_AUTO_ROUNDS mapping per design §6.2.
 *
 * The comparison is case-insensitive. `autonomous` allows 3 automatic
 * self-review rounds and `supervised` allows 2. Everything else —
 * `restricted`, an unrecognised tier, or a value that is not a string at
 * all — yields 0, which means the loop is skipped entirely (mandatory
 * human gate). The agent SHOULD NOT infer the tier from any other source;
 * this is the single source of truth for the OKR run frozen at dispatch
 * time.
 *
 * @param {unknown} tier - Governance tier name as stored on the OKR action.
 * @returns {number} Maximum number of automatic self-review rounds.
 */
function tierMaxRounds(tier) {
    // Fail closed: a missing or non-string tier must never throw and must
    // never grant automatic rounds.
    if (typeof tier !== 'string') {
        return 0;
    }
    switch (tier.toLowerCase()) {
        case 'autonomous':
            return 3;
        case 'supervised':
            return 2;
        default:
            return 0; // restricted / unknown
    }
}
|
|
623
|
+
// Persona → prompt-pack filename. The "architect" persona's pack file is
// "architecture-review.md" (full word), so the mapping is explicit rather
// than derived from the persona name.
const SELF_REVIEW_PROMPT_FILES = new Map([
    ['architect', 'architecture-review.md'],
    ['security', 'security-review.md'],
]);
/**
 * Factory: builds a self-review skill handler for one persona. Pure
 * data — reads the OKR yaml and the persona's prompt-pack file, computes
 * tier-driven gating, and returns the bundle. No LLM, no synthesis.
 *
 * The returned handler resolves to `{ ok: false, reason }` when the input
 * fails validation, when `okrId` would resolve outside `<mesh>/okrs`, when
 * the OKR file is missing or unreadable, or when no `actions[]` entry
 * matches `runId`. Otherwise it resolves to `{ ok: true, ... }` carrying
 * the tier, the round limit, the `shouldProceed` gate, the prompt-pack
 * text (empty when the file could not be read) and an `auditMetadata`
 * object holding only the small fields.
 *
 * @param {string} persona - Either 'architect' or 'security'.
 * @returns {(input: unknown) => Promise<object>} The skill handler.
 * @throws {TypeError} If `persona` has no registered prompt pack.
 */
function makeSelfReviewHandler(persona) {
    const promptFilename = SELF_REVIEW_PROMPT_FILES.get(persona);
    if (promptFilename === undefined) {
        // Fail fast instead of silently serving another persona's prompt pack.
        throw new TypeError(`makeSelfReviewHandler: unknown persona "${String(persona)}"`);
    }
    return async (input) => {
        const parsed = SelfReviewInput.safeParse(input);
        if (!parsed.success) {
            return { ok: false, reason: `bad-input: ${parsed.error.message}` };
        }
        const { okrId, runId, round } = parsed.data;
        const mesh = meshPath();
        const okrPath = path.join(mesh, 'okrs', okrId, 'okr.yaml');
        // okrId comes from the caller and becomes a path segment: refuse
        // anything (e.g. "../..") that would resolve outside <mesh>/okrs.
        const okrsRoot = path.resolve(mesh, 'okrs');
        const relativeToRoot = path.relative(okrsRoot, path.resolve(okrPath));
        if (relativeToRoot === '..'
            || relativeToRoot.startsWith(`..${path.sep}`)
            || path.isAbsolute(relativeToRoot)) {
            return { ok: false, reason: 'bad-input: okrId must resolve inside the okrs directory' };
        }
        if (!fs.existsSync(okrPath)) {
            return { ok: false, reason: 'okr-not-found' };
        }
        let card;
        try {
            card = readYaml(okrPath);
        }
        catch (err) {
            // Report a read/parse failure in the same result shape as the
            // other failure paths instead of letting it escape the handler.
            const detail = err instanceof Error ? err.message : String(err);
            return { ok: false, reason: `okr-unreadable: ${detail}` };
        }
        // Tolerate a card whose `actions` is missing, not a list, or
        // contains empty entries.
        const actions = Array.isArray(card?.actions) ? card.actions : [];
        const action = actions.find((a) => a?.runId === runId);
        if (!action) {
            return { ok: false, reason: `action-not-found: no actions[] entry with runId=${runId}` };
        }
        // A missing or non-string tier is reported as '' and therefore
        // gets zero automatic rounds (fail closed).
        const rawTier = action.governanceTier;
        const tier = typeof rawTier === 'string' ? rawTier.toLowerCase() : '';
        const maxAutoRounds = tierMaxRounds(tier);
        const shouldProceed = tier !== 'restricted' && round <= maxAutoRounds;
        const promptPath = path.join(mesh, '.caterpillar', 'prompts', 'prd', promptFilename);
        let promptPack = '';
        let promptPackFound = false;
        try {
            promptPack = fs.readFileSync(promptPath, 'utf8');
            promptPackFound = true;
        }
        catch {
            // Best effort: a missing or unreadable pack is reported through
            // promptPackFound=false with an empty promptPack.
        }
        // The chain only needs the small fields, not the whole prompt-pack
        // body — auditMetadata controls what lands in the skill_call event.
        const auditMetadata = {
            persona,
            tier,
            max_auto_rounds: maxAutoRounds,
            round,
            should_proceed: shouldProceed,
            prompt_pack_path: promptPath,
            prompt_pack_found: promptPackFound,
        };
        return {
            ok: true,
            persona,
            tier,
            maxAutoRounds,
            round,
            shouldProceed,
            promptPack,
            promptPackPath: promptPath,
            promptPackFound,
            auditMetadata,
        };
    };
}
|
|
686
|
+
// One self-review handler per persona, built from the shared factory.
const [handleSelfReviewArchitect, handleSelfReviewSecurity] = ['architect', 'security']
    .map((persona) => makeSelfReviewHandler(persona));
|
|
688
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
579
689
|
// Search skills — thin wrappers over the existing search nodes
|
|
580
690
|
// ─────────────────────────────────────────────────────────────────────
|
|
581
691
|
const SearchQueriesInput = zod_1.z.object({
|
|
@@ -1132,6 +1242,8 @@ exports.SKILLS = {
|
|
|
1132
1242
|
'context-architecture': handleContextArchitecture,
|
|
1133
1243
|
'context-security': handleContextSecurity,
|
|
1134
1244
|
'context-quality': handleContextQuality,
|
|
1245
|
+
'self-review-architect': handleSelfReviewArchitect,
|
|
1246
|
+
'self-review-security': handleSelfReviewSecurity,
|
|
1135
1247
|
'tavily-search': handleTavilySearch,
|
|
1136
1248
|
'arxiv-search': handleArxivSearch,
|
|
1137
1249
|
'uspto-search': handleUsptoSearch,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.31",
|
|
3
|
+
"version": "0.1.33",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|