@maintainabilityai/research-runner 0.1.31 → 0.1.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -576,6 +576,116 @@ const handleContextQuality = async (input) => {
576
576
  return { ok: true, scope: parsed.data, bars };
577
577
  };
578
578
  // ─────────────────────────────────────────────────────────────────────
579
+ // Self-review provenance skills (B29) — pure-data attempt-tracking for
580
+ // prd-agent's persona-switch self-critique loop.
581
+ //
582
+ // Why these exist (PR #112 forensic):
583
+ // The persona-switch self-critique is a prompt-level reasoning step;
584
+ // pre-B29 it emitted ZERO skill_call events. So the audit chain had
585
+ // no proof that the agent entered round N of Architect or Security
586
+ // review. On PR #112 the prd-agent hallucinated `tier=restricted` and
587
+ // skipped the loop entirely, claiming `SKIPPED_RESTRICTED_TIER` in
588
+ // the PRD frontmatter — when the OKR action's actual governanceTier
589
+ // was `supervised`. The chain showed nothing wrong because nothing
590
+ // in the chain referenced self-critique at all.
591
+ //
592
+ // These skills don't "do" the review (the LLM still does that). They
593
+ // hand the agent the AUTHORITATIVE inputs: the OKR action's frozen
594
+ // tier, the resulting max_auto_rounds, a should_proceed gate, and
595
+ // the contents of `.caterpillar/prompts/prd/<persona>-review.md`.
596
+ // Because every runSkill() auto-emits, the chain proves: "agent
597
+ // entered persona X, round N, was told tier=Y, max_rounds=Z,
598
+ // should_proceed=W." If a subsequent `### Self-review — <persona>
599
+ // (round N)` block doesn't appear in the PR body, that's a clear
600
+ // contract violation visible in the audit comment.
601
+ // ─────────────────────────────────────────────────────────────────────
602
/**
 * Input contract shared by both self-review skills: the OKR id, the run id
 * of the action being reviewed, and the 1-based review round.
 */
const SelfReviewInput = (() => {
    const { z } = zod_1;
    // Both identifiers use the same rule: a string of at least one character.
    const nonEmptyString = () => z.string().min(1);
    return z.object({
        okrId: nonEmptyString(),
        runId: nonEmptyString(),
        round: z.number().int().positive(),
    });
})();
607
/**
 * Tier → MAX_AUTO_ROUNDS mapping per design §6.2. Restricted=0 means the
 * loop is skipped entirely (mandatory human gate). The agent SHOULD NOT
 * be inferring tier from any other source; this is the single source of
 * truth for the OKR run that's been frozen at dispatch time.
 *
 * Matching is case-insensitive. Anything that is not a recognised tier
 * name — including a missing or non-string value — fails closed to 0.
 *
 * @param {unknown} tier - Governance tier name, e.g. 'autonomous'.
 * @returns {number} 3 for autonomous, 2 for supervised, 0 otherwise.
 */
function tierMaxRounds(tier) {
    // A missing or non-string tier is "unknown": return 0 rather than
    // throwing on `.toLowerCase()`.
    if (typeof tier !== 'string') {
        return 0;
    }
    switch (tier.toLowerCase()) {
        case 'autonomous':
            return 3;
        case 'supervised':
            return 2;
        default:
            return 0; // restricted / unknown
    }
}
623
/**
 * Factory: builds a self-review skill handler for one persona. Pure
 * data — reads OKR yaml + prompt pack file, computes tier-driven gating,
 * returns the bundle. No LLM, no synthesis.
 *
 * The returned async handler resolves to either
 *   `{ ok: false, reason }` — invalid input, OKR file missing, or no
 *   `actions[]` entry with the requested runId; or
 *   `{ ok: true, persona, tier, maxAutoRounds, round, shouldProceed,
 *      promptPack, promptPackPath, promptPackFound, auditMetadata }`.
 *
 * @param {string} persona - 'architect' selects architecture-review.md;
 *   any other value selects security-review.md.
 * @returns {(input: unknown) => Promise<object>} The skill handler.
 */
function makeSelfReviewHandler(persona) {
    // Prompt-pack filename note: the persona is "architect" but the
    // pack file is "architecture-review.md" (full word). Map explicitly
    // so we don't accidentally look for "architect-review.md".
    const promptFilename = persona === 'architect' ? 'architecture-review.md' : 'security-review.md';
    return async (input) => {
        const parsed = SelfReviewInput.safeParse(input);
        if (!parsed.success) {
            return { ok: false, reason: `bad-input: ${parsed.error.message}` };
        }
        const { okrId, runId, round } = parsed.data;
        const mesh = meshPath();
        const okrsRoot = path.join(mesh, 'okrs');
        const okrPath = path.join(okrsRoot, okrId, 'okr.yaml');
        // okrId is caller-supplied: refuse ids whose path segments (e.g.
        // "../") would resolve to a file outside <mesh>/okrs.
        const relativeOkrPath = path.relative(okrsRoot, okrPath);
        const escapesOkrsRoot = relativeOkrPath === '..'
            || relativeOkrPath.startsWith(`..${path.sep}`)
            || path.isAbsolute(relativeOkrPath);
        if (escapesOkrsRoot) {
            return { ok: false, reason: 'bad-input: okrId must resolve inside the okrs directory' };
        }
        if (!fs.existsSync(okrPath)) {
            return { ok: false, reason: 'okr-not-found' };
        }
        const card = readYaml(okrPath);
        // Tolerate a card whose `actions` is missing, not a list, or
        // contains empty entries: treat all of those as "no match".
        const actions = Array.isArray(card?.actions) ? card.actions : [];
        const action = actions.find((candidate) => candidate?.runId === runId);
        if (!action) {
            return { ok: false, reason: `action-not-found: no actions[] entry with runId=${runId}` };
        }
        // A missing or non-string tier normalises to '' so it falls into
        // the "unknown" bucket (0 rounds) instead of throwing.
        const rawTier = action.governanceTier;
        const tier = typeof rawTier === 'string' ? rawTier.toLowerCase() : '';
        const maxAutoRounds = tierMaxRounds(tier);
        const shouldProceed = tier !== 'restricted' && round <= maxAutoRounds;
        const promptPath = path.join(mesh, '.caterpillar', 'prompts', 'prd', promptFilename);
        let promptPack = '';
        let promptPackFound = false;
        try {
            promptPack = fs.readFileSync(promptPath, 'utf8');
            promptPackFound = true;
        }
        catch {
            // Best-effort: a missing or unreadable pack is reported through
            // promptPackFound=false with an empty promptPack, not an error.
        }
        // The chain only needs the small fields, not the whole prompt-pack
        // body — auditMetadata controls what lands in the skill_call event.
        const auditMetadata = {
            persona,
            tier,
            max_auto_rounds: maxAutoRounds,
            round,
            should_proceed: shouldProceed,
            prompt_pack_path: promptPath,
            prompt_pack_found: promptPackFound,
        };
        return {
            ok: true,
            persona,
            tier,
            maxAutoRounds,
            round,
            shouldProceed,
            promptPack,
            promptPackPath: promptPath,
            promptPackFound,
            auditMetadata,
        };
    };
}
686
// One handler per reviewer persona, both produced by the same factory.
const [handleSelfReviewArchitect, handleSelfReviewSecurity] = ['architect', 'security']
    .map((persona) => makeSelfReviewHandler(persona));
688
+ // ─────────────────────────────────────────────────────────────────────
579
689
  // Search skills — thin wrappers over the existing search nodes
580
690
  // ─────────────────────────────────────────────────────────────────────
581
691
  const SearchQueriesInput = zod_1.z.object({
@@ -1132,6 +1242,8 @@ exports.SKILLS = {
1132
1242
  'context-architecture': handleContextArchitecture,
1133
1243
  'context-security': handleContextSecurity,
1134
1244
  'context-quality': handleContextQuality,
1245
+ 'self-review-architect': handleSelfReviewArchitect,
1246
+ 'self-review-security': handleSelfReviewSecurity,
1135
1247
  'tavily-search': handleTavilySearch,
1136
1248
  'arxiv-search': handleArxivSearch,
1137
1249
  'uspto-search': handleUsptoSearch,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.31",
3
+ "version": "0.1.33",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",