@maintainabilityai/research-runner 0.1.33 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -685,6 +685,390 @@ function makeSelfReviewHandler(persona) {
685
685
  }
686
686
  const handleSelfReviewArchitect = makeSelfReviewHandler('architect');
687
687
  const handleSelfReviewSecurity = makeSelfReviewHandler('security');
688
/**
 * D-PR1 — code-phase persona-switch self-review.
 *
 * Follows the same B29 pattern as the PRD-phase architect/security handlers,
 * but loads the WHAT-phase prompt packs from
 * `.caterpillar/prompts/code-design/*` rather than the PRD packs. The
 * response carries the authoritative tier + MAX_AUTO_ROUNDS so the
 * code-design-agent can't hallucinate its persona-switch budget.
 *
 * Agent flow (per the code-design-agent.agent.md contract):
 *   1. First-pass synthesis (no persona — author voice).
 *   2. Inhabit the code-architect persona → call this Skill with round=1.
 *      Use the returned promptPack as the critique criteria and produce a
 *      structured SCORE/SEVERITY/COVERED/MISSING/CHANGES block in the PR body.
 *   3. Repeat for the code-security persona, round=1.
 *   4. If either round-1 severity > PASS AND round < maxAutoRounds: revise
 *      the code-design, call this Skill with round=2, produce round-2 blocks.
 *   5. Restricted tier (maxAutoRounds=0) skips persona-switch entirely;
 *      shouldProceed is false → the agent reports the un-critiqued design
 *      and the audit-and-drift workflow gates on HumanGate.
 *
 * @param {string} persona 'code-architect' selects architecture-review.md;
 *   every other value selects security-review.md.
 * @returns {(input: unknown) => Promise<object>} Skill handler resolving to
 *   `{ ok: false, reason }` on bad input / missing OKR / missing action, or
 *   the full review envelope (`ok: true`) otherwise.
 */
function makeCodeReviewHandler(persona) {
    // The pack filename depends only on the persona, so resolve it once.
    const promptFilename = persona === 'code-architect' ? 'architecture-review.md' : 'security-review.md';
    return async (input) => {
        const validation = SelfReviewInput.safeParse(input);
        if (!validation.success) {
            return { ok: false, reason: `bad-input: ${validation.error.message}` };
        }
        const { okrId, runId, round } = validation.data;
        const meshRoot = meshPath();
        const okrFile = path.join(meshRoot, 'okrs', okrId, 'okr.yaml');
        if (!fs.existsSync(okrFile)) {
            return { ok: false, reason: 'okr-not-found' };
        }
        const okrCard = readYaml(okrFile);
        const matchingAction = okrCard?.actions?.find((candidate) => candidate.runId === runId);
        if (!matchingAction) {
            return { ok: false, reason: `action-not-found: no actions[] entry with runId=${runId}` };
        }
        const rawTier = matchingAction.governanceTier ?? '';
        const tier = rawTier.toLowerCase();
        const maxAutoRounds = tierMaxRounds(tier);
        const shouldProceed = tier !== 'restricted' && round <= maxAutoRounds;
        // code-design packs sit next to the prd packs but in their own subdir
        // so the agent can't confuse "PRD architecture review" (mesh-grounded)
        // with "code-design architecture review" (code-grounded).
        const promptPath = path.join(meshRoot, '.caterpillar', 'prompts', 'code-design', promptFilename);
        // null means "no pack": either the file is absent or reading it failed.
        const readPack = () => {
            if (!fs.existsSync(promptPath)) {
                return null;
            }
            try {
                return fs.readFileSync(promptPath, 'utf8');
            }
            catch {
                return null;
            }
        };
        const packText = readPack();
        const promptPackFound = packText !== null;
        const promptPack = promptPackFound ? packText : '';
        const auditMetadata = {
            persona,
            phase: 'what',
            tier,
            max_auto_rounds: maxAutoRounds,
            round,
            should_proceed: shouldProceed,
            prompt_pack_path: promptPath,
            prompt_pack_found: promptPackFound,
        };
        return {
            ok: true,
            persona,
            phase: 'what',
            tier,
            maxAutoRounds,
            round,
            shouldProceed,
            promptPack,
            promptPackPath: promptPath,
            promptPackFound,
            auditMetadata,
        };
    };
}
// One Skill handler per code-phase persona.
const handleSelfReviewCodeArchitect = makeCodeReviewHandler('code-architect');
const handleSelfReviewCodeSecurity = makeCodeReviewHandler('code-security');
767
+ // ─────────────────────────────────────────────────────────────────────
768
+ // knowledge-code — Phase D D6 backend. Per A12.v1.1, branches on per-repo
769
+ // `targetCodeRepoStatus`: 'connected' clones + classifies (brownfield);
770
+ // 'create' returns scaffolding hints (greenfield, no clone); 'not-connected'
771
+ // / 'unreachable' refuses with a remediation hint so the agent stops cleanly.
772
+ //
773
+ // MVP extraction is shallow (top-dirs + language map + manifest detection +
774
+ // entrypoint heuristics). Tree-sitter polyglot cross-module-call extraction
775
+ // is a follow-up (D-PR1.v1.1) — it requires per-language parsers as deps
776
+ // that bloat the runner package. The shallow shape is enough to prove the
777
+ // brownfield/greenfield contract end-to-end on the IMDB-celebs sample.
778
+ // ─────────────────────────────────────────────────────────────────────
779
// Input contract for the knowledge-code Skill:
//   okrId      — non-empty string
//   repoUrl    — non-empty string
//   repoStatus — one of 'connected' | 'not-connected' | 'create' | 'unreachable'
//   ref        — optional string
//   maxFiles   — optional positive integer
const KnowledgeCodeInput = (() => {
    const { z } = zod_1;
    const requiredText = () => z.string().min(1);
    return z.object({
        okrId: requiredText(),
        repoUrl: requiredText(),
        repoStatus: z.enum(['connected', 'not-connected', 'create', 'unreachable']),
        ref: z.string().optional(),
        maxFiles: z.number().int().positive().optional(),
    });
})();
786
/**
 * Lookup table from a lower-case file extension (leading dot included) to a
 * primary-language label. Feeds the `languages` histogram in the brownfield
 * response. The table itself carries no priority: when a repo mixes
 * languages, the label with the highest file count is picked by the caller.
 */
const LANG_EXTS = Object.fromEntries([
    ['typescript', ['.ts', '.tsx']],
    ['javascript', ['.js', '.jsx', '.mjs', '.cjs']],
    ['python', ['.py']],
    ['go', ['.go']],
    ['rust', ['.rs']],
    ['java', ['.java']],
    ['kotlin', ['.kt']],
    ['ruby', ['.rb']],
    ['php', ['.php']],
    ['csharp', ['.cs']],
    ['swift', ['.swift']],
    ['c', ['.c', '.h']],
    ['cpp', ['.cpp', '.cc', '.hpp', '.hxx']],
].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language])));
806
/**
 * Basenames treated as dependency manifests / lockfiles. The brownfield walk
 * reports any file with one of these names so the agent can ground design
 * decisions on the repo's actual dependency posture. Keep the list
 * conservative — over-eager manifest detection is noise.
 */
const MANIFEST_FILES = new Set([
    // Node.js
    ['package.json', 'package-lock.json', 'pnpm-lock.yaml', 'yarn.lock'],
    // Python
    ['requirements.txt', 'pyproject.toml', 'Pipfile', 'Pipfile.lock', 'poetry.lock'],
    // Go
    ['go.mod', 'go.sum'],
    // Rust
    ['Cargo.toml', 'Cargo.lock'],
    // JVM
    ['pom.xml', 'build.gradle', 'build.gradle.kts'],
    // Ruby
    ['Gemfile', 'Gemfile.lock'],
    // PHP
    ['composer.json'],
].flat());
820
/**
 * Collect up to `maxFiles` repo-relative file paths (always `/`-separated)
 * found under `rootDir`.
 *
 * The walk is breadth-first and visits each directory's entries in sorted
 * name order. With a small cap this samples the shallowest files first, so
 * root-level manifests are not starved by one deep directory, and the result
 * does not depend on the order the OS happens to list entries in.
 *
 * Directories named `.git`, `node_modules`, `__pycache__`, `vendor`, `dist`,
 * `build`, `.next` or `.nuxt` are never descended into — they bloat counts
 * without informing design. The skip list applies to directories only; a
 * regular file that happens to share one of those names is still reported.
 * Unreadable directories are skipped silently, and entries that are neither
 * a directory nor a regular file are ignored.
 *
 * @param {string} rootDir Absolute path of the tree to walk.
 * @param {number} maxFiles Upper bound on the number of paths returned.
 * @returns {string[]} Relative file paths, shallowest first.
 */
function walkRepo(rootDir, maxFiles) {
    const SKIP_DIRS = new Set(['.git', 'node_modules', '__pycache__', 'vendor', 'dist', 'build', '.next', '.nuxt']);
    const out = [];
    // FIFO of directories still to list; consumed by index so nothing is shifted.
    const pending = [{ abs: rootDir, rel: '' }];
    for (let i = 0; i < pending.length && out.length < maxFiles; i++) {
        const { abs, rel } = pending[i];
        let entries;
        try {
            entries = fs.readdirSync(abs, { withFileTypes: true });
        }
        catch {
            // Unreadable directory: best-effort walk, move on.
            continue;
        }
        // Code-unit ordering keeps the sample stable across platforms/locales.
        entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
        for (const ent of entries) {
            const childRel = rel ? `${rel}/${ent.name}` : ent.name;
            if (ent.isDirectory()) {
                if (!SKIP_DIRS.has(ent.name)) {
                    pending.push({ abs: path.join(abs, ent.name), rel: childRel });
                }
            }
            else if (ent.isFile()) {
                if (out.length >= maxFiles) {
                    break;
                }
                out.push(childRel);
            }
        }
    }
    return out;
}
859
/**
 * Summarise a list of repo-relative, `/`-separated file paths.
 *
 * @param {string[]} files Paths as produced by the repo walk.
 * @returns {{ topDirs: string[], languages: Record<string, number>, packageManifests: string[] }}
 *   `topDirs` — sorted, de-duplicated first path segments of files that live
 *   inside a directory; `languages` — file count per LANG_EXTS label (matched
 *   on the lower-cased extension); `packageManifests` — sorted paths whose
 *   basename is listed in MANIFEST_FILES.
 */
function classifyRepo(files) {
    const languages = {};
    const firstSegments = new Set();
    const manifestPaths = [];
    for (const relPath of files) {
        const cut = relPath.indexOf('/');
        // cut === 0 (leading slash) and cut === -1 (root-level file) add no dir.
        if (cut > 0) {
            firstSegments.add(relPath.slice(0, cut));
        }
        const label = LANG_EXTS[path.extname(relPath).toLowerCase()];
        if (label) {
            const seenSoFar = languages[label] ?? 0;
            languages[label] = seenSoFar + 1;
        }
        if (MANIFEST_FILES.has(path.basename(relPath))) {
            manifestPaths.push(relPath);
        }
    }
    const topDirs = [...firstSegments].sort();
    const packageManifests = manifestPaths.sort();
    return { topDirs, languages, packageManifests };
}
889
/**
 * Parse `https://github.com/<owner>/<name>` (with or without a `.git`
 * suffix, with or without a trailing slash; `http://` is accepted too).
 *
 * Owner and name are limited to the characters GitHub itself allows
 * (ASCII letters, digits, `.`, `-`, `_`), and the dot segments `.` / `..`
 * are rejected. The parsed name is later used to build filesystem paths, so
 * anything that could act as a path component separator or a parent-dir
 * reference must never get through.
 *
 * @param {string} url Candidate repository URL.
 * @returns {{ owner: string, name: string } | null} The parsed pair, or
 *   null for non-GitHub URLs, URLs with extra path/query parts, or names
 *   outside the allowed alphabet.
 */
function parseGithubUrl(url) {
    if (typeof url !== 'string') {
        return null;
    }
    const m = url.match(/^https?:\/\/github\.com\/([\w.-]+)\/([\w.-]+?)(?:\.git)?\/?$/);
    if (!m) {
        return null;
    }
    const [, owner, name] = m;
    const isDotSegment = (segment) => segment === '.' || segment === '..';
    if (isDotSegment(owner) || isDotSegment(name)) {
        return null;
    }
    return { owner, name };
}
900
/**
 * Pick a framework label from a package.json `dependencies` map. The first
 * match in this fixed priority order wins; 'unknown' when nothing matches.
 */
function knowledgeCodeDetectFramework(deps) {
    const PRIORITY = [
        ['express', 'express'],
        ['fastify', 'fastify'],
        ['hono', 'hono'],
        ['@nestjs/core', 'nestjs'],
        ['next', 'next'],
        ['react', 'react'],
    ];
    const hit = PRIORITY.find(([dependency]) => deps[dependency]);
    return hit ? hit[1] : 'unknown';
}
/**
 * Best-effort entrypoint detection from every `package.json` among
 * `manifestPaths` (repo-relative, `/`-separated). Conservative: an entry is
 * emitted only for a non-empty string `main` or `bin` target. Paths from a
 * nested manifest are re-based onto the repo root so they line up with the
 * other repo-relative paths in the response; root-manifest values are passed
 * through untouched. Unreadable or non-JSON manifests are skipped.
 */
function knowledgeCodeEntryPoints(cloneTarget, manifestPaths) {
    const entryPoints = [];
    for (const manifestPath of manifestPaths) {
        if (path.basename(manifestPath) !== 'package.json') {
            continue;
        }
        try {
            const pkg = JSON.parse(fs.readFileSync(path.join(cloneTarget, manifestPath), 'utf8'));
            const framework = knowledgeCodeDetectFramework(pkg.dependencies ?? {});
            const manifestDir = path.posix.dirname(manifestPath);
            const toRepoPath = (p) => (manifestDir === '.' ? p : path.posix.join(manifestDir, p));
            if (typeof pkg.main === 'string' && pkg.main !== '') {
                const kind = framework === 'react' || framework === 'next' ? 'ui' : 'api';
                entryPoints.push({ path: toRepoPath(pkg.main), kind, framework });
            }
            // `bin` is either a single path or a name → path map; use the first target.
            const binTarget = typeof pkg.bin === 'string' ? pkg.bin : Object.values(pkg.bin ?? {})[0];
            if (typeof binTarget === 'string' && binTarget !== '') {
                entryPoints.push({ path: toRepoPath(binTarget), kind: 'cli', framework });
            }
        }
        catch { /* manifest unreadable / non-JSON; skip */ }
    }
    return entryPoints;
}
/**
 * knowledge-code Skill handler. Branches on `repoStatus`:
 *   - 'not-connected' / 'unreachable' → refuse (`ok: false`) with a
 *     remediation hint;
 *   - 'create' → greenfield scaffolding hints, no clone;
 *   - 'connected' → shallow-clone the GitHub repo into a temp dir, walk and
 *     classify it, detect package.json entrypoints, then delete the clone.
 *
 * @param {unknown} input Validated against KnowledgeCodeInput.
 * @returns {Promise<object>} `{ ok: false, reason, ... }` for bad input,
 *   refusal, a non-GitHub URL or a failed clone; otherwise an `ok: true`
 *   response whose `mode` is 'greenfield' or 'brownfield'.
 */
const handleKnowledgeCode = async (input) => {
    const parsed = KnowledgeCodeInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId, repoUrl, repoStatus, ref, maxFiles } = parsed.data;
    const gh = parseGithubUrl(repoUrl);
    const repoSlug = gh ? `${gh.owner}/${gh.name}` : repoUrl;
    // ─── Refuse branch (not-connected / unreachable) ───────────────────
    // The agent never grounds against ambiguous repo intent. The remediation
    // hint points the human back to the Looking Glass repo-status picker
    // — the same UI that A12.v1.1 ships.
    if (repoStatus === 'not-connected' || repoStatus === 'unreachable') {
        const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'refuse', repo_status: repoStatus, okr_id: okrId };
        return {
            ok: false,
            reason: repoStatus === 'unreachable' ? 'repo-unreachable' : 'repo-not-connected',
            repo: repoSlug,
            remediation: "Open Looking Glass → OKR detail → Target Code Repos and pick a status: 'Connected' (if the repo exists and is wired) or 'Create' (if greenfield). The code-design-agent refuses to ground until every target repo's intent is explicit.",
            auditMetadata,
        };
    }
    // ─── Greenfield branch (create) ────────────────────────────────────
    // No clone. Return conservative scaffolding hints — the agent can
    // override these in the design when it has stronger signal from BAR ADRs
    // or the PRD. The goal is to seed the choice, not own it. Optional
    // referenceRepos (D5) plug in here when ready; for D-PR1 they're an
    // empty array placeholder.
    if (repoStatus === 'create') {
        const scaffoldingHints = {
            suggestedLanguage: 'typescript',
            suggestedFramework: 'express',
            seedFiles: [
                'README.md',
                'LICENSE',
                'package.json',
                'tsconfig.json',
                'src/index.ts',
                '.github/CODEOWNERS',
                '.github/workflows/red-queen-bootstrap.yml',
            ],
        };
        const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'greenfield', repo_status: 'create', okr_id: okrId };
        return {
            ok: true,
            mode: 'greenfield',
            repo: repoSlug,
            reason: 'repo-status-create',
            referenceRepos: [], // D5 reference-repos integration is a follow-up
            scaffoldingHints,
            auditMetadata,
        };
    }
    // ─── Brownfield branch (connected) ─────────────────────────────────
    // Shallow git clone (`--depth=1`) into a tmp dir, walk + classify. On
    // clone failure we degrade to a soft-refuse rather than crash.
    if (!gh) {
        return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
    }
    const { execFileSync } = await Promise.resolve().then(() => __importStar(require('node:child_process')));
    const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), `knowledge-code-${gh.name}-`));
    try {
        const cloneTarget = path.join(tmpRoot, gh.name);
        const cloneRef = ref ?? 'HEAD';
        const cloneArgs = ['clone', '--depth=1', '--filter=blob:limit=10m'];
        if (ref && ref !== 'HEAD') {
            cloneArgs.push('--branch', ref);
        }
        cloneArgs.push(repoUrl, cloneTarget);
        try {
            execFileSync('git', cloneArgs, { stdio: ['ignore', 'pipe', 'pipe'], timeout: 60_000 });
        }
        catch (err) {
            const cloneError = err instanceof Error ? err.message : String(err);
            const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'brownfield-clone-failed', repo_status: 'connected', okr_id: okrId };
            return {
                ok: false,
                reason: 'clone-failed',
                repo: repoSlug,
                remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${cloneError}`,
                auditMetadata,
            };
        }
        // Resolve the actual SHA so the response is reproducible.
        let sha = '';
        try {
            sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cloneTarget, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
        }
        catch { /* sha stays empty */ }
        const cap = maxFiles ?? 200;
        const files = walkRepo(cloneTarget, cap);
        const structure = classifyRepo(files);
        const entryPoints = knowledgeCodeEntryPoints(cloneTarget, structure.packageManifests);
        const primaryLanguage = Object.entries(structure.languages).sort((a, b) => b[1] - a[1])[0]?.[0] ?? 'unknown';
        const auditMetadata = {
            phase: 'what',
            repo: repoSlug,
            mode: 'brownfield',
            repo_status: 'connected',
            okr_id: okrId,
            sha: sha.slice(0, 12),
            file_count: files.length,
            primary_language: primaryLanguage,
            manifests: structure.packageManifests.length,
        };
        return {
            ok: true,
            mode: 'brownfield',
            repo: { owner: gh.owner, name: gh.name, ref: cloneRef, sha },
            structure,
            entryPoints,
            auditMetadata,
        };
    }
    finally {
        // The Skill is a one-shot read: always drop the temp tree, whether the
        // clone failed, the analysis threw, or everything succeeded.
        try {
            fs.rmSync(tmpRoot, { recursive: true, force: true });
        }
        catch { /* ignore */ }
    }
};
688
1072
  // ─────────────────────────────────────────────────────────────────────
689
1073
  // Search skills — thin wrappers over the existing search nodes
690
1074
  // ─────────────────────────────────────────────────────────────────────
@@ -1244,6 +1628,14 @@ exports.SKILLS = {
1244
1628
  'context-quality': handleContextQuality,
1245
1629
  'self-review-architect': handleSelfReviewArchitect,
1246
1630
  'self-review-security': handleSelfReviewSecurity,
1631
+ // D-PR1 — code-phase persona-switch packs. Same B29 pattern as the
1632
+ // PRD-phase pair above; reads .caterpillar/prompts/code-design/* packs.
1633
+ 'self-review-code-architect': handleSelfReviewCodeArchitect,
1634
+ 'self-review-code-security': handleSelfReviewCodeSecurity,
1635
+ // D-PR1 — knowledge-code (Phase D D6). 3-mode response per A12.v1.1
1636
+ // targetCodeRepoStatus: brownfield (clone + classify), greenfield
1637
+ // (scaffolding hints, no clone), refuse (not-connected / unreachable).
1638
+ 'knowledge-code': handleKnowledgeCode,
1247
1639
  'tavily-search': handleTavilySearch,
1248
1640
  'arxiv-search': handleArxivSearch,
1249
1641
  'uspto-search': handleUsptoSearch,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@maintainabilityai/research-runner",
3
- "version": "0.1.33",
3
+ "version": "0.1.34",
4
4
  "description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
5
5
  "license": "MIT",
6
6
  "author": "MaintainabilityAI",