@maintainabilityai/research-runner 0.1.33 → 0.1.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runner/skills.js +520 -0
- package/package.json +1 -1
package/dist/runner/skills.js
CHANGED
|
@@ -686,6 +686,514 @@ function makeSelfReviewHandler(persona) {
|
|
|
686
686
|
const handleSelfReviewArchitect = makeSelfReviewHandler('architect');
|
|
687
687
|
const handleSelfReviewSecurity = makeSelfReviewHandler('security');
|
|
688
688
|
// ─────────────────────────────────────────────────────────────────────
|
|
689
|
+
// knowledge-prd — D-PR1.v1.1 fix. Was deployed as a Skill template but
|
|
690
|
+
// the runner had no handler, so the code-design-agent's first attempt at
|
|
691
|
+
// invoking it on PR #120 returned `{"ok":false,"reason":"unknown-skill"}`.
|
|
692
|
+
// Agent fell back to direct file read + grep, which worked, but the chain
|
|
693
|
+
// has no `knowledge-prd` event proving the PRD was structurally read.
|
|
694
|
+
//
|
|
695
|
+
// Parses `okrs/<id>/how/prd.md` for FR-NN + SR-NN entries with tolerant
|
|
696
|
+
// regex (mirrors B31's tolerance — accepts `FR-NN` / `FR NN` / `**FR-NN**`
|
|
697
|
+
// heading or bold markers). Best-effort extraction of cited sources +
|
|
698
|
+
// STRIDE / OWASP anchors per requirement.
|
|
699
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
700
|
+
// Input contract for the knowledge-prd skill: an object carrying a
// non-empty `okrId` string.
const KnowledgePrdInput = zod_1.z.object({
    okrId: zod_1.z.string().min(1),
});
|
|
701
|
+
/**
 * Extract FR-NN / SR-NN requirement entries from a PRD body. Tolerant
 * to several markdown forms the prd-agent has emitted over time:
 *   - `### FR-01: <title>`        (H3 heading)
 *   - `**FR-01**: <title>`        (bold-anchor inline)
 *   - `- **FR-01**: <title>`      (bullet w/ bold anchor)
 * The dash in the id is optional (`FR 01`, `FR01`) and matching is
 * case-insensitive. The id must be followed by `:` or `.`.
 *
 * For each requirement a small context window is scanned for citations:
 * the defining line plus up to 6 following lines, cut short at the next
 * line that itself defines an FR/SR requirement so one requirement never
 * inherits its neighbour's citations.
 *
 * @param {string} body   Full markdown text of the PRD.
 * @param {string} prefix Requirement family, `'FR'` or `'SR'`.
 * @returns {Array<{id: string, text: string, sources?: string[], stride?: string[], owasp?: string[]}>}
 *   One record per logical id, in document order. The same id seen twice
 *   is deduped (first occurrence wins). `sources` is only set for `FR`
 *   records; `stride` (THR-NNN ids) and `owasp` (A01..A10) only for `SR`.
 */
function parsePrdRequirements(body, prefix) {
    // `prefix` is interpolated into a RegExp, so neutralise metacharacters.
    const safePrefix = String(prefix).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Compiled once; the original rebuilt this on every line.
    const definitionRegex = new RegExp(`(?:^|\\s|\\*\\*)${safePrefix}[-\\s]?(\\d+)(?:\\*\\*)?\\s*[:.]\\s*(.*?)\\s*$`, 'i');
    // Any requirement definition (either family) closes the current window.
    const anyDefinitionRegex = /(?:^|\s|\*\*)(?:FR|SR)[-\s]?\d+(?:\*\*)?\s*[:.]/i;
    const WINDOW_LINES = 7; // defining line + 6 lines of look-ahead
    const seen = new Set();
    const out = [];
    const lines = String(body ?? '').split('\n');
    for (let i = 0; i < lines.length; i++) {
        const m = lines[i].match(definitionRegex);
        if (!m) {
            continue;
        }
        const id = `${prefix}-${m[1].padStart(2, '0')}`;
        if (seen.has(id)) {
            continue;
        }
        seen.add(id);
        const text = (m[2] || '').replace(/\*\*/g, '').trim();
        let end = Math.min(i + WINDOW_LINES, lines.length);
        for (let j = i + 1; j < end; j++) {
            if (anyDefinitionRegex.test(lines[j])) {
                end = j;
                break;
            }
        }
        const context = lines.slice(i, end).join('\n');
        const record = { id, text };
        if (prefix === 'FR') {
            // Word boundaries keep the `R-01` inside `FR-01` / `THR-001` and
            // the `E-2021` inside `CVE-2021` from being read as source ids.
            const sources = [...context.matchAll(/\b([CRSE])-?(\d+)\b/g)].map((x) => `${x[1]}-${x[2]}`);
            const dedupSrc = Array.from(new Set(sources));
            if (dedupSrc.length > 0) {
                record.sources = dedupSrc;
            }
        }
        if (prefix === 'SR') {
            const stride = [...context.matchAll(/\bTHR-\d{3}(?!\d)/gi)].map((x) => x[0].toUpperCase());
            // Reject matches embedded in longer tokens (e.g. `A100`, `DA01`).
            const owasp = [...context.matchAll(/(?<![A-Za-z0-9])A(?:0[1-9]|10)(?!\d)/gi)].map((x) => x[0].toUpperCase());
            if (stride.length > 0) {
                record.stride = Array.from(new Set(stride));
            }
            if (owasp.length > 0) {
                record.owasp = Array.from(new Set(owasp));
            }
        }
        out.push(record);
    }
    return out;
}
|
|
758
|
+
/**
 * Extract a Coverage Analysis table from the PRD body. Format expected:
 *   | FR/SR | Source | Status |
 *   |---|---|---|
 *   | FR-01 | R-2,E-1 | YES |
 *   ...
 * A row counts when its first cell is an FR/SR id (dash optional, any
 * case) and some later cell is exactly YES, PARTIAL or NO; when several
 * cells qualify the right-most one is used. Bold markers (`**`) around a
 * cell and a missing trailing pipe are tolerated, matching the leniency
 * of the requirement parser.
 *
 * @param {string} body Full markdown text of the PRD.
 * @returns {Object<string, boolean>} Map of normalised id (`FR-01`) to
 *   coverage flag (YES → true, PARTIAL/NO → false). Later rows for the
 *   same id overwrite earlier ones.
 */
function parsePrdCoverage(body) {
    const coverage = {};
    for (const line of String(body ?? '').split('\n')) {
        const row = line.trim();
        if (!row.startsWith('|')) {
            continue;
        }
        // Drop the empty piece before the leading pipe, then strip bold
        // markers and padding from every cell.
        const cells = row.split('|').slice(1).map((cell) => cell.replace(/\*\*/g, '').trim());
        const idMatch = /^(FR|SR)[-\s]?(\d+)$/i.exec(cells[0] ?? '');
        if (!idMatch) {
            continue;
        }
        let status;
        for (let k = cells.length - 1; k >= 1; k--) {
            if (/^(?:YES|PARTIAL|NO)$/i.test(cells[k])) {
                status = cells[k].toUpperCase();
                break;
            }
        }
        if (status === undefined) {
            continue;
        }
        const id = `${idMatch[1].toUpperCase()}-${idMatch[2].padStart(2, '0')}`;
        coverage[id] = status === 'YES';
    }
    return coverage;
}
|
|
784
|
+
/**
 * knowledge-prd skill handler. Reads `okrs/<okrId>/how/prd.md` under the
 * mesh root and returns its parsed functional requirements, security
 * requirements and coverage table.
 *
 * @param {unknown} input Expected shape: `{ okrId: string }`.
 * @returns {Promise<object>} `{ ok: true, functionalRequirements,
 *   securityRequirements, coverage, docPath, auditMetadata }` on success,
 *   otherwise `{ ok: false, reason }` where reason is `bad-input: …`,
 *   `prd-not-merged-yet` or `prd-unreadable: …`.
 */
const handleKnowledgePrd = async (input) => {
    const parsed = KnowledgePrdInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId } = parsed.data;
    const mesh = meshPath();
    // okrId becomes a path segment; refuse values that would resolve
    // outside (or onto) the okrs/ directory, e.g. `../../etc`.
    const okrsRoot = path.resolve(mesh, 'okrs');
    const relToRoot = path.relative(okrsRoot, path.resolve(okrsRoot, okrId));
    const escapesRoot = relToRoot === ''
        || relToRoot === '..'
        || relToRoot.startsWith(`..${path.sep}`)
        || path.isAbsolute(relToRoot);
    if (escapesRoot) {
        return { ok: false, reason: 'bad-input: okrId must resolve to a directory inside okrs/' };
    }
    const docPath = path.join(mesh, 'okrs', okrId, 'how', 'prd.md');
    if (!fs.existsSync(docPath)) {
        return { ok: false, reason: 'prd-not-merged-yet' };
    }
    let body;
    try {
        body = fs.readFileSync(docPath, 'utf8');
    }
    catch (err) {
        // The file can vanish or be unreadable between the check and the
        // read; report it in the result object instead of throwing.
        return { ok: false, reason: `prd-unreadable: ${err instanceof Error ? err.message : String(err)}` };
    }
    const functionalRequirements = parsePrdRequirements(body, 'FR');
    const securityRequirements = parsePrdRequirements(body, 'SR');
    const coverage = parsePrdCoverage(body);
    const auditMetadata = {
        okr_id: okrId,
        fr_count: functionalRequirements.length,
        sr_count: securityRequirements.length,
        coverage_rows: Object.keys(coverage).length,
    };
    return {
        ok: true,
        functionalRequirements,
        securityRequirements,
        coverage,
        docPath,
        auditMetadata,
    };
};
|
|
812
|
+
/**
 * D-PR1 — code-phase persona-switch self-review. Builds a skill handler
 * for one persona that reads the code-design prompt pack at
 * `.caterpillar/prompts/code-design/*` and returns it together with the
 * authoritative governance tier and round budget taken from the OKR's
 * `okr.yaml`, so the code-design-agent can't hallucinate its
 * persona-switch budget.
 *
 * The agent's flow (per the code-design-agent.agent.md contract):
 *   1. First-pass synthesis (no persona — author voice).
 *   2. Inhabit code-architect persona → call this Skill with round=1.
 *      Read the returned promptPack as the critique criteria. Produce a
 *      structured SCORE/SEVERITY/COVERED/MISSING/CHANGES block in the PR body.
 *   3. Same for code-security persona, round=1.
 *   4. If either round-1 severity > PASS AND round < maxAutoRounds: revise
 *      the code-design, call this Skill with round=2, produce round-2 blocks.
 *   5. Restricted tier skips persona-switch entirely; shouldProceed is
 *      false → the agent reports the un-critiqued design and the
 *      audit-and-drift workflow gates on HumanGate.
 *
 * @param {string} persona `'code-architect'` selects
 *   `architecture-review.md`; any other value selects `security-review.md`.
 * @returns {(input: unknown) => Promise<object>} Handler resolving to
 *   `{ ok: true, persona, phase, tier, maxAutoRounds, round, shouldProceed,
 *   promptPack, promptPackPath, promptPackFound, auditMetadata }` or to
 *   `{ ok: false, reason }` (`bad-input: …`, `okr-not-found`,
 *   `action-not-found: …`).
 */
function makeCodeReviewHandler(persona) {
    return async (input) => {
        const parsed = SelfReviewInput.safeParse(input);
        if (!parsed.success) {
            return { ok: false, reason: `bad-input: ${parsed.error.message}` };
        }
        const { okrId, runId, round } = parsed.data;
        const mesh = meshPath();
        // okrId becomes a path segment; refuse values that would resolve
        // outside (or onto) the okrs/ directory, e.g. `../../etc`.
        const okrsRoot = path.resolve(mesh, 'okrs');
        const relToRoot = path.relative(okrsRoot, path.resolve(okrsRoot, okrId));
        const escapesRoot = relToRoot === ''
            || relToRoot === '..'
            || relToRoot.startsWith(`..${path.sep}`)
            || path.isAbsolute(relToRoot);
        if (escapesRoot) {
            return { ok: false, reason: 'bad-input: okrId must resolve to a directory inside okrs/' };
        }
        const okrPath = path.join(mesh, 'okrs', okrId, 'okr.yaml');
        if (!fs.existsSync(okrPath)) {
            return { ok: false, reason: 'okr-not-found' };
        }
        const card = readYaml(okrPath);
        // Tolerate a card whose `actions` is missing or not a list.
        const actions = Array.isArray(card?.actions) ? card.actions : [];
        const action = actions.find((a) => a?.runId === runId);
        if (!action) {
            return { ok: false, reason: `action-not-found: no actions[] entry with runId=${runId}` };
        }
        // String() guards against a non-string tier value in the YAML.
        const tier = String(action.governanceTier ?? '').toLowerCase();
        const maxAutoRounds = tierMaxRounds(tier);
        const shouldProceed = tier !== 'restricted' && round <= maxAutoRounds;
        // code-design prompt packs live alongside the prd packs but in a
        // separate subdir so the agent can't confuse "PRD architecture review"
        // (mesh-grounded) with "code-design architecture review" (code-grounded).
        const promptFilename = persona === 'code-architect' ? 'architecture-review.md' : 'security-review.md';
        const promptPath = path.join(mesh, '.caterpillar', 'prompts', 'code-design', promptFilename);
        let promptPack = '';
        let promptPackFound = false;
        if (fs.existsSync(promptPath)) {
            try {
                promptPack = fs.readFileSync(promptPath, 'utf8');
                promptPackFound = true;
            }
            catch { /* best-effort: an unreadable pack is reported as not found */ }
        }
        const auditMetadata = {
            persona,
            phase: 'what',
            tier,
            max_auto_rounds: maxAutoRounds,
            round,
            should_proceed: shouldProceed,
            prompt_pack_path: promptPath,
            prompt_pack_found: promptPackFound,
        };
        return {
            ok: true,
            persona,
            phase: 'what',
            tier,
            maxAutoRounds,
            round,
            shouldProceed,
            promptPack,
            promptPackPath: promptPath,
            promptPackFound,
            auditMetadata,
        };
    };
}
const handleSelfReviewCodeArchitect = makeCodeReviewHandler('code-architect');
const handleSelfReviewCodeSecurity = makeCodeReviewHandler('code-security');
|
|
891
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
892
|
+
// knowledge-code — Phase D D6 backend. Per A12.v1.1, branches on per-repo
|
|
893
|
+
// `targetCodeRepoStatus`: 'connected' clones + classifies (brownfield);
|
|
894
|
+
// 'create' returns scaffolding hints (greenfield, no clone); 'not-connected'
|
|
895
|
+
// / 'unreachable' refuses with a remediation hint so the agent stops cleanly.
|
|
896
|
+
//
|
|
897
|
+
// MVP extraction is shallow (top-dirs + language map + manifest detection +
|
|
898
|
+
// entrypoint heuristics). Tree-sitter polyglot cross-module-call extraction
|
|
899
|
+
// is a follow-up (D-PR1.v1.1) — it requires per-language parsers as deps
|
|
900
|
+
// that bloat the runner package. The shallow shape is enough to prove the
|
|
901
|
+
// brownfield/greenfield contract end-to-end on the IMDB-celebs sample.
|
|
902
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
903
|
+
// Input contract for the knowledge-code skill. `okrId` and `repoUrl` are
// required non-empty strings, `repoStatus` is one of the four per-repo
// status values, and `ref` / `maxFiles` (positive integer) are optional.
const KnowledgeCodeInput = (() => {
    const { z } = zod_1;
    const nonEmptyString = () => z.string().min(1);
    return z.object({
        okrId: nonEmptyString(),
        repoUrl: nonEmptyString(),
        repoStatus: z.enum(['connected', 'not-connected', 'create', 'unreachable']),
        ref: z.string().optional(),
        maxFiles: z.number().int().positive().optional(),
    });
})();
|
|
910
|
+
/**
 * Lookup table from lower-case file extension (with leading dot) to a
 * language label. Feeds the `languages` histogram in the brownfield
 * response; the language with the highest file count is later reported
 * as the primary one.
 */
const LANG_EXTS = Object.fromEntries([
    ['typescript', ['.ts', '.tsx']],
    ['javascript', ['.js', '.jsx', '.mjs', '.cjs']],
    ['python', ['.py']],
    ['go', ['.go']],
    ['rust', ['.rs']],
    ['java', ['.java']],
    ['kotlin', ['.kt']],
    ['ruby', ['.rb']],
    ['php', ['.php']],
    ['csharp', ['.cs']],
    ['swift', ['.swift']],
    ['c', ['.c', '.h']],
    ['cpp', ['.cpp', '.cc', '.hpp', '.hxx']],
].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language])));
|
|
930
|
+
/**
 * Base filenames treated as dependency manifests or lockfiles. The
 * brownfield walk reports any file whose basename is in this set so the
 * agent can ground design decisions on the repo's actual dependency
 * posture. Keep the list conservative — over-eager detection is noise.
 */
const MANIFEST_FILES = new Set([
    // Node.js
    'package.json',
    'package-lock.json',
    'pnpm-lock.yaml',
    'yarn.lock',
    // Python
    'requirements.txt',
    'pyproject.toml',
    'Pipfile',
    'Pipfile.lock',
    'poetry.lock',
    // Go
    'go.mod',
    'go.sum',
    // Rust
    'Cargo.toml',
    'Cargo.lock',
    // JVM
    'pom.xml',
    'build.gradle',
    'build.gradle.kts',
    // Ruby
    'Gemfile',
    'Gemfile.lock',
    // PHP
    'composer.json',
]);
|
|
944
|
+
/**
 * Walk a directory tree and return up to `maxFiles` file paths relative
 * to `rootDir`, always joined with `/`.
 *
 * The walk is breadth-first with entries sorted by name, so a capped
 * result is deterministic and always contains the shallowest files
 * (root-level manifests first) instead of being exhausted by whichever
 * subtree the OS happened to list first.
 *
 * Entries named `.git`, `node_modules`, `__pycache__`, `vendor`, `dist`,
 * `build`, `.next` or `.nuxt` are skipped — convention dirs that bloat
 * counts without informing design. Directories that cannot be read are
 * skipped silently. Only entries reported as regular files or
 * directories by `readdir` are considered.
 *
 * @param {string} rootDir Absolute or relative path of the tree root.
 * @param {number} [maxFiles] Upper bound on returned paths; unbounded
 *   when omitted.
 * @returns {string[]} Relative file paths in breadth-first, name-sorted order.
 */
function walkRepo(rootDir, maxFiles = Number.POSITIVE_INFINITY) {
    const SKIP = new Set(['.git', 'node_modules', '__pycache__', 'vendor', 'dist', 'build', '.next', '.nuxt']);
    const out = [];
    const capReached = () => out.length >= maxFiles;
    const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
    // FIFO queue of directories still to list; `head` avoids O(n) shift().
    const queue = [{ absDir: rootDir, relBase: '' }];
    for (let head = 0; head < queue.length && !capReached(); head++) {
        const { absDir, relBase } = queue[head];
        let entries;
        try {
            entries = fs.readdirSync(absDir, { withFileTypes: true });
        }
        catch {
            continue; // unreadable directory: skip it, keep walking
        }
        entries.sort(byName);
        for (const ent of entries) {
            if (capReached()) {
                break;
            }
            if (SKIP.has(ent.name)) {
                continue;
            }
            const rel = relBase ? `${relBase}/${ent.name}` : ent.name;
            if (ent.isDirectory()) {
                queue.push({ absDir: path.join(absDir, ent.name), relBase: rel });
            }
            else if (ent.isFile()) {
                out.push(rel);
            }
        }
    }
    return out;
}
|
|
983
|
+
/**
 * Summarise a list of repo-relative file paths (using `/` separators).
 *
 * @param {Iterable<string>} files Relative paths, e.g. from `walkRepo`.
 * @returns {{topDirs: string[], languages: Object<string, number>, packageManifests: string[]}}
 *   - `topDirs`: sorted, de-duplicated first path segments of files that
 *     live below the root;
 *   - `languages`: file count per language label, keyed via `LANG_EXTS`
 *     on the lower-cased extension;
 *   - `packageManifests`: sorted paths whose basename is in `MANIFEST_FILES`.
 */
function classifyRepo(files) {
    const firstSegments = new Set();
    const languageCounts = {};
    const manifestPaths = [];
    for (const relPath of files) {
        const cut = relPath.indexOf('/');
        if (cut > 0) {
            firstSegments.add(relPath.slice(0, cut));
        }
        const label = LANG_EXTS[path.extname(relPath).toLowerCase()];
        if (label) {
            const previous = languageCounts[label] ?? 0;
            languageCounts[label] = previous + 1;
        }
        if (MANIFEST_FILES.has(path.basename(relPath))) {
            manifestPaths.push(relPath);
        }
    }
    const topDirs = [...firstSegments];
    topDirs.sort();
    manifestPaths.sort();
    return { topDirs, languages: languageCounts, packageManifests: manifestPaths };
}
|
|
1013
|
+
/**
 * Parse `https://github.com/<owner>/<name>` (http or https, with or
 * without `.git` suffix, with or without trailing slash).
 *
 * Owner and name are limited to letters, digits, `.`, `_` and `-`, so
 * URLs carrying a query string, fragment or extra path segments are
 * rejected rather than folded into the repo name. The path-like names
 * `.` and `..` are rejected because callers join `name` into filesystem
 * paths.
 *
 * @param {string} url Candidate repository URL.
 * @returns {{owner: string, name: string} | null} Parsed slug parts, or
 *   null for anything that is not a plain GitHub repository URL.
 */
function parseGithubUrl(url) {
    if (typeof url !== 'string') {
        return null;
    }
    const m = url.match(/^https?:\/\/github\.com\/([A-Za-z0-9._-]+)\/([A-Za-z0-9._-]+?)(?:\.git)?\/?$/);
    if (!m) {
        return null;
    }
    const [, owner, name] = m;
    const isDotSegment = (segment) => segment === '.' || segment === '..';
    if (isDotSegment(owner) || isDotSegment(name)) {
        return null;
    }
    return { owner, name };
}
|
|
1024
|
+
/**
 * knowledge-code skill handler. Branches on the per-repo status:
 *   - `not-connected` / `unreachable` → refuse with a remediation hint;
 *   - `create` → greenfield: no clone, returns fixed scaffolding hints;
 *   - `connected` → brownfield: shallow-clones the GitHub repo into a
 *     temp dir, walks and classifies it, reads `package.json` manifests
 *     for entry points, then removes the temp dir.
 *
 * @param {unknown} input Expected shape: `{ okrId, repoUrl, repoStatus,
 *   ref?, maxFiles? }`. `maxFiles` defaults to 200.
 * @returns {Promise<object>} Always a result object with `ok`. Failures
 *   carry `reason` (`bad-input: …`, `repo-unreachable`,
 *   `repo-not-connected`, `repo-url-not-github`, `clone-failed`).
 */
const handleKnowledgeCode = async (input) => {
    const parsed = KnowledgeCodeInput.safeParse(input);
    if (!parsed.success) {
        return { ok: false, reason: `bad-input: ${parsed.error.message}` };
    }
    const { okrId, repoUrl, repoStatus, ref, maxFiles } = parsed.data;
    const gh = parseGithubUrl(repoUrl);
    const repoSlug = gh ? `${gh.owner}/${gh.name}` : repoUrl;
    // ─── Refuse branch (not-connected / unreachable) ───────────────────
    // The agent never grounds against ambiguous repo intent. The remediation
    // hint points the human back to the Looking Glass repo-status picker.
    if (repoStatus === 'not-connected' || repoStatus === 'unreachable') {
        const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'refuse', repo_status: repoStatus, okr_id: okrId };
        return {
            ok: false,
            reason: repoStatus === 'unreachable' ? 'repo-unreachable' : 'repo-not-connected',
            repo: repoSlug,
            remediation: "Open Looking Glass → OKR detail → Target Code Repos and pick a status: 'Connected' (if the repo exists and is wired) or 'Create' (if greenfield). The code-design-agent refuses to ground until every target repo's intent is explicit.",
            auditMetadata,
        };
    }
    // ─── Greenfield branch (create) ────────────────────────────────────
    // No clone. The hints are fixed, conservative defaults meant to seed
    // the agent's choice, not own it; referenceRepos is an empty
    // placeholder until the reference-repos integration lands.
    if (repoStatus === 'create') {
        const scaffoldingHints = {
            suggestedLanguage: 'typescript',
            suggestedFramework: 'express',
            seedFiles: [
                'README.md',
                'LICENSE',
                'package.json',
                'tsconfig.json',
                'src/index.ts',
                '.github/CODEOWNERS',
                '.github/workflows/red-queen-bootstrap.yml',
            ],
        };
        const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'greenfield', repo_status: 'create', okr_id: okrId };
        return {
            ok: true,
            mode: 'greenfield',
            repo: repoSlug,
            reason: 'repo-status-create',
            referenceRepos: [], // D5 reference-repos integration is a follow-up
            scaffoldingHints,
            auditMetadata,
        };
    }
    // ─── Brownfield branch (connected) ─────────────────────────────────
    if (!gh) {
        return { ok: false, reason: 'repo-url-not-github', repo: repoUrl };
    }
    // A ref that looks like a command-line option is never a legitimate
    // branch or tag; refuse it instead of handing it to git.
    if (ref && ref.startsWith('-')) {
        return { ok: false, reason: 'bad-input: ref must not start with "-"', repo: repoSlug };
    }
    const { execFileSync } = await Promise.resolve().then(() => __importStar(require('node:child_process')));
    const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), `knowledge-code-${gh.name}-`));
    try {
        const cloneTarget = path.join(tmpRoot, gh.name);
        // An empty ref means "default branch", same as an omitted one.
        const cloneRef = ref || 'HEAD';
        const cloneArgs = ['clone', '--depth=1', '--filter=blob:limit=10m'];
        if (cloneRef !== 'HEAD') {
            cloneArgs.push('--branch', cloneRef);
        }
        // `--` ends option parsing so the positionals can't be read as flags.
        cloneArgs.push('--', repoUrl, cloneTarget);
        try {
            execFileSync('git', cloneArgs, {
                stdio: ['ignore', 'pipe', 'pipe'],
                timeout: 60_000,
                // Fail fast instead of waiting on an interactive credential prompt.
                env: { ...process.env, GIT_TERMINAL_PROMPT: '0' },
            });
        }
        catch (err) {
            // Soft-refuse rather than crash on clone failure.
            const cloneError = err instanceof Error ? err.message : String(err);
            const auditMetadata = { phase: 'what', repo: repoSlug, mode: 'brownfield-clone-failed', repo_status: 'connected', okr_id: okrId };
            return {
                ok: false,
                reason: 'clone-failed',
                repo: repoSlug,
                remediation: `git clone failed for ${repoUrl}. Verify the GitHub App install is approved on this repo and the ref (${cloneRef}) exists. Underlying error: ${cloneError}`,
                auditMetadata,
            };
        }
        // Resolve the actual SHA so the response is reproducible.
        let sha = '';
        try {
            sha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: cloneTarget, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim();
        }
        catch { /* sha stays empty */ }
        const files = walkRepo(cloneTarget, maxFiles ?? 200);
        const structure = classifyRepo(files);
        // Best-effort entrypoint detection. Conservative: only report an
        // entrypoint when a package.json names one via `main` or `bin`.
        const entryPoints = [];
        for (const manifestPath of structure.packageManifests) {
            if (path.basename(manifestPath) !== 'package.json') {
                continue;
            }
            try {
                const pkg = JSON.parse(fs.readFileSync(path.join(cloneTarget, manifestPath), 'utf8'));
                const deps = pkg.dependencies ?? {};
                // First match wins; order runs from server frameworks to UI libraries.
                const knownFrameworks = [
                    ['express', 'express'],
                    ['fastify', 'fastify'],
                    ['hono', 'hono'],
                    ['@nestjs/core', 'nestjs'],
                    ['next', 'next'],
                    ['react', 'react'],
                ];
                const framework = knownFrameworks.find(([dep]) => deps[dep])?.[1] ?? 'unknown';
                if (typeof pkg.main === 'string' && pkg.main !== '') {
                    entryPoints.push({ path: pkg.main, kind: framework === 'react' || framework === 'next' ? 'ui' : 'api', framework });
                }
                // `bin` is either a single path or a name → path map; only
                // the first mapped path is reported.
                const binPath = typeof pkg.bin === 'string' ? pkg.bin : Object.values(pkg.bin ?? {})[0];
                if (typeof binPath === 'string' && binPath !== '') {
                    entryPoints.push({ path: binPath, kind: 'cli', framework });
                }
            }
            catch { /* manifest unreadable / non-JSON; skip */ }
        }
        const primaryLanguage = Object.entries(structure.languages).sort((a, b) => b[1] - a[1])[0]?.[0] ?? 'unknown';
        const auditMetadata = {
            phase: 'what',
            repo: repoSlug,
            mode: 'brownfield',
            repo_status: 'connected',
            okr_id: okrId,
            sha: sha.slice(0, 12),
            file_count: files.length,
            primary_language: primaryLanguage,
            manifests: structure.packageManifests.length,
        };
        return {
            ok: true,
            mode: 'brownfield',
            repo: { owner: gh.owner, name: gh.name, ref: cloneRef, sha },
            structure,
            entryPoints,
            auditMetadata,
        };
    }
    finally {
        // The skill is a one-shot read: always drop the temp tree, including
        // on clone failure or an unexpected throw.
        try {
            fs.rmSync(tmpRoot, { recursive: true, force: true });
        }
        catch { /* best-effort cleanup */ }
    }
};
|
|
1196
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
689
1197
|
// Search skills — thin wrappers over the existing search nodes
|
|
690
1198
|
// ─────────────────────────────────────────────────────────────────────
|
|
691
1199
|
const SearchQueriesInput = zod_1.z.object({
|
|
@@ -1239,11 +1747,23 @@ exports.SKILLS = {
|
|
|
1239
1747
|
'knowledge-mesh-threats': handleKnowledgeMeshThreats,
|
|
1240
1748
|
'knowledge-mesh-adrs': handleKnowledgeMeshAdrs,
|
|
1241
1749
|
'knowledge-research': handleKnowledgeResearch,
|
|
1750
|
+
// D-PR1.v1.1 — knowledge-prd handler (SKILL.md was deployed but no
|
|
1751
|
+
// runner backend existed, causing the code-design-agent to fall back
|
|
1752
|
+
// to direct file read with no chain evidence on PR #120).
|
|
1753
|
+
'knowledge-prd': handleKnowledgePrd,
|
|
1242
1754
|
'context-architecture': handleContextArchitecture,
|
|
1243
1755
|
'context-security': handleContextSecurity,
|
|
1244
1756
|
'context-quality': handleContextQuality,
|
|
1245
1757
|
'self-review-architect': handleSelfReviewArchitect,
|
|
1246
1758
|
'self-review-security': handleSelfReviewSecurity,
|
|
1759
|
+
// D-PR1 — code-phase persona-switch packs. Same B29 pattern as the
|
|
1760
|
+
// PRD-phase pair above; reads .caterpillar/prompts/code-design/* packs.
|
|
1761
|
+
'self-review-code-architect': handleSelfReviewCodeArchitect,
|
|
1762
|
+
'self-review-code-security': handleSelfReviewCodeSecurity,
|
|
1763
|
+
// D-PR1 — knowledge-code (Phase D D6). 3-mode response per A12.v1.1
|
|
1764
|
+
// targetCodeRepoStatus: brownfield (clone + classify), greenfield
|
|
1765
|
+
// (scaffolding hints, no clone), refuse (not-connected / unreachable).
|
|
1766
|
+
'knowledge-code': handleKnowledgeCode,
|
|
1247
1767
|
'tavily-search': handleTavilySearch,
|
|
1248
1768
|
'arxiv-search': handleArxivSearch,
|
|
1249
1769
|
'uspto-search': handleUsptoSearch,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.35",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|