@kognai/orchestrator-core 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.js +3 -0
- package/dist/lib/build-triage.d.ts +27 -0
- package/dist/lib/build-triage.js +202 -0
- package/dist/lib/citizenship.d.ts +93 -6
- package/dist/lib/citizenship.js +143 -19
- package/dist/lib/engine-agents.d.ts +7 -0
- package/dist/lib/engine-agents.js +44 -52
- package/dist/lib/engine-orchestrator.d.ts +2 -0
- package/dist/lib/engine-orchestrator.js +210 -134
- package/dist/lib/sovereign-agent-factory.d.ts +34 -0
- package/dist/lib/sovereign-agent-factory.js +103 -5
- package/package.json +1 -1
|
@@ -45,7 +45,7 @@ class SupervisorAgent {
|
|
|
45
45
|
: '';
|
|
46
46
|
// Sherlock v2: inject ASMR episodic memory context (AMD-21-03) — fail-open
|
|
47
47
|
const memoryContext = await (0, sherlock_memory_1.getSherlockMemoryContext)(task.context || task.id);
|
|
48
|
-
const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
|
|
48
|
+
const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## YOUR REVIEW LENS — Specification & Integration (this is your ONLY job)\nYou are ONE of two INDEPENDENT reviewers. Judge ONLY: (1) SPEC COVERAGE — every required export/function/behavior is present and matches the task spec; REJECT if partial, stubbed, or TODO. (2) INTEGRATION — imports resolve to real files/symbols, types and contracts match the files this depends on, and referenced files exist. Do NOT base your grade on security or runtime concerns — the other reviewer owns those. This file passes your lens only if it is spec-complete AND integrates cleanly.\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
|
|
49
49
|
const startTime = Date.now();
|
|
50
50
|
// B.15: DeepSeek via ClawRouter for standard tasks (~$0.02/task vs $0.07 dual-supervisor)
|
|
51
51
|
// Retain Claude Sonnet only for audit/refactor-complex (high-stakes)
|
|
@@ -115,7 +115,7 @@ class Supervisor2Agent {
|
|
|
115
115
|
const integrityContext2 = task._integrityFailed
|
|
116
116
|
? `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`
|
|
117
117
|
: '';
|
|
118
|
-
const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
|
|
118
|
+
const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## YOUR REVIEW LENS — Security & Runtime (this is your ONLY job)\nYou are ONE of two INDEPENDENT reviewers. Judge ONLY: (1) SECURITY — injection, secret/credential leakage, unsafe eval/exec/shell, unsanitized input or output (e.g. innerHTML / unescaped HTML), missing authorization or input validation. (2) RUNTIME ROBUSTNESS — error handling, unhandled rejections, resource leaks, missing timeouts/cancellation, crash-on-bad-input. Do NOT re-judge spec completeness — the other reviewer owns that. This file passes your lens only if it is secure AND runtime-robust.\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
|
|
119
119
|
const startTime = Date.now();
|
|
120
120
|
// B.15: Use Haiku for second-pass review — 10x cheaper than Sonnet.
|
|
121
121
|
// Founder directive 2026-05-25: if Anthropic depletes, fall back to ClawRouter/DeepSeek
|
|
@@ -199,68 +199,53 @@ async function reconcileSupervisorReviews(review1, review2, task, ceo) {
|
|
|
199
199
|
return { finalReview: review1, review1, review2, consensus: false, escalatedToCEO: false };
|
|
200
200
|
}
|
|
201
201
|
const bothApproved = review1.verdict === 'APPROVED' && review2.verdict === 'APPROVED';
|
|
202
|
-
const bothRejected = review1.verdict !== 'APPROVED' && review2.verdict !== 'APPROVED';
|
|
203
|
-
const consensus = bothApproved || bothRejected;
|
|
204
202
|
if (bothApproved) {
|
|
205
|
-
// Both
|
|
203
|
+
// Both lenses passed — average score, merge strengths.
|
|
206
204
|
const avgScore = Math.round((review1.score + review2.score) / 2);
|
|
207
|
-
(0, orchestrate_engine_1.log)(orchestrate_engine_1.c.green, ` ✓ DUAL
|
|
205
|
+
(0, orchestrate_engine_1.log)(orchestrate_engine_1.c.green, ` ✓ DUAL PASS: both lenses APPROVED — Spec/Integration (${review1.score}) + Security/Runtime (${review2.score}), avg ${avgScore}`);
|
|
208
206
|
return {
|
|
209
207
|
finalReview: {
|
|
210
208
|
verdict: 'APPROVED',
|
|
211
209
|
score: avgScore,
|
|
212
|
-
summary: `
|
|
210
|
+
summary: `Both lenses passed: Spec/Integration ${review1.score}/100 + Security/Runtime ${review2.score}/100`,
|
|
213
211
|
issues: [...review1.issues, ...review2.issues],
|
|
214
212
|
strengths: Array.from(new Set([...review1.strengths, ...review2.strengths])),
|
|
215
213
|
},
|
|
216
214
|
review1, review2, consensus: true, escalatedToCEO: false,
|
|
217
215
|
};
|
|
218
216
|
}
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
strengths: approvalReview.strengths,
|
|
252
|
-
},
|
|
253
|
-
review1, review2, consensus: false, escalatedToCEO: true, ceoDecision,
|
|
254
|
-
};
|
|
255
|
-
}
|
|
256
|
-
catch (error) {
|
|
257
|
-
// CEO unavailable — default to rejection (safer)
|
|
258
|
-
(0, orchestrate_engine_1.log)(orchestrate_engine_1.c.yellow, ` CEO unavailable for conflict resolution: ${error.message}. Defaulting to REJECTED.`);
|
|
259
|
-
return {
|
|
260
|
-
finalReview: rejectionReview,
|
|
261
|
-
review1, review2, consensus: false, escalatedToCEO: false,
|
|
262
|
-
};
|
|
263
|
-
}
|
|
217
|
+
// BOTH-MUST-PASS (gap 3). The two reviewers cover DIFFERENT dimensions
|
|
218
|
+
// (spec/integration vs security/runtime), so passing one does not excuse
|
|
219
|
+
// failing the other. Any rejection ⇒ REJECTED. No CEO rescue — a real
|
|
220
|
+
// security or spec failure must be FIXED, not voted away by a third opinion
|
|
221
|
+
// that never looked at that dimension. This replaces the old
|
|
222
|
+
// conflict→CEO-decides path that let a single approval override a rejection.
|
|
223
|
+
const failedLenses = [];
|
|
224
|
+
if (review1.verdict !== 'APPROVED')
|
|
225
|
+
failedLenses.push('Spec/Integration');
|
|
226
|
+
if (review2.verdict !== 'APPROVED')
|
|
227
|
+
failedLenses.push('Security/Runtime');
|
|
228
|
+
const rejectedIssues = [
|
|
229
|
+
...(review1.verdict !== 'APPROVED' ? review1.issues : []),
|
|
230
|
+
...(review2.verdict !== 'APPROVED' ? review2.issues : []),
|
|
231
|
+
];
|
|
232
|
+
const rejectionSummaries = [review1, review2]
|
|
233
|
+
.filter((r) => r.verdict !== 'APPROVED')
|
|
234
|
+
.map((r) => r.summary)
|
|
235
|
+
.join(' | ');
|
|
236
|
+
(0, orchestrate_engine_1.log)(orchestrate_engine_1.c.red, ` ✗ REJECTED — failed required lens: ${failedLenses.join(' + ')} (both must pass). Spec/Int ${review1.score}, Sec/RT ${review2.score}`);
|
|
237
|
+
return {
|
|
238
|
+
finalReview: {
|
|
239
|
+
verdict: 'REJECTED',
|
|
240
|
+
score: Math.min(review1.score, review2.score),
|
|
241
|
+
summary: `Failed required lens (${failedLenses.join(' + ')}): ${rejectionSummaries}`,
|
|
242
|
+
issues: rejectedIssues,
|
|
243
|
+
strengths: [],
|
|
244
|
+
},
|
|
245
|
+
review1, review2,
|
|
246
|
+
consensus: review1.verdict === review2.verdict,
|
|
247
|
+
escalatedToCEO: false,
|
|
248
|
+
};
|
|
264
249
|
}
|
|
265
250
|
// ===== CEO Agent (Claude via Anthropic API) =====
|
|
266
251
|
class CEOAgent {
|
|
@@ -750,6 +735,7 @@ class AgentCreator {
|
|
|
750
735
|
// supplied a SpawnGate (Kognai wires SAF here), consult it BEFORE creating
|
|
751
736
|
// anything on disk. Approval/rejection only; the citizenship logic below is
|
|
752
737
|
// unchanged (its extraction is tracked separately as TICKET-226).
|
|
738
|
+
let spawnOwner;
|
|
753
739
|
if (this.spawnGate) {
|
|
754
740
|
const decision = this.spawnGate(spec);
|
|
755
741
|
if (!decision.approved) {
|
|
@@ -764,6 +750,9 @@ class AgentCreator {
|
|
|
764
750
|
}
|
|
765
751
|
if (decision.audit)
|
|
766
752
|
(0, orchestrate_engine_1.log)(orchestrate_engine_1.c.gray, ` ✓ ${decision.audit}`);
|
|
753
|
+
// The gate (SAF) resolves the lineage from its requester_did — this is the
|
|
754
|
+
// running company's context, plumbed in rather than hardcoded here.
|
|
755
|
+
spawnOwner = decision.owner;
|
|
767
756
|
}
|
|
768
757
|
const agentDir = `./agents/${spec.name}`;
|
|
769
758
|
(0, fs_1.mkdirSync)(agentDir, { recursive: true });
|
|
@@ -771,10 +760,13 @@ class AgentCreator {
|
|
|
771
760
|
// citizen — not a bare agent. Mint citizenship (citizen_id + roll
|
|
772
761
|
// number + Kōpus avatar + ACP baseline) BEFORE writing the agent
|
|
773
762
|
// files so the citizen record can be referenced in the prompt.
|
|
763
|
+
// Owner-scoped when the gate supplied a lineage (e.g. invoica/voxight);
|
|
764
|
+
// legacy kognai-internal path otherwise (back-compat for gate-less templates).
|
|
774
765
|
const citizen = (0, citizenship_1.mintCitizen)(spec.name, {
|
|
775
766
|
founding_agent: 'ceo',
|
|
776
767
|
proposing_agent: 'cto',
|
|
777
768
|
citizen_type: 'spawned',
|
|
769
|
+
owner: spawnOwner,
|
|
778
770
|
});
|
|
779
771
|
// Write agent.yaml
|
|
780
772
|
const yaml = `name: ${spec.name}
|