@kognai/orchestrator-core 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,7 @@ class SupervisorAgent {
45
45
  : '';
46
46
  // Sherlock v2: inject ASMR episodic memory context (AMD-21-03) — fail-open
47
47
  const memoryContext = await (0, sherlock_memory_1.getSherlockMemoryContext)(task.context || task.id);
48
- const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
48
+ const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## YOUR REVIEW LENS — Specification & Integration (this is your ONLY job)\nYou are ONE of two INDEPENDENT reviewers. Judge ONLY: (1) SPEC COVERAGE — every required export/function/behavior is present and matches the task spec; REJECT if partial, stubbed, or TODO. (2) INTEGRATION — imports resolve to real files/symbols, types and contracts match the files this depends on, and referenced files exist. Do NOT base your grade on security or runtime concerns — the other reviewer owns those. This file passes your lens only if it is spec-complete AND integrates cleanly.\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). 
If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
49
49
  const startTime = Date.now();
50
50
  // B.15: DeepSeek via ClawRouter for standard tasks (~$0.02/task vs $0.07 dual-supervisor)
51
51
  // Retain Claude Sonnet only for audit/refactor-complex (high-stakes)
@@ -115,7 +115,7 @@ class Supervisor2Agent {
115
115
  const integrityContext2 = task._integrityFailed
116
116
  ? `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`
117
117
  : '';
118
- const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
118
+ const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## YOUR REVIEW LENS — Security & Runtime (this is your ONLY job)\nYou are ONE of two INDEPENDENT reviewers. Judge ONLY: (1) SECURITY — injection, secret/credential leakage, unsafe eval/exec/shell, unsanitized input or output (e.g. innerHTML / unescaped HTML), missing authorization or input validation. (2) RUNTIME ROBUSTNESS — error handling, unhandled rejections, resource leaks, missing timeouts/cancellation, crash-on-bad-input. Do NOT re-judge spec completeness — the other reviewer owns that. This file passes your lens only if it is secure AND runtime-robust.\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). 
If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
119
119
  const startTime = Date.now();
120
120
  // B.15: Use Haiku for second-pass review — 10x cheaper than Sonnet.
121
121
  // Founder directive 2026-05-25: if Anthropic depletes, fall back to ClawRouter/DeepSeek
@@ -199,68 +199,53 @@ async function reconcileSupervisorReviews(review1, review2, task, ceo) {
199
199
  return { finalReview: review1, review1, review2, consensus: false, escalatedToCEO: false };
200
200
  }
201
201
  const bothApproved = review1.verdict === 'APPROVED' && review2.verdict === 'APPROVED';
202
- const bothRejected = review1.verdict !== 'APPROVED' && review2.verdict !== 'APPROVED';
203
- const consensus = bothApproved || bothRejected;
204
202
  if (bothApproved) {
205
- // Both approve — take the average score, merge strengths
203
+ // Both lenses passed — average score, merge strengths.
206
204
  const avgScore = Math.round((review1.score + review2.score) / 2);
207
- (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.green, ` ✓ DUAL CONSENSUS: Both supervisors APPROVED (Sup1: ${review1.score}, Sup2: ${review2.score}, avg: ${avgScore})`);
205
+ (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.green, ` ✓ DUAL PASS: both lenses APPROVED — Spec/Integration (${review1.score}) + Security/Runtime (${review2.score}), avg ${avgScore}`);
208
206
  return {
209
207
  finalReview: {
210
208
  verdict: 'APPROVED',
211
209
  score: avgScore,
212
- summary: `Dual-approved: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100)`,
210
+ summary: `Both lenses passed: Spec/Integration ${review1.score}/100 + Security/Runtime ${review2.score}/100`,
213
211
  issues: [...review1.issues, ...review2.issues],
214
212
  strengths: Array.from(new Set([...review1.strengths, ...review2.strengths])),
215
213
  },
216
214
  review1, review2, consensus: true, escalatedToCEO: false,
217
215
  };
218
216
  }
219
- if (bothRejected) {
220
- // Both reject — merge issues, take lower score
221
- const minScore = Math.min(review1.score, review2.score);
222
- (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.red, ` ✗ DUAL CONSENSUS: Both supervisors REJECTED (Sup1: ${review1.score}, Sup2: ${review2.score})`);
223
- return {
224
- finalReview: {
225
- verdict: 'REJECTED',
226
- score: minScore,
227
- summary: `Dual-rejected: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100). ${review1.summary} | ${review2.summary}`,
228
- issues: [...review1.issues, ...review2.issues],
229
- strengths: [],
230
- },
231
- review1, review2, consensus: true, escalatedToCEO: false,
232
- };
233
- }
234
- // CONFLICT — one approved, one rejected → escalate to CEO
235
- const approver = review1.verdict === 'APPROVED' ? 'Sup1' : 'Sup2';
236
- const rejecter = review1.verdict === 'APPROVED' ? 'Sup2' : 'Sup1';
237
- const approvalReview = review1.verdict === 'APPROVED' ? review1 : review2;
238
- const rejectionReview = review1.verdict === 'APPROVED' ? review2 : review1;
239
- (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.yellow, ` ⚡ SUPERVISOR CONFLICT on ${task.id}: ${approver} APPROVED (${approvalReview.score}), ${rejecter} REJECTED (${rejectionReview.score})`);
240
- (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.magenta, ` → Escalating to CEO for final decision...`);
241
- try {
242
- const ceoDecision = await ceo.resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter);
243
- const ceoApproves = ceoDecision.toLowerCase().includes('approve');
244
- (0, orchestrate_engine_1.log)(ceoApproves ? orchestrate_engine_1.c.green : orchestrate_engine_1.c.red, ` CEO DECISION: ${ceoApproves ? 'APPROVED' : 'REJECTED'} — ${ceoDecision.substring(0, 200)}`);
245
- return {
246
- finalReview: {
247
- verdict: ceoApproves ? 'APPROVED' : 'REJECTED',
248
- score: ceoApproves ? approvalReview.score : rejectionReview.score,
249
- summary: `CEO resolved conflict (${approver} approved, ${rejecter} rejected): ${ceoDecision.substring(0, 300)}`,
250
- issues: rejectionReview.issues,
251
- strengths: approvalReview.strengths,
252
- },
253
- review1, review2, consensus: false, escalatedToCEO: true, ceoDecision,
254
- };
255
- }
256
- catch (error) {
257
- // CEO unavailable — default to rejection (safer)
258
- (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.yellow, ` CEO unavailable for conflict resolution: ${error.message}. Defaulting to REJECTED.`);
259
- return {
260
- finalReview: rejectionReview,
261
- review1, review2, consensus: false, escalatedToCEO: false,
262
- };
263
- }
217
+ // BOTH-MUST-PASS (gap 3). The two reviewers cover DIFFERENT dimensions
218
+ // (spec/integration vs security/runtime), so passing one does not excuse
219
+ // failing the other. Any rejection ⇒ REJECTED. No CEO rescue — a real
220
+ // security or spec failure must be FIXED, not voted away by a third opinion
221
+ // that never looked at that dimension. This replaces the old
222
+ // conflict→CEO-decides path that let a single approval override a rejection.
223
+ const failedLenses = [];
224
+ if (review1.verdict !== 'APPROVED')
225
+ failedLenses.push('Spec/Integration');
226
+ if (review2.verdict !== 'APPROVED')
227
+ failedLenses.push('Security/Runtime');
228
+ const rejectedIssues = [
229
+ ...(review1.verdict !== 'APPROVED' ? review1.issues : []),
230
+ ...(review2.verdict !== 'APPROVED' ? review2.issues : []),
231
+ ];
232
+ const rejectionSummaries = [review1, review2]
233
+ .filter((r) => r.verdict !== 'APPROVED')
234
+ .map((r) => r.summary)
235
+ .join(' | ');
236
+ (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.red, ` ✗ REJECTED — failed required lens: ${failedLenses.join(' + ')} (both must pass). Spec/Int ${review1.score}, Sec/RT ${review2.score}`);
237
+ return {
238
+ finalReview: {
239
+ verdict: 'REJECTED',
240
+ score: Math.min(review1.score, review2.score),
241
+ summary: `Failed required lens (${failedLenses.join(' + ')}): ${rejectionSummaries}`,
242
+ issues: rejectedIssues,
243
+ strengths: [],
244
+ },
245
+ review1, review2,
246
+ consensus: review1.verdict === review2.verdict,
247
+ escalatedToCEO: false,
248
+ };
264
249
  }
265
250
  // ===== CEO Agent (Claude via Anthropic API) =====
266
251
  class CEOAgent {
@@ -750,6 +735,7 @@ class AgentCreator {
750
735
  // supplied a SpawnGate (Kognai wires SAF here), consult it BEFORE creating
751
736
  // anything on disk. Approval/rejection only; the citizenship logic below is
752
737
  // unchanged (its extraction is tracked separately as TICKET-226).
738
+ let spawnOwner;
753
739
  if (this.spawnGate) {
754
740
  const decision = this.spawnGate(spec);
755
741
  if (!decision.approved) {
@@ -764,6 +750,9 @@ class AgentCreator {
764
750
  }
765
751
  if (decision.audit)
766
752
  (0, orchestrate_engine_1.log)(orchestrate_engine_1.c.gray, ` ✓ ${decision.audit}`);
753
+ // The gate (SAF) resolves the lineage from its requester_did — this is the
754
+ // running company's context, plumbed in rather than hardcoded here.
755
+ spawnOwner = decision.owner;
767
756
  }
768
757
  const agentDir = `./agents/${spec.name}`;
769
758
  (0, fs_1.mkdirSync)(agentDir, { recursive: true });
@@ -771,10 +760,13 @@ class AgentCreator {
771
760
  // citizen — not a bare agent. Mint citizenship (citizen_id + roll
772
761
  // number + Kōpus avatar + ACP baseline) BEFORE writing the agent
773
762
  // files so the citizen record can be referenced in the prompt.
763
+ // Owner-scoped when the gate supplied a lineage (e.g. invoica/voxight);
764
+ // legacy kognai-internal path otherwise (back-compat for gate-less templates).
774
765
  const citizen = (0, citizenship_1.mintCitizen)(spec.name, {
775
766
  founding_agent: 'ceo',
776
767
  proposing_agent: 'cto',
777
768
  citizen_type: 'spawned',
769
+ owner: spawnOwner,
778
770
  });
779
771
  // Write agent.yaml
780
772
  const yaml = `name: ${spec.name}
@@ -7,6 +7,8 @@ export declare class Orchestrator {
7
7
  private supervisor2;
8
8
  private agents;
9
9
  private tasks;
10
+ private buildPath;
11
+ private triage;
10
12
  private stats;
11
13
  private taskRuns;
12
14
  /**