@kognai/orchestrator-core 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -795,8 +795,19 @@ ONLY output the JSON array. No markdown, no explanation.`;
|
|
|
795
795
|
(0, engine_primitives_1.safeResetLastCommit)(task.id, task.agent, task.type, ' ');
|
|
796
796
|
(0, code_failure_logger_1.logCodeFailure)({ taskId: task.id, sprintId: (0, orchestrate_engine_1.resolveActiveSprintId)(), agentId: (0, orchestrate_engine_1.resolveAgentDid)(task.agent), attemptNum: attempt, score: 0, model: taskRun.model_used || result?.model || task.model || 'unknown', rejectionReason: qaResult.reason, issues: [], failType: 'qa_gate' });
|
|
797
797
|
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `QA gate: ${qaResult.reason}`);
|
|
798
|
+
// TICKET-352: feed the QA-gate failure to the coder on retry. Previously
|
|
799
|
+
// the QA reason was only logged, so the coder repaired BLIND (lastReview
|
|
800
|
+
// stayed stale/undefined). A red paired-test's output (expected X, got Y)
|
|
801
|
+
// is the most actionable repair signal there is — surface it as the review.
|
|
802
|
+
lastReview = {
|
|
803
|
+
verdict: 'REJECTED',
|
|
804
|
+
score: 0,
|
|
805
|
+
summary: `QA gate failed: ${qaResult.reason}`,
|
|
806
|
+
issues: [{ severity: 'critical', file: (task.deliverables.code?.[0] || task.id), description: qaResult.reason }],
|
|
807
|
+
strengths: [],
|
|
808
|
+
};
|
|
798
809
|
if (attempt < MAX_RETRIES) {
|
|
799
|
-
(0, engine_primitives_1.log)(engine_primitives_1.c.yellow, ' QA gate failed — retrying...');
|
|
810
|
+
(0, engine_primitives_1.log)(engine_primitives_1.c.yellow, ' QA gate failed — retrying with the failure as feedback...');
|
|
800
811
|
continue;
|
|
801
812
|
}
|
|
802
813
|
taskRun.status = 'rejected';
|
|
@@ -46,6 +46,16 @@ exports.callAnthropicCached = callAnthropicCached;
|
|
|
46
46
|
exports.compressContext = compressContext;
|
|
47
47
|
exports.localQAGate = localQAGate;
|
|
48
48
|
exports.httpPost = httpPost;
|
|
49
|
+
/**
|
|
50
|
+
* engine-primitives.ts — shared low-level primitives extracted from orchestrate-engine.ts
|
|
51
|
+
* (TICKET-231 engine split 4). Token accounting, AgentTask/ReviewResult/CTO* types,
|
|
52
|
+
* the LLM gateway (callLLM/callAnthropicCached/compressContext), credit/budget alerts,
|
|
53
|
+
* provider classification, and the QA/debug helpers (localQAGate/typecheckChangedFiles/
|
|
54
|
+
* tieredDebug/httpPost). This is a LEAF module — it imports only sibling leaf modules,
|
|
55
|
+
* never back from orchestrate-engine, so the prior split modules can depend on it without
|
|
56
|
+
* a cycle (orchestrate-engine re-exports the primitives they import for back-compat).
|
|
57
|
+
*/
|
|
58
|
+
const fs_1 = require("fs");
|
|
49
59
|
const child_process_1 = require("child_process");
|
|
50
60
|
const https = __importStar(require("https"));
|
|
51
61
|
const http = __importStar(require("http"));
|
|
@@ -544,6 +554,50 @@ function detectRumination(content) {
|
|
|
544
554
|
const wordCount = Math.max(1, content.split(/\s+/).filter(Boolean).length);
|
|
545
555
|
return { hits, ratio: hits / wordCount };
|
|
546
556
|
}
|
|
557
|
+
// TICKET-352: green-test approval gate. The remaining wall after 347-351 is
|
|
558
|
+
// CONTENT CORRECTNESS — a well-shaped single-concern logic file still rejects at
|
|
559
|
+
// ~50/100 because the supervisor's read is subjective and the repair loop has no
|
|
560
|
+
// ground truth. This makes correctness OBJECTIVE: when a task's generated set
|
|
561
|
+
// includes a paired test file, EXECUTE it and fail QA if it's red — and the
|
|
562
|
+
// failure output flows back to the coder as concrete repair feedback (expected X,
|
|
563
|
+
// got Y) instead of prose. No-op when no test files are present (back-compat).
|
|
564
|
+
// Disable with KOGNAI_TEST_GATE=0. NOTE: this executes model-generated test code
|
|
565
|
+
// in-repo, bounded by a timeout — same trust surface as the swarm already
|
|
566
|
+
// committing generated code.
|
|
567
|
+
const TEST_GATE_ENABLED = (process.env.KOGNAI_TEST_GATE ?? '1') !== '0';
|
|
568
|
+
const TEST_RUN_TIMEOUT_MS = parseInt(process.env.KOGNAI_TEST_TIMEOUT_MS ?? '90000', 10);
|
|
569
|
+
function runPairedTests(fileContents) {
|
|
570
|
+
if (!TEST_GATE_ENABLED)
|
|
571
|
+
return { pass: true, reason: 'test gate disabled (KOGNAI_TEST_GATE=0)' };
|
|
572
|
+
const testFiles = fileContents.filter(f => /\.(test|spec)\.[tj]sx?$/.test(f.path) && (0, fs_1.existsSync)(f.path));
|
|
573
|
+
if (testFiles.length === 0)
|
|
574
|
+
return { pass: true, reason: 'no paired tests' };
|
|
575
|
+
for (const tf of testFiles) {
|
|
576
|
+
const isTs = /\.tsx?$/.test(tf.path);
|
|
577
|
+
// node:test auto-runs on execution and sets a non-zero exit code if any test
|
|
578
|
+
// fails → execSync throws. TS runs via ts-node transpile-only (the typecheck
|
|
579
|
+
// gate already validated types); JS via `node --test`.
|
|
580
|
+
const cmd = isTs
|
|
581
|
+
? `npx ts-node --transpile-only ${JSON.stringify(tf.path)}`
|
|
582
|
+
: `node --test ${JSON.stringify(tf.path)}`;
|
|
583
|
+
try {
|
|
584
|
+
(0, child_process_1.execSync)(cmd, {
|
|
585
|
+
cwd: process.cwd(),
|
|
586
|
+
timeout: TEST_RUN_TIMEOUT_MS,
|
|
587
|
+
stdio: 'pipe',
|
|
588
|
+
env: { ...process.env, TS_NODE_TRANSPILE_ONLY: 'true' },
|
|
589
|
+
});
|
|
590
|
+
}
|
|
591
|
+
catch (e) {
|
|
592
|
+
if (e?.signal === 'SIGTERM' || /ETIMEDOUT|timed out/i.test(String(e?.message || ''))) {
|
|
593
|
+
return { pass: false, reason: `Paired test TIMED OUT (${TEST_RUN_TIMEOUT_MS}ms): ${tf.path} — likely an infinite loop/hang in the code under test.` };
|
|
594
|
+
}
|
|
595
|
+
const out = `${e?.stdout?.toString?.() || ''}\n${e?.stderr?.toString?.() || ''}`.trim();
|
|
596
|
+
return { pass: false, reason: `Paired test FAILED: ${tf.path}\n--- test output (tail) ---\n${out.slice(-1500)}` };
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
return { pass: true, reason: `${testFiles.length} paired test(s) green` };
|
|
600
|
+
}
|
|
547
601
|
async function localQAGate(_task, fileContents) {
|
|
548
602
|
// Fail only on structurally empty files (< 50 chars indicates the model returned nothing useful)
|
|
549
603
|
const emptyFiles = fileContents.filter(f => (f.content || '').trim().length < 50);
|
|
@@ -584,7 +638,14 @@ async function localQAGate(_task, fileContents) {
|
|
|
584
638
|
};
|
|
585
639
|
}
|
|
586
640
|
}
|
|
587
|
-
|
|
641
|
+
// TICKET-352: objective correctness gate — run any paired test file(s) and
|
|
642
|
+
// fail QA (with the test output as the reason → fed to the coder on retry) if
|
|
643
|
+
// red. No-op when the task generated no test files.
|
|
644
|
+
const testResult = runPairedTests(fileContents);
|
|
645
|
+
if (!testResult.pass) {
|
|
646
|
+
return { pass: false, reason: testResult.reason };
|
|
647
|
+
}
|
|
648
|
+
return { pass: true, reason: `${fileContents.length} file(s) non-empty + no rumination + typecheck PASS + ${testResult.reason} — proceeding to supervisor review` };
|
|
588
649
|
}
|
|
589
650
|
// TICKET-085 (v2 — TICKET-088 fix): project-aware typecheck. v1 used
|
|
590
651
|
// loose-file mode + `npx -y typescript@5 tsc` and silently passed
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kognai/orchestrator-core",
|
|
3
|
-
"version": "0.2.8",
|
|
3
|
+
"version": "0.2.9",
|
|
4
4
|
"description": "Kognai sovereign orchestrator — core engine (template-agnostic). Shared by all products (Kognai/coding, Voxight/market-intel, Invoica/fin-compliance); each supplies only its template. Replaces per-repo forks of orchestrate-agents-v2 / sprint-runner / lib.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "SkinGem",
|