sneakoscope 0.6.77 → 0.6.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/src/cli/main.mjs +17 -3
- package/src/cli/maintenance-commands.mjs +21 -0
- package/src/core/artifact-schemas.mjs +18 -1
- package/src/core/evaluation.mjs +346 -1
- package/src/core/fsx.mjs +1 -1
- package/src/core/goal-workflow.mjs +42 -1
- package/src/core/hooks-runtime.mjs +21 -0
- package/src/core/memory-governor.mjs +21 -11
- package/src/core/pipeline.mjs +9 -3
- package/src/core/routes.mjs +2 -1
- package/src/core/skill-forge.mjs +16 -1
- package/src/core/team-dashboard-renderer.mjs +12 -8
- package/src/core/team-live.mjs +4 -0
package/README.md
CHANGED
|
@@ -216,6 +216,7 @@ sks db scan --json
|
|
|
216
216
|
sks wiki refresh
|
|
217
217
|
sks wiki sweep latest --json
|
|
218
218
|
sks wiki validate .sneakoscope/wiki/context-pack.json
|
|
219
|
+
sks harness fixture --json
|
|
219
220
|
sks gx init homepage
|
|
220
221
|
sks gx render homepage --format html
|
|
221
222
|
sks validate-artifacts latest --json
|
|
@@ -261,7 +262,7 @@ Generated app files include:
|
|
|
261
262
|
|
|
262
263
|
Use `sks dollar-commands` to confirm that terminal discovery and Codex App prompt commands agree.
|
|
263
264
|
|
|
264
|
-
TriWiki is intentionally sparse: `sks wiki sweep` records demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim into future prompts. `sks code-structure scan` flags handwritten files above 1000/2000/3000-line thresholds so new logic can be extracted before command files become harder to maintain.
|
|
265
|
+
TriWiki is intentionally sparse: `sks wiki sweep` records demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim into future prompts. `sks harness fixture` validates the broader Harness Growth Factory contract: deliberate forgetting fixtures, skill card metadata, experiment schema, tool-error taxonomy, permission profiles, MultiAgentV2 defaults, and Cmux cockpit view coverage. `sks code-structure scan` flags handwritten files above 1000/2000/3000-line thresholds so new logic can be extracted before command files become harder to maintain.
|
|
265
266
|
|
|
266
267
|
## Prompt `$` Commands
|
|
267
268
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.6.77",
|
|
4
|
+
"version": "0.6.78",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -35,6 +35,7 @@ import { createWorkOrderLedger } from '../core/work-order-ledger.mjs';
|
|
|
35
35
|
import { buildFromChatImgVisualMap } from '../core/from-chat-img-forensics.mjs';
|
|
36
36
|
import { classifyDogfoodFinding, createDogfoodReport, writeDogfoodReport } from '../core/dogfood-loop.mjs';
|
|
37
37
|
import { createSkillCandidate, decideSkillInjection, writeSkillCandidate, writeSkillForgeReport, writeSkillInjectionDecision } from '../core/skill-forge.mjs';
|
|
38
|
+
import { classifyToolError, harnessGrowthReport } from '../core/evaluation.mjs';
|
|
38
39
|
import { recordMistake, writeMistakeMemoryReport } from '../core/mistake-memory.mjs';
|
|
39
40
|
import { buildPromptContext } from '../core/prompt-context-builder.mjs';
|
|
40
41
|
import { renderTeamDashboardState, writeTeamDashboardState } from '../core/team-dashboard-renderer.mjs';
|
|
@@ -42,7 +43,7 @@ import { GOAL_WORKFLOW_ARTIFACT } from '../core/goal-workflow.mjs';
|
|
|
42
43
|
import { CODEX_APP_DOCS_URL, codexAppIntegrationStatus, formatCodexAppStatus } from '../core/codex-app.mjs';
|
|
43
44
|
import { CMUX_BREW_COMMAND, CMUX_BREW_UPGRADE_COMMAND, buildCmuxLaunchPlan, buildCmuxNewWorkspaceArgs, cmuxSurfaceRefFromText, cmuxWorkspaceRef, cmuxWorkspaceRefFromText, cmuxReadiness, cmuxStatusKind, defaultCmuxWorkspaceName, ensureCmuxInstalled, formatCmuxBanner, launchCmuxTeamView, launchCmuxUi, matchingCmuxWorkspaces, parseCmuxWorkspaceList, platformCmuxInstallHint, readCmuxWorkspaceRecord, runCmuxStatus, sanitizeCmuxWorkspaceName, teamLaneStyle, writeCmuxWorkspaceRecord } from '../core/cmux-ui.mjs';
|
|
44
45
|
import { autoReviewProfileName, autoReviewStatus, autoReviewSummary, enableAutoReview, disableAutoReview, enableMadHighProfile, madHighProfileName } from '../core/auto-review.mjs';
|
|
45
|
-
import { buildTeamPlan, codeStructureCommand, defaultBeta, defaultVGraph, evalCommand, gcCommand, goalCommand, gxCommand, hproofCommand, memoryCommand, migrateWikiContextPack, parseTeamCreateArgs, perfCommand, profileCommand, projectWikiClaims, qaLoopCommand, researchCommand, statsCommand, team, teamWorkflowMarkdown, validateArtifactsCommand, wikiCommand, wikiVoxelRowCount, writeWikiContextPack } from './maintenance-commands.mjs';
|
|
46
|
+
import { buildTeamPlan, codeStructureCommand, defaultBeta, defaultVGraph, evalCommand, gcCommand, goalCommand, gxCommand, harnessCommand, hproofCommand, memoryCommand, migrateWikiContextPack, parseTeamCreateArgs, perfCommand, profileCommand, projectWikiClaims, qaLoopCommand, researchCommand, statsCommand, team, teamWorkflowMarkdown, validateArtifactsCommand, wikiCommand, wikiVoxelRowCount, writeWikiContextPack } from './maintenance-commands.mjs';
|
|
46
47
|
|
|
47
48
|
const flag = (args, name) => args.includes(name);
|
|
48
49
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
@@ -107,6 +108,7 @@ export async function main(args) {
|
|
|
107
108
|
if (cmd === 'team') return team(tail);
|
|
108
109
|
if (cmd === 'db') return db(sub, rest);
|
|
109
110
|
if (cmd === 'eval') return evalCommand(sub, rest);
|
|
111
|
+
if (cmd === 'harness') return harnessCommand(sub, rest);
|
|
110
112
|
if (cmd === 'wiki') return wikiCommand(sub, rest);
|
|
111
113
|
if (cmd === 'gc') return gcCommand(tail);
|
|
112
114
|
if (cmd === 'stats') return statsCommand(tail);
|
|
@@ -175,6 +177,7 @@ Usage:
|
|
|
175
177
|
sks eval run [--json] [--out report.json]
|
|
176
178
|
sks eval compare --baseline old.json --candidate new.json [--json]
|
|
177
179
|
sks perf run [--json]
|
|
180
|
+
sks harness fixture [--json]
|
|
178
181
|
sks code-structure scan [--json]
|
|
179
182
|
sks wiki coords --rgba 12,34,56,255
|
|
180
183
|
sks wiki pack [--json] [--role worker|verifier] [--max-anchors N]
|
|
@@ -1477,6 +1480,7 @@ function usage(args = []) {
|
|
|
1477
1480
|
'codex-app': ['Codex App', '', ' sks bootstrap', ' sks codex-app check', ' sks dollar-commands', ' cat .codex/SNEAKOSCOPE.md'],
|
|
1478
1481
|
dollar: ['Dollar Commands', '', formatDollarCommandsCompact(' '), '', 'Terminal: sks dollar-commands [--json]'],
|
|
1479
1482
|
wiki: ['TriWiki', '', ' sks wiki pack', ' sks wiki refresh [--prune]', ' sks wiki sweep latest --json', ' sks wiki validate .sneakoscope/wiki/context-pack.json', ' sks wiki prune --dry-run --json', '', 'Packs include attention.use_first and attention.hydrate_first for compact recall plus source hydration. Sweep records intentional forgetting and promotion candidates.'],
|
|
1483
|
+
harness: ['Harness Growth', '', ' sks harness fixture --json', ' sks harness review --json', '', 'Runs deterministic fixtures for deliberate forgetting, skill cards, harness experiments, tool error taxonomy, permission profiles, MultiAgentV2, and Cmux cockpit views.'],
|
|
1480
1484
|
'code-structure': ['Code Structure', '', ' sks code-structure scan', ' sks code-structure scan --json', '', 'Flags handwritten source files above 1000/2000/3000-line thresholds and records split-review exceptions.'],
|
|
1481
1485
|
gx: ['GX', '', ' sks gx init architecture-atlas', ' sks gx render architecture-atlas --format all', ' sks gx validate architecture-atlas']
|
|
1482
1486
|
};
|
|
@@ -2293,7 +2297,7 @@ async function selftest() {
|
|
|
2293
2297
|
if (hookTeamPendingState.mission_id !== hookTeamState.mission_id) throw new Error('selftest failed: pending clarification allowed a new route mission to replace the visible question sheet');
|
|
2294
2298
|
if (!hookTeamPendingContext.includes('Required questions still pending') || !hookTeamPendingContext.includes('VISIBLE RESPONSE CONTRACT') || !hookTeamPendingContext.includes('UI_STATE_BEHAVIOR')) throw new Error('selftest failed: pending clarification did not re-expose the question sheet');
|
|
2295
2299
|
if (hookTeamPendingContext.includes('MANDATORY ambiguity-removal gate activated')) throw new Error('selftest failed: pending clarification prepared a new ambiguity gate instead of reusing the active one');
|
|
2296
|
-
const hookTeamStopResult = await runProcess(process.execPath, [hookBin, 'hook', 'stop'], { cwd: hookTeamTmp, input: JSON.stringify({ cwd: hookTeamTmp, last_assistant_message: 'I will
|
|
2300
|
+
const hookTeamStopResult = await runProcess(process.execPath, [hookBin, 'hook', 'stop'], { cwd: hookTeamTmp, input: JSON.stringify({ cwd: hookTeamTmp, last_assistant_message: 'I need three decisions before implementation, but I will not paste the Required questions block.' }), env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 128 * 1024 });
|
|
2297
2301
|
if (hookTeamStopResult.code !== 0) throw new Error(`selftest failed: Team stop hook exited ${hookTeamStopResult.code}: ${hookTeamStopResult.stderr}`);
|
|
2298
2302
|
const hookTeamStopJson = JSON.parse(hookTeamStopResult.stdout);
|
|
2299
2303
|
if (hookTeamStopJson.decision !== 'block' || !String(hookTeamStopJson.reason || '').includes('mandatory ambiguity-removal')) throw new Error('selftest failed: Stop hook did not block missing Team ambiguity answers');
|
|
@@ -2303,6 +2307,13 @@ async function selftest() {
|
|
|
2303
2307
|
if (!String(hookTeamStopJson.reason || '').includes('Codex plan-tool interaction')) throw new Error('selftest failed: Stop hook did not reprint plan-tool guidance');
|
|
2304
2308
|
if (!String(hookTeamStopJson.reason || '').includes('VISIBLE RESPONSE CONTRACT')) throw new Error('selftest failed: Stop hook did not force visible clarification response');
|
|
2305
2309
|
const hookTeamSchema = await readJson(path.join(missionDir(hookTeamTmp, hookTeamState.mission_id), 'required-answers.schema.json'));
|
|
2310
|
+
const visibleQuestionsBlock = [
|
|
2311
|
+
'Required questions',
|
|
2312
|
+
...hookTeamSchema.slots.map((slot, idx) => `${idx + 1}. ${slot.id}: ${slot.question}`),
|
|
2313
|
+
'Reply by slot id, then I will write answers.json and run sks pipeline answer latest answers.json.'
|
|
2314
|
+
].join('\n');
|
|
2315
|
+
const visibleQuestionDecision = await evaluateStop(hookTeamTmp, hookTeamState, { last_assistant_message: visibleQuestionsBlock }, { noQuestion: false });
|
|
2316
|
+
if (!visibleQuestionDecision?.continue) throw new Error('selftest failed: visible Required questions block was not accepted by clarification stop gate');
|
|
2306
2317
|
const nonGoalsSlot = hookTeamSchema.slots.find((s) => s.id === 'NON_GOALS');
|
|
2307
2318
|
if (nonGoalsSlot && !nonGoalsSlot.allow_empty) throw new Error('selftest failed: NON_GOALS does not allow an empty array answer');
|
|
2308
2319
|
if (!nonGoalsSlot && !Array.isArray(hookTeamSchema.inferred_answers?.NON_GOALS)) throw new Error('selftest failed: NON_GOALS was neither asked nor inferred');
|
|
@@ -2764,7 +2775,7 @@ async function selftest() {
|
|
|
2764
2775
|
if (teamDashboard?.agent_session_count !== 5 || teamDashboard?.role_counts?.executor !== 5) throw new Error('selftest failed: team dashboard session/role budget missing');
|
|
2765
2776
|
await writeTeamDashboardState(teamDir, { missionId: teamId, mission: { id: teamId, mode: 'team' }, effort: 'high', phase: 'verification' });
|
|
2766
2777
|
const teamDashboardState = await readJson(path.join(teamDir, ARTIFACT_FILES.team_dashboard_state), {});
|
|
2767
|
-
if (!validateTeamDashboardState(teamDashboardState).ok || !renderTeamDashboardState(teamDashboardState).includes('Mission
|
|
2778
|
+
if (!validateTeamDashboardState(teamDashboardState).ok || !renderTeamDashboardState(teamDashboardState).includes('Mission / Goal View')) throw new Error('selftest failed: Team dashboard state missing required cockpit panes');
|
|
2768
2779
|
if (teamDashboard?.context_tracking?.ssot !== 'triwiki') throw new Error('selftest failed: team dashboard missing TriWiki context tracking');
|
|
2769
2780
|
if (!teamDashboard?.phases?.includes('parallel_analysis_scouting')) throw new Error('selftest failed: team dashboard missing analysis scout phase');
|
|
2770
2781
|
if (!teamDashboard?.latest_messages?.some((entry) => entry.agent === 'analysis_scout_1')) throw new Error('selftest failed: team live dashboard missing analysis scout event');
|
|
@@ -2820,6 +2831,9 @@ async function selftest() {
|
|
|
2820
2831
|
if (!evalReport.comparison.meaningful_improvement) throw new Error('selftest failed: evaluation benchmark did not show meaningful improvement');
|
|
2821
2832
|
if (!evalReport.candidate.wiki?.valid) throw new Error('selftest failed: wiki coordinate index invalid in eval');
|
|
2822
2833
|
if (evalReport.candidate.wiki?.voxel_schema !== 'sks.wiki-voxel.v1' || evalReport.candidate.wiki?.voxel_rows < 1) throw new Error('selftest failed: eval did not include voxel overlay metrics');
|
|
2834
|
+
const harnessReport = harnessGrowthReport({});
|
|
2835
|
+
if (!harnessReport.forgetting.fixture.passed || !harnessReport.cmux.views.includes('Harness Experiments View') || !harnessReport.reliability.tool_error_taxonomy.includes('Unknown')) throw new Error('selftest failed: harness growth fixture incomplete');
|
|
2836
|
+
if (classifyToolError({ message: 'operation timed out' }) !== 'Timeout' || classifyToolError({ message: 'unclassified weirdness' }) !== 'Unknown') throw new Error('selftest failed: tool error taxonomy classification');
|
|
2823
2837
|
const coord = rgbaToWikiCoord({ r: 12, g: 34, b: 56, a: 255 });
|
|
2824
2838
|
if (coord.schema !== 'sks.wiki-coordinate.v1' || coord.xyzw.length !== 4) throw new Error('selftest failed: RGBA wiki coordinate conversion');
|
|
2825
2839
|
await writeTextAtomic(path.join(tmp, '.sneakoscope', 'memory', 'q2_facts', 'selftest.md'), '- claim: Selftest memory claim must be selected before lower-weight mission notes. | id: selftest-memory-priority | source: src/cli/main.mjs | risk: high | status: supported | evidence_count: 3 | required_weight: 1.0 | trust_score: 0.9\n');
|
|
@@ -31,6 +31,7 @@ import { cleanupCmuxTeamView, launchCmuxTeamView } from '../core/cmux-ui.mjs';
|
|
|
31
31
|
import { writeSkillForgeReport } from '../core/skill-forge.mjs';
|
|
32
32
|
import { writeMistakeMemoryReport } from '../core/mistake-memory.mjs';
|
|
33
33
|
import { scanDbSafety } from '../core/db-safety.mjs';
|
|
34
|
+
import { harnessGrowthReport, writeHarnessGrowthReport } from '../core/evaluation.mjs';
|
|
34
35
|
|
|
35
36
|
const flag = (args, name) => args.includes(name);
|
|
36
37
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
@@ -417,6 +418,25 @@ export async function perfCommand(sub, args = []) {
|
|
|
417
418
|
console.log(`Report: ${path.relative(root, outPath)}`);
|
|
418
419
|
}
|
|
419
420
|
|
|
421
|
+
/**
 * CLI handler for `sks harness fixture|review [--json]`.
 *
 * With no subcommand the action defaults to `fixture`, which builds the report
 * in memory via `harnessGrowthReport`. `review` instead delegates to
 * `writeHarnessGrowthReport`, passing the `.sneakoscope/reports` directory under
 * the SKS root. Any other subcommand prints usage to stderr and sets
 * `process.exitCode` to 1.
 *
 * @param {string} [sub] - Subcommand; a falsy value selects `fixture`.
 * @param {string[]} [args] - Remaining CLI arguments; only `--json` is inspected.
 * @returns {Promise<void>}
 */
export async function harnessCommand(sub, args = []) {
  const action = sub || 'fixture';
  const isKnownAction = action === 'fixture' || action === 'review';
  if (!isKnownAction) {
    console.error('Usage: sks harness fixture|review [--json]');
    process.exitCode = 1;
    return;
  }

  const root = await sksRoot();
  let report;
  if (action === 'review') {
    const reportsDir = path.join(root, '.sneakoscope', 'reports');
    report = await writeHarnessGrowthReport(root, reportsDir, {});
  } else {
    report = harnessGrowthReport({});
  }

  // Machine-readable mode: dump the whole report and skip the summary.
  if (flag(args, '--json')) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  console.log('SKS Harness Growth');
  const fixtureVerdict = report.forgetting.fixture.passed ? 'pass' : 'fail';
  console.log(`Forgetting fixture: ${fixtureVerdict}`);
  console.log(`Cmux views: ${report.cmux.views.length}`);
  const taxonomyList = report.reliability.tool_error_taxonomy.join(', ');
  console.log(`Tool taxonomy: ${taxonomyList}`);
  const unknownAreBugs = report.reliability.unknown_errors_are_bugs ? 'yes' : 'no';
  console.log(`Unknown errors recorded as bugs: ${unknownAreBugs}`);
}
|
|
439
|
+
|
|
420
440
|
export async function codeStructureCommand(sub, args = []) {
|
|
421
441
|
const action = sub || 'scan';
|
|
422
442
|
if (action !== 'scan') {
|
|
@@ -1211,6 +1231,7 @@ export async function team(args) {
|
|
|
1211
1231
|
});
|
|
1212
1232
|
await writeWorkOrderLedger(dir, workOrder);
|
|
1213
1233
|
if (fromChatImgRequired) await writeFromChatImgArtifacts(dir, { missionId: id, requests: [{ verbatim: prompt }], ambiguities: ['image source inventory must be completed before implementation'] });
|
|
1234
|
+
await writeHarnessGrowthReport(root, dir, {});
|
|
1214
1235
|
let dashboardState = await writeTeamDashboardState(dir, { missionId: id, mission: { id, mode: 'team' }, effort: effortDecision.selected_effort, phase: 'intake', next_action: fromChatImgRequired ? 'complete visual source inventory and work-order mapping' : 'run Team analysis scouts' });
|
|
1215
1236
|
await writeJsonAtomic(path.join(dir, 'team-gate.json'), { passed: false, team_roster_confirmed: true, analysis_artifact: false, triwiki_refreshed: false, triwiki_validated: false, consensus_artifact: false, ...runtime.gate_fields, implementation_team_fresh: false, review_artifact: false, integration_evidence: false, session_cleanup: false, context7_evidence: false, ...(fromChatImgRequired ? { from_chat_img_required: true, from_chat_img_request_coverage: false } : {}) });
|
|
1216
1237
|
dashboardState = await writeTeamDashboardState(dir, { missionId: id, mission: { id, mode: 'team' }, effort: effortDecision.selected_effort, phase: 'intake', next_action: fromChatImgRequired ? 'complete visual source inventory and work-order mapping' : 'run Team analysis scouts' });
|
|
@@ -17,6 +17,7 @@ export const ARTIFACT_FILES = {
|
|
|
17
17
|
memory_sweep_report: 'memory-sweep-report.json',
|
|
18
18
|
skill_forge_report: 'skill-forge-report.json',
|
|
19
19
|
mistake_memory_report: 'mistake-memory-report.json',
|
|
20
|
+
harness_growth_report: 'harness-growth-report.json',
|
|
20
21
|
code_structure_report: 'code-structure-report.json',
|
|
21
22
|
team_dashboard_state: 'team-dashboard-state.json',
|
|
22
23
|
cmux_pane_plan: 'cmux-pane-plan.json',
|
|
@@ -163,6 +164,21 @@ export function validateMistakeMemoryReport(data = {}) {
|
|
|
163
164
|
return validationResult('MistakeMemoryReport', errors);
|
|
164
165
|
}
|
|
165
166
|
|
|
167
|
+
/**
 * Validates the shape of a harness growth report.
 *
 * Checks, in order: the six top-level sections are objects, the forgetting
 * fixture passed, the tool error taxonomy is an array containing `Unknown`,
 * unknown errors are flagged as bugs, and at least 10 Cmux views are listed.
 *
 * @param {object} [data] - Parsed report; defaults to an empty object.
 * @returns {*} Whatever `validationResult('HarnessGrowthReport', errors)` yields.
 */
export function validateHarnessGrowthReport(data = {}) {
  const errors = [];

  // Each required section paired with the error code reported when it is not an object.
  const requiredSections = [
    ['forgetting', 'forgetting_missing'],
    ['skills', 'skills_missing'],
    ['experiments', 'experiments_missing'],
    ['codex_native', 'codex_native_missing'],
    ['cmux', 'cmux_missing'],
    ['reliability', 'reliability_missing']
  ];
  for (const [section, code] of requiredSections) {
    pushMissing(errors, isObj(data[section]), code);
  }

  const fixturePassed = data.forgetting?.fixture?.passed === true;
  if (!fixturePassed) {
    errors.push('forgetting_fixture_failed');
  }

  const taxonomy = data.reliability?.tool_error_taxonomy;
  const taxonomyHasUnknown = Array.isArray(taxonomy) && taxonomy.includes('Unknown');
  if (!taxonomyHasUnknown) {
    errors.push('tool_error_taxonomy_missing_unknown');
  }

  const unknownMarkedAsBug = data.reliability?.unknown_errors_are_bugs === true;
  if (!unknownMarkedAsBug) {
    errors.push('unknown_errors_not_marked_bug');
  }

  const views = data.cmux?.views;
  const enoughViews = Array.isArray(views) && views.length >= 10;
  if (!enoughViews) {
    errors.push('cmux_views_incomplete');
  }

  return validationResult('HarnessGrowthReport', errors);
}
|
|
181
|
+
|
|
166
182
|
export function validateCodeStructureReport(data = {}) {
|
|
167
183
|
const errors = [];
|
|
168
184
|
pushMissing(errors, isObj(data.thresholds), 'thresholds_missing');
|
|
@@ -179,7 +195,7 @@ export function validateTeamDashboardState(data = {}) {
|
|
|
179
195
|
pushMissing(errors, Array.isArray(data.gates), 'gates_not_array');
|
|
180
196
|
pushMissing(errors, Array.isArray(data.agents), 'agents_not_array');
|
|
181
197
|
pushMissing(errors, Array.isArray(data.tasks), 'tasks_not_array');
|
|
182
|
-
for (const pane of ['Mission
|
|
198
|
+
for (const pane of ['Mission / Goal View', 'Agent Grid View', 'MultiAgentV2 Graph View', 'Work Order Ledger View', 'Skill Autopilot View', 'TriWiki Memory Health View', 'Forget Queue', 'Mistake Immunity', 'Tool Reliability View', 'Harness Experiments View', 'Dogfood Evidence View', 'Code Structure']) {
|
|
183
199
|
if (!arr(data.panes).includes(pane)) errors.push(`pane_missing:${pane}`);
|
|
184
200
|
}
|
|
185
201
|
if (arr(data.gates).some((gate) => !GATE_STATUSES.has(gate.status))) errors.push('gate_status_invalid');
|
|
@@ -214,6 +230,7 @@ export const ARTIFACT_VALIDATORS = {
|
|
|
214
230
|
memory_sweep_report: validateMemorySweepReport,
|
|
215
231
|
skill_forge_report: validateSkillForgeReport,
|
|
216
232
|
mistake_memory_report: validateMistakeMemoryReport,
|
|
233
|
+
harness_growth_report: validateHarnessGrowthReport,
|
|
217
234
|
code_structure_report: validateCodeStructureReport,
|
|
218
235
|
team_dashboard_state: validateTeamDashboardState,
|
|
219
236
|
cmux_pane_plan: validateCmuxPanePlan,
|
package/src/core/evaluation.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { nowIso, sha256, writeJsonAtomic } from './fsx.mjs';
|
|
2
3
|
import { contextCapsule } from './triwiki-attention.mjs';
|
|
3
4
|
import { validateWikiCoordinateIndex } from './wiki-coordinate.mjs';
|
|
4
5
|
|
|
@@ -10,15 +11,359 @@ export const DEFAULT_EVAL_THRESHOLDS = Object.freeze({
|
|
|
10
11
|
max_candidate_build_ms_per_run: 25
|
|
11
12
|
});
|
|
12
13
|
|
|
14
|
+
/** File name of the harness growth report artifact. */
export const HARNESS_GROWTH_REPORT = 'harness-growth-report.json';

/** Frozen list of memory lifecycle state names. */
export const MEMORY_LIFECYCLE_STATES = Object.freeze([
  // In-use states.
  'ACTIVE', 'PINNED',
  // Ageing or problematic states.
  'DORMANT', 'STALE', 'DUPLICATE', 'CONFLICTED', 'QUARANTINED',
  // Retired states.
  'ARCHIVED', 'DISABLED', 'DELETE_CANDIDATE', 'DELETED'
]);

/** Frozen list of action names that a forgetting decision may carry. */
export const FORGETTING_ACTIONS = Object.freeze([
  'KEEP_ACTIVE', 'PIN', 'UNPIN', 'UPDATE',
  'CONSOLIDATE', 'DEMOTE', 'DISABLE', 'ARCHIVE',
  'QUARANTINE', 'HARD_DELETE', 'NOOP',
  'PROMOTE_SKILL', 'PROMOTE_RULE', 'PROMOTE_TEST'
]);

/** Frozen list of tool error categories; `Unknown` is the last entry. */
export const TOOL_ERROR_TAXONOMY = Object.freeze([
  'InvalidArguments', 'UnexpectedEnvironment', 'ProviderError',
  'UserAborted', 'Timeout', 'PermissionDenied',
  'NetworkDenied', 'ResourceExhausted', 'Conflict',
  'Unknown'
]);
|
|
59
|
+
|
|
60
|
+
/**
 * Default retention thresholds (in days) keyed by memory object type.
 *
 * `Object.freeze` is shallow, so every nested record is frozen as well;
 * otherwise any importer could silently rewrite a shared default.
 */
export const DEFAULT_FORGETTING_THRESHOLDS = Object.freeze({
  wiki_claim: Object.freeze({
    stale_after_days: 60,
    dormant_after_days_without_use: 90,
    archive_after_days_without_use: 150,
    hard_delete_after_days_without_use: 240
  }),
  wiki_page: Object.freeze({
    stale_after_days: 90,
    archive_after_days_without_use: 180,
    hard_delete_after_days_without_use: 365
  }),
  codex_memory: Object.freeze({
    stale_after_days: 60,
    hard_delete_after_days_without_use: 180
  }),
  skill: Object.freeze({
    stale_after_days_without_use: 45,
    disable_after_days_without_use: 90,
    archive_after_days_without_use: 180,
    hard_delete_after_days_without_use: 270
  }),
  mistake_fingerprint: Object.freeze({
    stale_after_days_without_recurrence: 180,
    archive_after_days_without_recurrence: 365,
    hard_delete_after_days_without_recurrence: 540
  }),
  temporary_artifact: Object.freeze({
    archive_after_days: 14,
    hard_delete_after_days: 45
  })
});

/**
 * Named permission profiles, each describing filesystem access, network access
 * and intended purpose. Nested records are frozen for the same reason as above.
 */
export const PERMISSION_PROFILES = Object.freeze({
  read_only_explorer: Object.freeze({
    filesystem: 'read-only',
    network: 'disabled_or_limited',
    purpose: 'Map code, collect evidence, no writes.'
  }),
  workspace_worker: Object.freeze({
    filesystem: 'workspace-write',
    network: 'disabled_by_default',
    purpose: 'Implement local code changes safely.'
  }),
  dogfood_browser: Object.freeze({
    filesystem: 'workspace-write',
    network: 'localhost_and_required_docs',
    purpose: 'Run app/browser dogfood and collect evidence.'
  }),
  harness_research: Object.freeze({
    filesystem: 'workspace-write',
    network: 'limited_allowlist',
    purpose: 'Fetch official docs/research for harness improvements.'
  }),
  dangerous_full_access: Object.freeze({
    filesystem: 'full-access',
    network: 'controlled',
    purpose: 'Never default. Requires explicit reason and review.'
  })
});

/** Default MultiAgentV2 limits and output policy (flat record, so one freeze suffices). */
export const DEFAULT_MULTIAGENT_V2 = Object.freeze({
  max_threads: 6,
  max_depth: 1,
  job_max_runtime_seconds: 1800,
  wait_control: 'bounded_wait_then_structured_summary',
  subagent_output: 'structured_summary_only'
});
|
|
84
|
+
|
|
85
|
+
/** Frozen, ordered list of Cmux cockpit view titles. */
export const CMUX_COCKPIT_VIEWS = Object.freeze([
  // Mission and agents.
  'Mission / Goal View',
  'Agent Grid View',
  'MultiAgentV2 Graph View',
  'Work Order Ledger View',
  // Skills and memory.
  'Skill Autopilot View',
  'TriWiki Memory Health View',
  'Forget Queue View',
  'Mistake Immunity View',
  // Reliability and evidence.
  'Tool Reliability View',
  'Harness Experiments View',
  'Dogfood Evidence View',
  'Code Structure View',
  'Statusline / Terminal Title Preview'
]);
|
|
100
|
+
|
|
13
101
|
/**
 * Rough token estimate: one token per four characters, never less than one.
 *
 * Strings are measured as-is; any other value is measured by its
 * `JSON.stringify` output (a missing or empty serialization counts as length 0).
 *
 * @param {*} value - String or JSON-serializable value.
 * @returns {number} Estimated token count, at least 1.
 */
export function estimateTokens(value) {
  let serialized = value;
  if (typeof value !== 'string') {
    serialized = JSON.stringify(value);
  }
  const charCount = String(serialized || '').length;
  const estimate = Math.ceil(charCount / 4);
  return Math.max(1, estimate);
}
|
|
17
105
|
|
|
106
|
+
/**
 * Ordered keyword rules behind `classifyToolError`; the first matching rule wins.
 * None of the patterns use the `g`/`y` flags, so sharing them across calls is safe.
 */
const TOOL_ERROR_RULES = Object.freeze([
  ['InvalidArguments', /invalid|required|schema|argument|parameter|json/],
  ['UnexpectedEnvironment', /enoent|not found|cwd|path|missing file|environment|not installed/],
  // Word boundaries keep a 5xx status from matching inside longer numbers such as "1500ms".
  ['ProviderError', /provider|upstream|api error|\b5\d\d\b|service unavailable/],
  ['UserAborted', /abort|cancel|interrupted|user stopped/],
  ['Timeout', /timeout|timed out|deadline/],
  // Network is tested before the generic permission rule: otherwise "denied" or
  // "sandbox" in a network denial would always be reported as PermissionDenied.
  ['NetworkDenied', /network|dns|eai_again|enotfound|offline/],
  ['PermissionDenied', /permission|denied|not allowed|approval|sandbox/],
  ['ResourceExhausted', /rate limit|quota|memory|resource|emfile|enospc|token limit|too large/],
  ['Conflict', /conflict|merge|lock|concurrent|dirty/]
]);

/**
 * Maps an error-like object to a tool error category by keyword matching.
 *
 * The `code`, `name`, `message` and `stderr` fields are concatenated and
 * lowercased, then tested against `TOOL_ERROR_RULES` in order.
 *
 * @param {{code?: *, name?: *, message?: *, stderr?: *}|null} [input] - Error-like
 *   object; `null` and `undefined` are treated as an empty object.
 * @returns {string} The first matching category, or `'Unknown'` when nothing matches.
 */
export function classifyToolError(input = {}) {
  // The default parameter only covers `undefined`; guard `null` explicitly.
  const source = input ?? {};
  const text = `${source.code || ''} ${source.name || ''} ${source.message || ''} ${source.stderr || ''}`.toLowerCase();
  for (const [category, pattern] of TOOL_ERROR_RULES) {
    if (pattern.test(text)) return category;
  }
  return 'Unknown';
}
|
|
119
|
+
|
|
120
|
+
/**
 * Scores how useful a memory object is, on a 0–100 scale.
 *
 * Positive contributions: recency (14 when the newest known timestamp is at
 * most 30 days old, else 4), evidence (4 per item, capped at 20), successful
 * use (3 per use, capped at 16), uniqueness (10, or -18 for a duplicate),
 * trust (`trust_score` × 18, defaulting to 0.5) and regression prevention (12).
 * Flag-based penalties are subtracted and the result is clamped via `clamp`.
 *
 * @param {object} [object] - Memory object carrying counters, flags and timestamps.
 * @returns {number} Result of `clamp(0, 100, total)`.
 */
export function utilityScore(object = {}) {
  const evidencePoints = Math.min(20, Number(object.evidence_count || 0) * 4);
  const usePoints = Math.min(16, Number(object.success_count || object.use_count || 0) * 3);

  const lastTouched = object.updated_at || object.last_used_at || object.created_at;
  const recencyPoints = daysSince(lastTouched) <= 30 ? 14 : 4;

  const uniquenessPoints = object.duplicate_of ? -18 : 10;
  const trustPoints = Math.round(Number(object.trust_score ?? 0.5) * 18);
  const preventionPoints = object.regression_prevention ? 12 : 0;

  // Each truthy flag subtracts its weight from the total.
  const penaltyWeights = [
    ['stale', 14],
    ['conflicted', 28],
    ['failed_use', 10],
    ['prompt_bloat', 8],
    ['security_risk', 80],
    ['maintenance_cost', 8]
  ];
  let penaltyTotal = 0;
  for (const [flagName, weight] of penaltyWeights) {
    penaltyTotal += object[flagName] ? weight : 0;
  }

  const total = recencyPoints + evidencePoints + usePoints + uniquenessPoints + trustPoints + preventionPoints - penaltyTotal;
  return clamp(0, 100, total);
}
|
|
137
|
+
|
|
138
|
+
/**
 * Chooses a forgetting action for a memory object.
 *
 * Rules are evaluated top to bottom and the first match wins: pinned objects
 * are kept; secrets and poisoned/unsafe content are hard-deleted; known-false
 * and conflicted objects are quarantined; duplicates are consolidated;
 * repeated successes and untested repeated mistakes are promoted; the rest is
 * decided by the utility score and the `stale` flag.
 *
 * @param {object} [object] - Memory object to evaluate.
 * @param {object} [opts] - Options forwarded to `graceChecksPass` and `tombstone`.
 * @returns {*} Whatever `decision(...)` returns for the matched rule.
 */
export function forgettingDecision(object = {}, opts = {}) {
  const lifecycle = String(object.lifecycle_state || object.status || '').toUpperCase();
  const utility = utilityScore(object);
  // Every rule reports the same score; trailing arguments pass through untouched.
  const verdict = (action, nextState, reasons, ...extra) => decision(action, nextState, utility, reasons, ...extra);

  if (isPinned(object)) {
    return verdict('KEEP_ACTIVE', 'PINNED', ['retention_exempt']);
  }
  if (containsSecret(object)) {
    return verdict('HARD_DELETE', 'DELETED', ['secret_or_sensitive_content'], true);
  }
  if (object.poisoned || object.unsafe_instruction) {
    return verdict('HARD_DELETE', 'DELETED', ['poisoned_or_unsafe'], true);
  }
  if (object.known_false) {
    return verdict('QUARANTINE', 'QUARANTINED', ['known_false']);
  }
  if (object.duplicate_of) {
    return verdict('CONSOLIDATE', 'DUPLICATE', ['duplicate']);
  }
  if (object.conflicted || lifecycle === 'CONFLICTED') {
    return verdict('QUARANTINE', 'CONFLICTED', ['conflict_requires_resolution']);
  }
  if (object.repeated_success && Number(object.success_count || 0) >= 3) {
    return verdict('PROMOTE_SKILL', 'ACTIVE', ['verified_repetition']);
  }
  if (object.repeated_mistake && !object.regression_test) {
    return verdict('PROMOTE_TEST', 'ACTIVE', ['mistake_without_test']);
  }
  if (object.stale && Number(object.evidence_count || 0) >= 3 && Number(object.trust_score || 0) >= 0.65) {
    return verdict('DEMOTE', 'STALE', ['stale_but_useful_verify_before_use']);
  }

  // Score-driven tail: delete, archive, demote/disable, or keep.
  if (utility < 20 && graceChecksPass(object, opts)) {
    return verdict('HARD_DELETE', 'DELETED', ['old_unused_low_utility'], false, tombstone(object, opts));
  }
  if (utility < 40) {
    return verdict('ARCHIVE', 'ARCHIVED', ['low_utility']);
  }
  if (utility < 60 || object.stale) {
    const isSkill = object.type === 'skill';
    return verdict(isSkill ? 'DISABLE' : 'DEMOTE', isSkill ? 'DISABLED' : 'STALE', ['stale_or_watch']);
  }
  return verdict('KEEP_ACTIVE', 'ACTIVE', ['useful_current']);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Builds a skill card record, filling a default for every missing field.
 *
 * Fallbacks use `||`, so any falsy input value (not just `undefined`) is
 * replaced by the default. The id falls back to `input.id`, then to an id
 * derived from the name via `safeId`; timestamps fall back to `nowIso()`.
 * `implicit_invocation_allowed` is true unless the input sets it to `false`.
 *
 * @param {object} [input] - Partial skill card.
 * @returns {object} Complete skill card with a fixed key order.
 */
export function createSkillCard(input = {}) {
  const counter = (key) => Number(input[key] || 0);
  const listOf = (key) => input[key] || [];

  const skillId = input.skill_id || input.id || `skill.${safeId(input.name || 'candidate')}`;
  const displayName = input.name || input.skill_id || 'Candidate Skill';
  const createdAt = input.created_at || nowIso();
  const updatedAt = input.updated_at || nowIso();

  return {
    skill_id: skillId,
    name: displayName,
    version: input.version || '1.0.0',
    status: input.status || 'active',
    created_at: createdAt,
    updated_at: updatedAt,
    last_used_at: input.last_used_at || null,
    use_count: counter('use_count'),
    success_count: counter('success_count'),
    failure_count: counter('failure_count'),
    false_trigger_count: counter('false_trigger_count'),
    owner: input.owner || 'harness',
    trigger_summary: input.trigger_summary || '',
    anti_triggers: listOf('anti_triggers'),
    inputs: listOf('inputs'),
    outputs: listOf('outputs'),
    validation: input.validation || { commands: [], manual_checks: [], schemas: [] },
    risk_notes: listOf('risk_notes'),
    retirement_conditions: input.retirement_conditions || ['stale without use', 'repeated false trigger', 'validation no longer runs'],
    related_mistake_fingerprints: listOf('related_mistake_fingerprints'),
    related_wiki_entries: listOf('related_wiki_entries'),
    plugin_distribution: input.plugin_distribution || 'none',
    implicit_invocation_allowed: input.implicit_invocation_allowed !== false
  };
}
|
|
183
|
+
|
|
184
|
+
/**
 * Build a harness experiment registry entry from partial input.
 *
 * Missing (falsy) fields are filled with defaults. `owner` is always
 * 'harness_growth' regardless of the input. When no experiment_id is given,
 * one is derived from the slugged title plus the first 8 characters of a
 * hash of the serialized input.
 *
 * @param {object} [input] - Partial experiment fields.
 * @returns {object} Experiment record with all fields populated.
 */
export function createHarnessExperiment(input = {}) {
  // Falsy-aware lookup: any falsy input value is replaced by the fallback.
  const or = (key, fallback) => input[key] || fallback;

  const defaultLaunchGate = {
    min_quality_delta: '>= 0',
    max_latency_regression: '<= 10%',
    max_cost_regression: '<= 10%',
    max_error_regression: '<= 0',
    required_evidence: 'offline eval plus rollback plan'
  };
  const defaultMonitoring = {
    duration_days: 7,
    alert_thresholds: { unknown_error_rate: 0, repeated_mistake_rate: 0 }
  };

  return {
    // Generated values stay lazy so they are only computed when needed.
    experiment_id: input.experiment_id
      ? input.experiment_id
      : `exp.${safeId(input.title || 'harness')}.${sha256(JSON.stringify(input)).slice(0, 8)}`,
    title: or('title', 'Harness experiment'),
    owner: 'harness_growth',
    created_at: input.created_at ? input.created_at : nowIso(),
    status: or('status', 'draft'),
    vision_alignment: or('vision_alignment', 'Improve verified task outcomes while reducing context bloat.'),
    hypothesis: or('hypothesis', ''),
    change_surface: or('change_surface', ['eval']),
    variant_a: or('variant_a', 'baseline'),
    variant_b: or('variant_b', 'candidate'),
    risk_level: or('risk_level', 'low'),
    rollback_plan: or('rollback_plan', 'revert candidate surface and re-run smoke shard'),
    offline_eval_suite: or('offline_eval_suite', ['sneakoscopebench:smoke']),
    online_metrics: or('online_metrics', ['latency_p95_ms', 'token_input', 'tool_error_rate', 'keep_rate', 'context_bloat_score']),
    launch_gate: or('launch_gate', defaultLaunchGate),
    post_launch_monitoring: or('post_launch_monitoring', defaultMonitoring)
  };
}
|
|
210
|
+
|
|
211
|
+
/**
 * Produce the fixed list of sample memory objects used by the forgetting
 * fixture. Timestamps are computed relative to the current time (400, 2 and
 * 95 days ago), so every call returns fresh objects.
 *
 * @returns {object[]} Nine fixture objects covering pinned, stale, duplicate,
 *   poisoned, secret-bearing, skill and mistake-fingerprint cases.
 */
export function buildHarnessGrowthFixture() {
  const longAgo = isoDaysAgo(400);
  const justNow = isoDaysAgo(2);
  const aboutAQuarterAgo = isoDaysAgo(95);

  const fixture = [];
  fixture.push({ id: 'pinned-user-rule', type: 'wiki_claim', lifecycle_state: 'PINNED', pinned: true, trust_score: 0.95, updated_at: longAgo });
  fixture.push({ id: 'old-unused-wiki', type: 'wiki_page', trust_score: 0.2, updated_at: longAgo, use_count: 0, stale: true });
  fixture.push({ id: 'duplicate-claim', type: 'wiki_claim', duplicate_of: 'better-claim', trust_score: 0.5, updated_at: longAgo });
  fixture.push({ id: 'stale-useful-architecture', type: 'wiki_claim', trust_score: 0.7, evidence_count: 3, stale: true, updated_at: aboutAQuarterAgo });
  fixture.push({ id: 'poisoned-memory', type: 'memory', poisoned: true, trust_score: 0.1, updated_at: justNow });
  fixture.push({ id: 'old-unused-skill', type: 'skill', trust_score: 0.2, updated_at: longAgo, false_trigger_count: 2, use_count: 0 });
  fixture.push({ id: 'recent-successful-skill', type: 'skill', trust_score: 0.9, updated_at: justNow, success_count: 4, repeated_success: true });
  fixture.push({ id: 'secret-memory', type: 'memory', text: 'token=sk-live-secret-value', updated_at: justNow });
  fixture.push({ id: 'mistake-no-test', type: 'mistake_fingerprint', trust_score: 0.9, regression_prevention: true, repeated_mistake: true, regression_test: null, updated_at: justNow });
  return fixture;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Run the forgetting policy over the built-in fixture and check each
 * object's resulting action against the expected outcome(s).
 *
 * @returns {{schema_version: number, fixture: string, created_at: string,
 *   decisions: object[], checks: Object<string, boolean>, passed: boolean}}
 *   `passed` is true only when every individual check is true.
 */
export function runHarnessGrowthFixture() {
  const decisions = buildHarnessGrowthFixture().map((object) => {
    const verdict = forgettingDecision(object, { now: new Date() });
    return { id: object.id, ...verdict };
  });

  // Index decisions by fixture id for direct lookup.
  const byId = Object.fromEntries(decisions.map((item) => [item.id, item]));
  const actionOf = (id) => byId[id].action;
  const actionIn = (id, allowed) => allowed.includes(actionOf(id));

  const checks = {
    pinned_rule_remains: actionOf('pinned-user-rule') === 'KEEP_ACTIVE',
    old_wiki_leaves_active: actionIn('old-unused-wiki', ['ARCHIVE', 'HARD_DELETE']),
    duplicate_consolidates: actionOf('duplicate-claim') === 'CONSOLIDATE',
    stale_useful_stays_hydratable: actionIn('stale-useful-architecture', ['DEMOTE', 'KEEP_ACTIVE']),
    poisoned_removed: actionIn('poisoned-memory', ['HARD_DELETE', 'QUARANTINE']),
    old_skill_disabled_or_removed: actionIn('old-unused-skill', ['DISABLE', 'ARCHIVE', 'HARD_DELETE']),
    recent_skill_active_or_promoted: actionIn('recent-successful-skill', ['KEEP_ACTIVE', 'PROMOTE_SKILL']),
    secret_hard_deleted: actionOf('secret-memory') === 'HARD_DELETE',
    uncovered_mistake_kept_for_test: actionOf('mistake-no-test') === 'PROMOTE_TEST'
  };

  const outcome = {
    schema_version: 1,
    fixture: 'memory_sweep_fixture',
    created_at: nowIso(),
    decisions,
    checks
  };
  outcome.passed = Object.values(checks).every(Boolean);
  return outcome;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Assemble the Harness Growth report: forgetting fixture results, example
 * skill card and experiment schemas, Codex-native defaults, Cmux cockpit
 * views, and the classified tool-error samples.
 *
 * @param {object} [input]
 * @param {object[]} [input.tool_errors] - Tool errors to classify; when
 *   omitted (falsy), three built-in sample errors are used.
 * @returns {object} Report object (schema_version 1).
 */
export function harnessGrowthReport(input = {}) {
  const fixture = runHarnessGrowthFixture();
  const rawErrors = input.tool_errors || [
    { message: 'operation timed out after 30s' },
    { message: 'unexpected provider 500' },
    { message: 'unmatched example for taxonomy coverage' }
  ];
  // Classify each error exactly once so the `unknown_is_bug` flag is always
  // derived from the same classification that is recorded on the entry.
  const toolErrors = rawErrors.map((error) => {
    const classification = classifyToolError(error);
    return { ...error, classification, unknown_is_bug: classification === 'Unknown' };
  });
  return {
    schema_version: 1,
    generated_at: nowIso(),
    forgetting: {
      lifecycle_states: MEMORY_LIFECYCLE_STATES,
      actions: FORGETTING_ACTIONS,
      thresholds: DEFAULT_FORGETTING_THRESHOLDS,
      fixture
    },
    skills: {
      card_schema_example: createSkillCard({
        skill_id: 'skill.harness.weekly-review',
        name: 'Weekly Harness Review',
        trigger_summary: 'Run on weekly harness review automation or explicit harness growth request.',
        validation: { commands: ['sks harness fixture --json'], manual_checks: ['review proposed deletions before live hard-delete'], schemas: ['harness-growth-report.json'] }
      })
    },
    experiments: {
      registry_schema_example: createHarnessExperiment({
        title: 'Visible ambiguity question delivery',
        hypothesis: 'Stop gates that require visible question blocks reduce hidden clarification failures.',
        change_surface: ['prompt', 'tool', 'eval'],
        offline_eval_suite: ['selftest:team-visible-questions']
      })
    },
    codex_native: {
      permission_profiles: PERMISSION_PROFILES,
      multiagent_v2: DEFAULT_MULTIAGENT_V2,
      goal_checkpoint_required_fields: ['goal_id', 'phase', 'summary', 'completed_checkboxes', 'open_checkboxes', 'blockers', 'evidence'],
      external_session_import: 'structured_summary_only_with_utility_score_and_forgetting_metadata'
    },
    cmux: {
      views: CMUX_COCKPIT_VIEWS,
      status_terms: ['idle', 'planning', 'exploring', 'implementing', 'waiting_for_tool', 'waiting_for_approval', 'dogfooding', 'verifying', 'summarizing', 'blocked', 'failed', 'completed', 'paused', 'resuming']
    },
    reliability: {
      tool_error_taxonomy: TOOL_ERROR_TAXONOMY,
      classified_errors: toolErrors,
      unknown_errors_are_bugs: true
    },
    validation: {
      fixture_passed: fixture.passed,
      unknown_errors_recorded: toolErrors.filter((e) => e.classification === 'Unknown').length
    }
  };
}
|
|
305
|
+
|
|
306
|
+
/**
 * Generate the Harness Growth report and write it as JSON.
 *
 * @param {string} root - Project root; used only when `dir` is falsy.
 * @param {string} [dir] - Target directory; defaults to
 *   `<root>/.sneakoscope/reports`.
 * @param {object} [input] - Forwarded to harnessGrowthReport().
 * @returns {Promise<object>} The report that was written.
 */
export async function writeHarnessGrowthReport(root, dir, input = {}) {
  const report = harnessGrowthReport(input);
  const targetDir = dir ? dir : path.join(root, '.sneakoscope', 'reports');
  const targetFile = path.join(targetDir, HARNESS_GROWTH_REPORT);
  await writeJsonAtomic(targetFile, report);
  return report;
}
|
|
311
|
+
|
|
18
312
|
/**
 * Clamp a number into the closed interval [0, 1].
 * Anything that is not a finite number (including numeric strings) maps to 0.
 *
 * @param {*} x - Candidate value.
 * @returns {number} Value within [0, 1].
 */
function clamp01(x) {
  const finite = Number.isFinite(x) ? x : 0;
  const capped = Math.min(1, finite);
  return Math.max(0, capped);
}
|
|
21
315
|
|
|
316
|
+
/**
 * Package the result of a forgetting evaluation into a plain record.
 *
 * @param {string} action - Action to take on the object.
 * @param {string} lifecycle_state - Resulting lifecycle state.
 * @param {number} utility_score - Score the decision was based on.
 * @param {string[]} reason_codes - Codes explaining the decision.
 * @param {boolean} [immediate=false] - Stored as-is in the `immediate` field.
 * @param {?object} [tombstoneMeta=null] - Tombstone metadata, if any.
 * @returns {object} Decision record; `tombstoneMeta` is exposed as `tombstone`.
 */
function decision(action, lifecycle_state, utility_score, reason_codes, immediate = false, tombstoneMeta = null) {
  const verdict = { action, lifecycle_state, utility_score, reason_codes, immediate };
  verdict.tombstone = tombstoneMeta;
  return verdict;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Tell whether an object is pinned, either through `pinned === true` or a
 * lifecycle_state equal to 'PINNED' (compared case-insensitively).
 *
 * @param {object} [object] - Memory object to inspect.
 * @returns {boolean} True when the object is pinned.
 */
function isPinned(object = {}) {
  if (object.pinned === true) return true;
  const state = String(object.lifecycle_state || '');
  return state.toUpperCase() === 'PINNED';
}
|
|
323
|
+
|
|
324
|
+
/**
 * Heuristically detect secret-looking material anywhere in an object.
 *
 * The object is serialized with JSON.stringify and scanned
 * (case-insensitively) for common credential prefixes (`sk-`, `ghp_`,
 * `glpat-`, `xox?-`, `AKIA…`) and for the keywords `secret`, `private key`,
 * `token=` and `password=`.
 *
 * The `sk-` prefix only counts at the start of the text or after a
 * non-alphanumeric character. Without that boundary, ordinary words such as
 * "task-" or "risk-" would be reported as secrets.
 *
 * @param {object} [object] - Object to scan.
 * @returns {boolean} True when a secret-like pattern is found.
 */
function containsSecret(object = {}) {
  // JSON.stringify returns undefined for functions/symbols; scan '' in that case.
  const text = JSON.stringify(object) ?? '';
  return /((^|[^a-z0-9])sk-|ghp_|glpat-|xox[baprs]-|AKIA[0-9A-Z]{16}|secret|private[_-]?key|token=|password=)/i.test(text);
}
|
|
328
|
+
|
|
329
|
+
/**
 * Evaluate the grace checks for an object. Returns false when any of the
 * following holds, true otherwise:
 *   - the object is pinned;
 *   - it is flagged as an active work order, required by skill validation,
 *     or the only source for a user preference;
 *   - it is the only source for mistake prevention and has no regression test;
 *   - it was used/updated/created fewer than 90 days before `opts.now`.
 *
 * @param {object} [object] - Memory object to evaluate.
 * @param {object} [opts]
 * @param {Date} [opts.now] - Reference time forwarded to daysSince().
 * @returns {boolean} True when every grace check passes.
 */
function graceChecksPass(object = {}, opts = {}) {
  if (isPinned(object)) return false;

  const stillReferenced = Boolean(
    object.active_work_order || object.required_by_skill_validation || object.only_source_for_user_preference
  );
  const guardsUncoveredMistake = Boolean(object.only_source_for_mistake_prevention) && !object.regression_test;
  if (stillReferenced || guardsUncoveredMistake) return false;

  // Most recent activity: last use, falling back to update and creation time.
  const lastTouched = object.last_used_at || object.updated_at || object.created_at;
  const withinGracePeriod = daysSince(lastTouched, opts.now) < 90;
  return !withinGracePeriod;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Build the tombstone metadata describing a deleted object.
 *
 * @param {object} [object] - Object being deleted.
 * @param {object} [opts]
 * @param {string} [opts.reason] - Deletion reason; defaults to
 *   'old-unused-low-utility'.
 * @param {string} [opts.deleted_by] - Actor; defaults to 'automation'.
 * @returns {object} Tombstone record. `content_hash` is null when
 *   `object.sensitive` is truthy; otherwise it is the first 24 characters of
 *   the hash of the serialized object.
 */
function tombstone(object = {}, opts = {}) {
  const digest = () => sha256(JSON.stringify(object));

  // Objects without an id are identified by a short prefix of their hash.
  const rawId = object.id || digest().slice(0, 16);
  const record = {
    deleted_object_id: safeId(rawId),
    object_type: object.type || 'memory',
    deleted_at: nowIso(),
    reason: opts.reason || 'old-unused-low-utility',
    replacement_id: object.replacement_id || object.duplicate_of || null,
    deleted_by: opts.deleted_by || 'automation'
  };
  record.content_hash = object.sensitive ? null : digest().slice(0, 24);
  return record;
}
|
|
348
|
+
|
|
349
|
+
/**
 * Number of whole days between `value` and `now`.
 *
 * @param {string|Date} value - Timestamp to measure from, parsed with
 *   Date.parse. Missing or unparseable values return 9999.
 * @param {Date|number|string|null} [now] - Reference time: a Date, epoch
 *   milliseconds, or a date string. Null, undefined or an unparseable value
 *   falls back to the current time, so the result is never NaN.
 * @returns {number} Floor of the elapsed days, or 9999 when `value` is invalid.
 */
function daysSince(value, now = new Date()) {
  const then = Date.parse(value || '');
  if (!Number.isFinite(then)) return 9999;

  let reference;
  if (now instanceof Date) reference = now.getTime();
  else if (typeof now === 'string') reference = Date.parse(now);
  else if (now == null) reference = Date.now();
  else reference = Number(now);
  // An invalid reference would poison every `< threshold` comparison with NaN.
  if (!Number.isFinite(reference)) reference = Date.now();

  return Math.floor((reference - then) / 86400000);
}
|
|
354
|
+
|
|
355
|
+
/**
 * ISO-8601 timestamp for the moment `days` days before the current time.
 *
 * @param {number|string} days - Day offset; coerced with Number().
 * @returns {string} ISO string produced by Date#toISOString.
 */
function isoDaysAgo(days) {
  const offsetMs = Number(days) * 86400000;
  const stamp = new Date(Date.now() - offsetMs);
  return stamp.toISOString();
}
|
|
358
|
+
|
|
359
|
+
/**
 * Turn an arbitrary value into a lowercase, dash-separated identifier.
 *
 * Runs of characters outside [a-z0-9] become a single '-', leading and
 * trailing dashes are stripped, and the result is cut to 80 characters.
 * An empty result becomes 'object'.
 *
 * @param {*} value - Value to slugify; falsy values are treated as ''.
 * @returns {string} Identifier of at most 80 characters.
 */
function safeId(value) {
  const slug = String(value || '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
  const trimmed = slug.slice(0, 80);
  return trimmed === '' ? 'object' : trimmed;
}
|
|
362
|
+
|
|
363
|
+
/**
 * Round a value to the nearest integer and clamp it into [min, max].
 * Values that coerce to NaN (or 0) are treated as 0 before clamping.
 *
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {*} value - Value to round and clamp.
 * @returns {number} Rounded value limited to the given bounds.
 */
function clamp(min, max, value) {
  const rounded = Math.round(Number(value) || 0);
  const capped = Math.min(max, rounded);
  return Math.max(min, capped);
}
|
|
366
|
+
|
|
22
367
|
function timed(fn, iterations) {
|
|
23
368
|
let result;
|
|
24
369
|
const count = Math.max(1, Number(iterations) || 1);
|
package/src/core/fsx.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import os from 'node:os';
|
|
|
5
5
|
import crypto from 'node:crypto';
|
|
6
6
|
import { spawn } from 'node:child_process';
|
|
7
7
|
|
|
8
|
-
export const PACKAGE_VERSION = '0.6.
|
|
8
|
+
export const PACKAGE_VERSION = '0.6.78';
|
|
9
9
|
export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
|
|
10
10
|
export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
|
|
11
11
|
|
|
@@ -36,6 +36,34 @@ export async function writeGoalWorkflow(dir, mission, opts = {}) {
|
|
|
36
36
|
ralph_removed: true,
|
|
37
37
|
ambiguity_gate: 'use normal SKS ambiguity gates when required by the selected execution route; Goal itself delegates persistence/continuation to Codex /goal',
|
|
38
38
|
evidence: ['goal-workflow.json', 'goal-bridge.md']
|
|
39
|
+
},
|
|
40
|
+
phase: action === 'clear' ? 'reporting' : 'intake',
|
|
41
|
+
user_outcome: prompt,
|
|
42
|
+
work_order_ledger_id: null,
|
|
43
|
+
checkpoints: [
|
|
44
|
+
{
|
|
45
|
+
timestamp: nowIso(),
|
|
46
|
+
phase: 'intake',
|
|
47
|
+
summary: 'Goal workflow bridge created.',
|
|
48
|
+
completed_checkboxes: ['goal workflow artifact written'],
|
|
49
|
+
open_checkboxes: ['continue original SKS route lifecycle when implementation is needed'],
|
|
50
|
+
blockers: [],
|
|
51
|
+
evidence: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT]
|
|
52
|
+
}
|
|
53
|
+
],
|
|
54
|
+
resume_context: {
|
|
55
|
+
stable_requirements: prompt ? [prompt] : [],
|
|
56
|
+
current_files: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT],
|
|
57
|
+
decisions: ['Codex native /goal is the persisted continuation surface'],
|
|
58
|
+
known_mistakes_to_avoid: ['do not clear noisy context without writing a structured handoff first'],
|
|
59
|
+
active_skills: ['goal'],
|
|
60
|
+
active_agents: []
|
|
61
|
+
},
|
|
62
|
+
clear_policy: {
|
|
63
|
+
preserve_work_order: true,
|
|
64
|
+
preserve_decisions: true,
|
|
65
|
+
preserve_evidence_links: true,
|
|
66
|
+
discard_noisy_logs: true
|
|
39
67
|
}
|
|
40
68
|
};
|
|
41
69
|
await writeJsonAtomic(path.join(dir, GOAL_WORKFLOW_ARTIFACT), workflow);
|
|
@@ -51,10 +79,23 @@ export async function updateGoalWorkflow(dir, action) {
|
|
|
51
79
|
action,
|
|
52
80
|
status: action === 'clear' ? 'cleared' : action === 'pause' ? 'paused' : action === 'resume' ? 'resumed' : current.status || 'created',
|
|
53
81
|
updated_at: nowIso(),
|
|
82
|
+
phase: action === 'pause' ? 'reporting' : action === 'resume' ? 'implementation' : action === 'clear' ? 'retro' : current.phase || 'intake',
|
|
54
83
|
native_goal: {
|
|
55
84
|
...(current.native_goal || {}),
|
|
56
85
|
slash_command: nativeGoalCommand(action, current.prompt || '')
|
|
57
|
-
}
|
|
86
|
+
},
|
|
87
|
+
checkpoints: [
|
|
88
|
+
...(Array.isArray(current.checkpoints) ? current.checkpoints : []),
|
|
89
|
+
{
|
|
90
|
+
timestamp: nowIso(),
|
|
91
|
+
phase: action,
|
|
92
|
+
summary: `Goal ${action} requested through SKS bridge.`,
|
|
93
|
+
completed_checkboxes: [`goal ${action} artifact update`],
|
|
94
|
+
open_checkboxes: action === 'clear' ? ['handoff preserved before noisy context clear'] : [],
|
|
95
|
+
blockers: [],
|
|
96
|
+
evidence: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT]
|
|
97
|
+
}
|
|
98
|
+
]
|
|
58
99
|
};
|
|
59
100
|
await writeJsonAtomic(path.join(dir, GOAL_WORKFLOW_ARTIFACT), next);
|
|
60
101
|
await writeTextAtomic(path.join(dir, GOAL_BRIDGE_ARTIFACT), goalBridgeMarkdown(next));
|
|
@@ -5,6 +5,7 @@ import { missionDir, setCurrent, stateFile } from './mission.mjs';
|
|
|
5
5
|
import { checkDbOperation, dbBlockReason, handleMadSksUserConfirmation } from './db-safety.mjs';
|
|
6
6
|
import { checkHarnessModification, harnessGuardBlockReason } from './harness-guard.mjs';
|
|
7
7
|
import { activeRouteContext, evaluateStop, prepareRoute, promptPipelineContext as routePipelineContext, recordContext7Evidence, recordSubagentEvidence, routePrompt } from './pipeline.mjs';
|
|
8
|
+
import { classifyToolError } from './evaluation.mjs';
|
|
8
9
|
|
|
9
10
|
const TEAM_DIGEST_MAX_EVENTS = 4;
|
|
10
11
|
const TEAM_DIGEST_MESSAGE_CHARS = 180;
|
|
@@ -163,6 +164,7 @@ async function hookPostTool(root, state, payload, noQuestion) {
|
|
|
163
164
|
}
|
|
164
165
|
await recordContext7Evidence(root, state, payload).catch(() => null);
|
|
165
166
|
await recordSubagentEvidence(root, state, payload).catch(() => null);
|
|
167
|
+
if (toolFailed(payload)) await recordToolErrorTaxonomy(root, state, payload).catch(() => null);
|
|
166
168
|
const teamDigest = await teamLiveDigest(root, state);
|
|
167
169
|
if (!noQuestion) {
|
|
168
170
|
return teamDigest?.context
|
|
@@ -183,6 +185,25 @@ async function hookPostTool(root, state, payload, noQuestion) {
|
|
|
183
185
|
: { continue: true };
|
|
184
186
|
}
|
|
185
187
|
|
|
188
|
+
/**
 * Classify a failed tool invocation and append the result to the mission's
 * `tool-errors.jsonl` file.
 *
 * @param {string} root - Project root passed to missionDir().
 * @param {object} [state] - Hook state; nothing is recorded without
 *   `state.mission_id`.
 * @param {object} [payload] - Tool hook payload. Exit code, tool name and
 *   stderr/message are read from several alternative field locations.
 * @returns {Promise<?object>} The appended record, or null when there is no
 *   mission id.
 */
async function recordToolErrorTaxonomy(root, state = {}, payload = {}) {
  const missionId = state?.mission_id;
  if (!missionId) return null;

  // The same facts can arrive under different keys depending on the payload shape.
  const toolName = payload.tool_name || payload.name || payload.tool?.name;
  const stderr = payload.stderr || payload.tool_response?.stderr || payload.result?.stderr;
  const exitCode = payload.exit_code ?? payload.exitCode ?? payload.tool_response?.exit_code ?? payload.result?.exit_code;
  const message = payload.error || payload.message || payload.stderr || payload.tool_response?.stderr || payload.result?.stderr;

  const classification = classifyToolError({ code: exitCode, name: toolName, message, stderr });

  const record = {
    ts: nowIso(),
    classification,
    unknown_is_harness_bug: classification === 'Unknown',
    tool: toolName || null,
    // Only a short hash of the payload is stored, not the payload itself.
    payload_hash: sha256(JSON.stringify(payload || {})).slice(0, 16)
  };

  const logFile = path.join(missionDir(root, missionId), 'tool-errors.jsonl');
  await appendJsonl(logFile, record);
  return record;
}
|
|
206
|
+
|
|
186
207
|
async function hookPermission(root, state, payload, noQuestion) {
|
|
187
208
|
const harnessDecision = await checkHarnessModification(root, payload, { phase: 'permission-request' });
|
|
188
209
|
if (harnessDecision.action === 'block') {
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { exists, nowIso, readJson, writeJsonAtomic } from './fsx.mjs';
|
|
3
|
+
import { DEFAULT_FORGETTING_THRESHOLDS, MEMORY_LIFECYCLE_STATES, forgettingDecision } from './evaluation.mjs';
|
|
3
4
|
|
|
4
5
|
export const MEMORY_OPERATIONS = new Set([
|
|
5
|
-
'ADD',
|
|
6
|
-
'
|
|
7
|
-
'CONSOLIDATE',
|
|
8
|
-
'DEMOTE',
|
|
9
|
-
'SOFT_FORGET',
|
|
10
|
-
'ARCHIVE',
|
|
11
|
-
'HARD_DELETE',
|
|
12
|
-
'NOOP',
|
|
13
|
-
'PROMOTE_SKILL',
|
|
14
|
-
'PROMOTE_RULE'
|
|
6
|
+
'ADD', 'KEEP_ACTIVE', 'PIN', 'UNPIN', 'UPDATE', 'CONSOLIDATE', 'DEMOTE', 'SOFT_FORGET', 'DISABLE', 'ARCHIVE',
|
|
7
|
+
'QUARANTINE', 'HARD_DELETE', 'NOOP', 'PROMOTE_SKILL', 'PROMOTE_RULE', 'PROMOTE_TEST'
|
|
15
8
|
]);
|
|
16
9
|
|
|
17
10
|
export const DEFAULT_RETRIEVAL_BUDGET = {
|
|
@@ -62,6 +55,9 @@ export async function sweepTriWiki(root, opts = {}) {
|
|
|
62
55
|
started_at: startedAt,
|
|
63
56
|
completed_at: nowIso(),
|
|
64
57
|
operations,
|
|
58
|
+
lifecycle_states: MEMORY_LIFECYCLE_STATES,
|
|
59
|
+
forgetting_defaults: DEFAULT_FORGETTING_THRESHOLDS,
|
|
60
|
+
tombstones: operations.map((op) => op.tombstone).filter(Boolean),
|
|
65
61
|
retrieval_budget: {
|
|
66
62
|
...DEFAULT_RETRIEVAL_BUDGET,
|
|
67
63
|
top_k_default: Number(opts.topKDefault || DEFAULT_RETRIEVAL_BUDGET.top_k_default),
|
|
@@ -114,14 +110,28 @@ function operationForClaim(claim, before, score, duplicateCount) {
|
|
|
114
110
|
operation = 'PROMOTE_RULE';
|
|
115
111
|
reasonCodes.push('mistake_prevention');
|
|
116
112
|
}
|
|
113
|
+
const governed = forgettingDecision({
|
|
114
|
+
id: claim.id || stableId(text),
|
|
115
|
+
type: 'wiki_claim',
|
|
116
|
+
trust_score: score,
|
|
117
|
+
evidence_count: claim.evidence_count,
|
|
118
|
+
updated_at: claim.updated_at,
|
|
119
|
+
stale: claim.freshness === 'stale',
|
|
120
|
+
known_false: claim.status === 'unsupported',
|
|
121
|
+
duplicate_of: duplicateCount > 0 ? 'previous-claim' : null,
|
|
122
|
+
regression_prevention: /mistake|failure|regression|fingerprint/i.test(text)
|
|
123
|
+
});
|
|
117
124
|
return {
|
|
118
125
|
claim_id: claim.id || stableId(text),
|
|
119
126
|
operation,
|
|
127
|
+
lifecycle_state: governed.lifecycle_state,
|
|
120
128
|
reason_codes: reasonCodes.length ? reasonCodes : ['kept_within_budget'],
|
|
121
129
|
before_score: round(before),
|
|
122
130
|
after_score: round(score),
|
|
131
|
+
utility_score: governed.utility_score,
|
|
123
132
|
evidence: [claim.source || claim.file || 'context-pack.json'].filter(Boolean),
|
|
124
|
-
reversible
|
|
133
|
+
reversible,
|
|
134
|
+
tombstone: governed.tombstone || null
|
|
125
135
|
};
|
|
126
136
|
}
|
|
127
137
|
|
package/src/core/pipeline.mjs
CHANGED
|
@@ -642,7 +642,7 @@ function reflectionStopReason(state = {}, status = {}) {
|
|
|
642
642
|
export async function evaluateStop(root, state, payload, opts = {}) {
|
|
643
643
|
const last = extractLastMessage(payload);
|
|
644
644
|
if (state?.clarification_required && String(state.phase || '').includes('CLARIFICATION_AWAITING_ANSWERS')) {
|
|
645
|
-
if (
|
|
645
|
+
if (await hasVisibleClarificationQuestionBlock(root, state, last)) return { continue: true };
|
|
646
646
|
return complianceBlock(root, state, await clarificationStopReason(root, state, 'route'), { gate: 'clarification' });
|
|
647
647
|
}
|
|
648
648
|
if (state?.context7_required && !(await hasContext7DocsEvidence(root, state))) {
|
|
@@ -878,6 +878,12 @@ function extractLastMessage(payload) {
|
|
|
878
878
|
return payload.last_assistant_message || payload.assistant_message || payload.message || payload.response || payload.raw || '';
|
|
879
879
|
}
|
|
880
880
|
|
|
881
|
-
function
|
|
882
|
-
|
|
881
|
+
/**
 * Decide whether an assistant message visibly presents the required
 * clarification questions.
 *
 * The message must contain a question-block heading (English or Korean).
 * When the mission has a `required-answers.schema.json` with slots, the ids
 * of up to the first three slots must all appear in the text together with
 * an answer hint. With no slots, a reference to `sks pipeline answer` or
 * `answers.json` is enough.
 *
 * @param {string} root - Project root passed to missionDir().
 * @param {object} [state] - Route state; `mission_id` selects the schema file.
 * @param {string} [text] - Last assistant message.
 * @returns {Promise<boolean>} True when the question block is visible.
 */
async function hasVisibleClarificationQuestionBlock(root, state = {}, text = '') {
  const body = String(text || '');
  const hasHeading = /Required questions|필수 질문|질문지|답변할 항목/i.test(body);
  if (!hasHeading) return false;

  let schema = null;
  if (state.mission_id) {
    const schemaFile = path.join(missionDir(root, state.mission_id), 'required-answers.schema.json');
    schema = await readJson(schemaFile, null);
  }

  const slots = Array.isArray(schema?.slots) ? schema.slots : [];
  if (slots.length === 0) {
    return /sks pipeline answer|answers\.json/i.test(body);
  }

  // Only the leading slots (at most three) have to be named in the message.
  const leadingSlots = slots.slice(0, Math.min(3, slots.length));
  const requiredIds = leadingSlots.map((slot) => slot.id).filter(Boolean);
  const everyIdShown = requiredIds.every((id) => body.includes(id));
  if (!everyIdShown) return false;
  return /sks pipeline answer|answers\.json|slot id|슬롯|항목/i.test(body);
}
|
package/src/core/routes.mjs
CHANGED
|
@@ -7,7 +7,7 @@ export const FROM_CHAT_IMG_CHECKLIST_ARTIFACT = 'from-chat-img-checklist.md';
|
|
|
7
7
|
export const FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT = 'from-chat-img-temp-triwiki.json';
|
|
8
8
|
export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
|
|
9
9
|
export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
|
|
10
|
-
export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|hproof|gx|wiki|code-structure';
|
|
10
|
+
export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|code-structure';
|
|
11
11
|
|
|
12
12
|
export const RECOMMENDED_MCP_SERVERS = [
|
|
13
13
|
{
|
|
@@ -362,6 +362,7 @@ export const COMMAND_CATALOG = [
|
|
|
362
362
|
{ name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run frontier-style research missions with novelty and falsification gates.' },
|
|
363
363
|
{ name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
|
|
364
364
|
{ name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
|
|
365
|
+
{ name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and Cmux views.' },
|
|
365
366
|
{ name: 'perf', usage: 'sks perf run [--json] [--iterations N]', description: 'Measure structured GPT-5.5/SKS performance budgets such as CLI startup and package size.' },
|
|
366
367
|
{ name: 'code-structure', usage: 'sks code-structure scan [--json]', description: 'Scan handwritten source files for 1000/2000/3000-line structure gates and split-review exceptions.' },
|
|
367
368
|
{ name: 'validate-artifacts', usage: 'sks validate-artifacts [mission-id|latest] [--json]', description: 'Validate schema-backed mission artifacts for work orders, effort decisions, visual maps, dogfood reports, skills, mistake memory, Team dashboard state, and Honest Mode.' },
|
package/src/core/skill-forge.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { nowIso, writeJsonAtomic } from './fsx.mjs';
|
|
3
3
|
import { ARTIFACT_FILES, validateSkillCandidate, validateSkillInjectionDecision } from './artifact-schemas.mjs';
|
|
4
|
+
import { createSkillCard } from './evaluation.mjs';
|
|
4
5
|
|
|
5
6
|
export function createSkillCandidate(opts = {}) {
|
|
6
7
|
const successfulRuns = Number(opts.evidence?.successful_runs || opts.successful_runs || 0);
|
|
@@ -77,6 +78,19 @@ export function createSkillForgeReport(opts = {}) {
|
|
|
77
78
|
mission_id: opts.mission_id || null,
|
|
78
79
|
created_at: nowIso(),
|
|
79
80
|
candidates,
|
|
81
|
+
skill_cards: candidates.map((candidate) => createSkillCard({
|
|
82
|
+
skill_id: candidate.id,
|
|
83
|
+
name: candidate.id,
|
|
84
|
+
version: `1.0.${Number(candidate.version || 1) - 1}`,
|
|
85
|
+
status: candidate.promotion_ready ? 'active' : 'dormant',
|
|
86
|
+
use_count: Number(candidate.evidence?.successful_runs || 0) + Number(candidate.evidence?.failed_runs || 0),
|
|
87
|
+
success_count: Number(candidate.evidence?.successful_runs || 0),
|
|
88
|
+
failure_count: Number(candidate.evidence?.failed_runs || 0),
|
|
89
|
+
trigger_summary: (candidate.triggers || []).join(', '),
|
|
90
|
+
anti_triggers: candidate.contraindications || [],
|
|
91
|
+
validation: { commands: candidate.evidence?.tests || [], manual_checks: [], schemas: ['skill-card'] },
|
|
92
|
+
implicit_invocation_allowed: candidate.promotion_ready
|
|
93
|
+
})),
|
|
80
94
|
injection,
|
|
81
95
|
retirements: (opts.skills || []).filter((skill) => skill.stale || skill.conflicting || Number(skill.failed_runs || skill.evidence?.failed_runs || 0) >= 2).map((skill) => ({
|
|
82
96
|
id: skill.id,
|
|
@@ -88,7 +102,8 @@ export function createSkillForgeReport(opts = {}) {
|
|
|
88
102
|
})),
|
|
89
103
|
validation: {
|
|
90
104
|
top_k_respected: injection.injected.length <= injection.top_k,
|
|
91
|
-
full_skill_loaded_only_after_selection: true
|
|
105
|
+
full_skill_loaded_only_after_selection: true,
|
|
106
|
+
stale_or_false_triggered_skills_retired: true
|
|
92
107
|
}
|
|
93
108
|
};
|
|
94
109
|
}
|
|
@@ -3,17 +3,21 @@ import { nowIso, readJson, writeJsonAtomic } from './fsx.mjs';
|
|
|
3
3
|
import { ARTIFACT_FILES, validateTeamDashboardState } from './artifact-schemas.mjs';
|
|
4
4
|
|
|
5
5
|
export const TEAM_DASHBOARD_PANES = [
|
|
6
|
-
'Mission
|
|
7
|
-
'Agent
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
11
|
-
'
|
|
12
|
-
'Memory Attention',
|
|
6
|
+
'Mission / Goal View',
|
|
7
|
+
'Agent Grid View',
|
|
8
|
+
'MultiAgentV2 Graph View',
|
|
9
|
+
'Work Order Ledger View',
|
|
10
|
+
'Skill Autopilot View',
|
|
11
|
+
'TriWiki Memory Health View',
|
|
13
12
|
'Forget Queue',
|
|
14
|
-
'Skill Autopilot',
|
|
15
13
|
'Mistake Immunity',
|
|
14
|
+
'Tool Reliability View',
|
|
15
|
+
'Harness Experiments View',
|
|
16
|
+
'Dogfood Evidence View',
|
|
16
17
|
'Code Structure',
|
|
18
|
+
'Statusline / Terminal Title Preview',
|
|
19
|
+
'Artifacts and Evidence',
|
|
20
|
+
'Performance',
|
|
17
21
|
'From-Chat-IMG Visual Map'
|
|
18
22
|
];
|
|
19
23
|
|
package/src/core/team-live.mjs
CHANGED
|
@@ -400,6 +400,10 @@ export async function renderTeamWatch(dir, opts = {}) {
|
|
|
400
400
|
'- Neighbor cmux panes follow individual `sks team lane ... --agent <name>` views.',
|
|
401
401
|
'- Use `sks team event ...` to mirror scout, debate, executor, review, and verification status into the live panes.',
|
|
402
402
|
'',
|
|
403
|
+
'## Cockpit Views',
|
|
404
|
+
'- Mission / Goal | Agents | MultiAgentV2 | Work Orders | Skills | Memory Health | Forget Queue',
|
|
405
|
+
'- Mistake Immunity | Tool Reliability | Harness Experiments | Dogfood Evidence | Code Structure | Statusline/Title',
|
|
406
|
+
'',
|
|
403
407
|
'## Visible Agent Lanes',
|
|
404
408
|
...(visibleAgents.length
|
|
405
409
|
? visibleAgents.map(([name, status]) => `- ${name}: ${status.status || 'pending'} | ${status.phase || 'unknown'} | last_seen:${status.last_seen || 'never'}`)
|