sneakoscope 0.6.76 → 0.6.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/package.json +1 -1
- package/src/cli/main.mjs +22 -5
- package/src/cli/maintenance-commands.mjs +59 -7
- package/src/core/artifact-schemas.mjs +18 -1
- package/src/core/cmux-ui.mjs +263 -10
- package/src/core/evaluation.mjs +346 -1
- package/src/core/fsx.mjs +1 -1
- package/src/core/goal-workflow.mjs +42 -1
- package/src/core/hooks-runtime.mjs +21 -0
- package/src/core/init.mjs +1 -1
- package/src/core/memory-governor.mjs +21 -11
- package/src/core/pipeline.mjs +9 -3
- package/src/core/routes.mjs +2 -1
- package/src/core/skill-forge.mjs +16 -1
- package/src/core/team-dashboard-renderer.mjs +12 -8
- package/src/core/team-live.mjs +41 -0
package/README.md
CHANGED
|
@@ -201,6 +201,10 @@ sks team log latest
|
|
|
201
201
|
|
|
202
202
|
Team mode prepares the mission, records live events, compiles runtime tasks and worker inboxes, writes schema-backed effort/work-order/dashboard artifacts, and opens a named cmux Team workspace with split live lanes when cmux is available. `sks team dashboard` renders the cockpit panes for mission overview, agent lanes, task DAG, QA/dogfood, artifacts/evidence, and performance.
|
|
203
203
|
|
|
204
|
+
The cmux Team workspace is a live orchestration screen: the first pane follows `sks team watch <mission-id> --follow` as the mission overview, and neighboring split panes follow individual `sks team lane <mission-id> --agent <name> --follow` views. SKS colors and labels lanes by role, so scouts, planning/debate voices, executors, reviewers, and safety lanes are visually distinct while the same evidence is mirrored into `team-transcript.jsonl`, `team-live.md`, and `team-dashboard.json`.
|
|
205
|
+
|
|
206
|
+
When the Team route reaches `session_cleanup`, SKS collapses the cmux workspace back to the overview pane and marks the workspace complete. You can also run `sks team cleanup-cmux <mission-id|latest>` manually, or `sks team cleanup-cmux latest --close-workspace` when you want the whole Team workspace closed.
|
|
207
|
+
|
|
204
208
|
### QA, Goal, Research, DB, Wiki, GX
|
|
205
209
|
|
|
206
210
|
```sh
|
|
@@ -212,6 +216,7 @@ sks db scan --json
|
|
|
212
216
|
sks wiki refresh
|
|
213
217
|
sks wiki sweep latest --json
|
|
214
218
|
sks wiki validate .sneakoscope/wiki/context-pack.json
|
|
219
|
+
sks harness fixture --json
|
|
215
220
|
sks gx init homepage
|
|
216
221
|
sks gx render homepage --format html
|
|
217
222
|
sks validate-artifacts latest --json
|
|
@@ -257,7 +262,7 @@ Generated app files include:
|
|
|
257
262
|
|
|
258
263
|
Use `sks dollar-commands` to confirm that terminal discovery and Codex App prompt commands agree.
|
|
259
264
|
|
|
260
|
-
TriWiki is intentionally sparse: `sks wiki sweep` records demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim into future prompts. `sks code-structure scan` flags handwritten files above 1000/2000/3000-line thresholds so new logic can be extracted before command files become harder to maintain.
|
|
265
|
+
TriWiki is intentionally sparse: `sks wiki sweep` records demote, soft-forget, archive, delete, promote-to-skill, and promote-to-rule candidates instead of injecting every old claim into future prompts. `sks harness fixture` validates the broader Harness Growth Factory contract: deliberate forgetting fixtures, skill card metadata, experiment schema, tool-error taxonomy, permission profiles, MultiAgentV2 defaults, and Cmux cockpit view coverage. `sks code-structure scan` flags handwritten files above 1000/2000/3000-line thresholds so new logic can be extracted before command files become harder to maintain.
|
|
261
266
|
|
|
262
267
|
## Prompt `$` Commands
|
|
263
268
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sneakoscope",
|
|
3
3
|
"displayName": "ㅅㅋㅅ",
|
|
4
|
-
"version": "0.6.76",
|
|
4
|
+
"version": "0.6.78",
|
|
5
5
|
"description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
|
package/src/cli/main.mjs
CHANGED
|
@@ -35,14 +35,15 @@ import { createWorkOrderLedger } from '../core/work-order-ledger.mjs';
|
|
|
35
35
|
import { buildFromChatImgVisualMap } from '../core/from-chat-img-forensics.mjs';
|
|
36
36
|
import { classifyDogfoodFinding, createDogfoodReport, writeDogfoodReport } from '../core/dogfood-loop.mjs';
|
|
37
37
|
import { createSkillCandidate, decideSkillInjection, writeSkillCandidate, writeSkillForgeReport, writeSkillInjectionDecision } from '../core/skill-forge.mjs';
|
|
38
|
+
import { classifyToolError, harnessGrowthReport } from '../core/evaluation.mjs';
|
|
38
39
|
import { recordMistake, writeMistakeMemoryReport } from '../core/mistake-memory.mjs';
|
|
39
40
|
import { buildPromptContext } from '../core/prompt-context-builder.mjs';
|
|
40
41
|
import { renderTeamDashboardState, writeTeamDashboardState } from '../core/team-dashboard-renderer.mjs';
|
|
41
42
|
import { GOAL_WORKFLOW_ARTIFACT } from '../core/goal-workflow.mjs';
|
|
42
43
|
import { CODEX_APP_DOCS_URL, codexAppIntegrationStatus, formatCodexAppStatus } from '../core/codex-app.mjs';
|
|
43
|
-
import { CMUX_BREW_COMMAND, CMUX_BREW_UPGRADE_COMMAND, buildCmuxLaunchPlan, buildCmuxNewWorkspaceArgs, cmuxSurfaceRefFromText, cmuxWorkspaceRef, cmuxWorkspaceRefFromText, cmuxReadiness, cmuxStatusKind, defaultCmuxWorkspaceName, ensureCmuxInstalled, formatCmuxBanner, launchCmuxTeamView, launchCmuxUi, matchingCmuxWorkspaces, parseCmuxWorkspaceList, platformCmuxInstallHint, readCmuxWorkspaceRecord, runCmuxStatus, sanitizeCmuxWorkspaceName, writeCmuxWorkspaceRecord } from '../core/cmux-ui.mjs';
|
|
44
|
+
import { CMUX_BREW_COMMAND, CMUX_BREW_UPGRADE_COMMAND, buildCmuxLaunchPlan, buildCmuxNewWorkspaceArgs, cmuxSurfaceRefFromText, cmuxWorkspaceRef, cmuxWorkspaceRefFromText, cmuxReadiness, cmuxStatusKind, defaultCmuxWorkspaceName, ensureCmuxInstalled, formatCmuxBanner, launchCmuxTeamView, launchCmuxUi, matchingCmuxWorkspaces, parseCmuxWorkspaceList, platformCmuxInstallHint, readCmuxWorkspaceRecord, runCmuxStatus, sanitizeCmuxWorkspaceName, teamLaneStyle, writeCmuxWorkspaceRecord } from '../core/cmux-ui.mjs';
|
|
44
45
|
import { autoReviewProfileName, autoReviewStatus, autoReviewSummary, enableAutoReview, disableAutoReview, enableMadHighProfile, madHighProfileName } from '../core/auto-review.mjs';
|
|
45
|
-
import { buildTeamPlan, codeStructureCommand, defaultBeta, defaultVGraph, evalCommand, gcCommand, goalCommand, gxCommand, hproofCommand, memoryCommand, migrateWikiContextPack, parseTeamCreateArgs, perfCommand, profileCommand, projectWikiClaims, qaLoopCommand, researchCommand, statsCommand, team, teamWorkflowMarkdown, validateArtifactsCommand, wikiCommand, wikiVoxelRowCount, writeWikiContextPack } from './maintenance-commands.mjs';
|
|
46
|
+
import { buildTeamPlan, codeStructureCommand, defaultBeta, defaultVGraph, evalCommand, gcCommand, goalCommand, gxCommand, harnessCommand, hproofCommand, memoryCommand, migrateWikiContextPack, parseTeamCreateArgs, perfCommand, profileCommand, projectWikiClaims, qaLoopCommand, researchCommand, statsCommand, team, teamWorkflowMarkdown, validateArtifactsCommand, wikiCommand, wikiVoxelRowCount, writeWikiContextPack } from './maintenance-commands.mjs';
|
|
46
47
|
|
|
47
48
|
const flag = (args, name) => args.includes(name);
|
|
48
49
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
@@ -107,6 +108,7 @@ export async function main(args) {
|
|
|
107
108
|
if (cmd === 'team') return team(tail);
|
|
108
109
|
if (cmd === 'db') return db(sub, rest);
|
|
109
110
|
if (cmd === 'eval') return evalCommand(sub, rest);
|
|
111
|
+
if (cmd === 'harness') return harnessCommand(sub, rest);
|
|
110
112
|
if (cmd === 'wiki') return wikiCommand(sub, rest);
|
|
111
113
|
if (cmd === 'gc') return gcCommand(tail);
|
|
112
114
|
if (cmd === 'stats') return statsCommand(tail);
|
|
@@ -175,6 +177,7 @@ Usage:
|
|
|
175
177
|
sks eval run [--json] [--out report.json]
|
|
176
178
|
sks eval compare --baseline old.json --candidate new.json [--json]
|
|
177
179
|
sks perf run [--json]
|
|
180
|
+
sks harness fixture [--json]
|
|
178
181
|
sks code-structure scan [--json]
|
|
179
182
|
sks wiki coords --rgba 12,34,56,255
|
|
180
183
|
sks wiki pack [--json] [--role worker|verifier] [--max-anchors N]
|
|
@@ -1477,6 +1480,7 @@ function usage(args = []) {
|
|
|
1477
1480
|
'codex-app': ['Codex App', '', ' sks bootstrap', ' sks codex-app check', ' sks dollar-commands', ' cat .codex/SNEAKOSCOPE.md'],
|
|
1478
1481
|
dollar: ['Dollar Commands', '', formatDollarCommandsCompact(' '), '', 'Terminal: sks dollar-commands [--json]'],
|
|
1479
1482
|
wiki: ['TriWiki', '', ' sks wiki pack', ' sks wiki refresh [--prune]', ' sks wiki sweep latest --json', ' sks wiki validate .sneakoscope/wiki/context-pack.json', ' sks wiki prune --dry-run --json', '', 'Packs include attention.use_first and attention.hydrate_first for compact recall plus source hydration. Sweep records intentional forgetting and promotion candidates.'],
|
|
1483
|
+
harness: ['Harness Growth', '', ' sks harness fixture --json', ' sks harness review --json', '', 'Runs deterministic fixtures for deliberate forgetting, skill cards, harness experiments, tool error taxonomy, permission profiles, MultiAgentV2, and Cmux cockpit views.'],
|
|
1480
1484
|
'code-structure': ['Code Structure', '', ' sks code-structure scan', ' sks code-structure scan --json', '', 'Flags handwritten source files above 1000/2000/3000-line thresholds and records split-review exceptions.'],
|
|
1481
1485
|
gx: ['GX', '', ' sks gx init architecture-atlas', ' sks gx render architecture-atlas --format all', ' sks gx validate architecture-atlas']
|
|
1482
1486
|
};
|
|
@@ -2293,7 +2297,7 @@ async function selftest() {
|
|
|
2293
2297
|
if (hookTeamPendingState.mission_id !== hookTeamState.mission_id) throw new Error('selftest failed: pending clarification allowed a new route mission to replace the visible question sheet');
|
|
2294
2298
|
if (!hookTeamPendingContext.includes('Required questions still pending') || !hookTeamPendingContext.includes('VISIBLE RESPONSE CONTRACT') || !hookTeamPendingContext.includes('UI_STATE_BEHAVIOR')) throw new Error('selftest failed: pending clarification did not re-expose the question sheet');
|
|
2295
2299
|
if (hookTeamPendingContext.includes('MANDATORY ambiguity-removal gate activated')) throw new Error('selftest failed: pending clarification prepared a new ambiguity gate instead of reusing the active one');
|
|
2296
|
-
const hookTeamStopResult = await runProcess(process.execPath, [hookBin, 'hook', 'stop'], { cwd: hookTeamTmp, input: JSON.stringify({ cwd: hookTeamTmp, last_assistant_message: 'I will
|
|
2300
|
+
const hookTeamStopResult = await runProcess(process.execPath, [hookBin, 'hook', 'stop'], { cwd: hookTeamTmp, input: JSON.stringify({ cwd: hookTeamTmp, last_assistant_message: 'I need three decisions before implementation, but I will not paste the Required questions block.' }), env: { SKS_DISABLE_UPDATE_CHECK: '1' }, timeoutMs: 15000, maxOutputBytes: 128 * 1024 });
|
|
2297
2301
|
if (hookTeamStopResult.code !== 0) throw new Error(`selftest failed: Team stop hook exited ${hookTeamStopResult.code}: ${hookTeamStopResult.stderr}`);
|
|
2298
2302
|
const hookTeamStopJson = JSON.parse(hookTeamStopResult.stdout);
|
|
2299
2303
|
if (hookTeamStopJson.decision !== 'block' || !String(hookTeamStopJson.reason || '').includes('mandatory ambiguity-removal')) throw new Error('selftest failed: Stop hook did not block missing Team ambiguity answers');
|
|
@@ -2303,6 +2307,13 @@ async function selftest() {
|
|
|
2303
2307
|
if (!String(hookTeamStopJson.reason || '').includes('Codex plan-tool interaction')) throw new Error('selftest failed: Stop hook did not reprint plan-tool guidance');
|
|
2304
2308
|
if (!String(hookTeamStopJson.reason || '').includes('VISIBLE RESPONSE CONTRACT')) throw new Error('selftest failed: Stop hook did not force visible clarification response');
|
|
2305
2309
|
const hookTeamSchema = await readJson(path.join(missionDir(hookTeamTmp, hookTeamState.mission_id), 'required-answers.schema.json'));
|
|
2310
|
+
const visibleQuestionsBlock = [
|
|
2311
|
+
'Required questions',
|
|
2312
|
+
...hookTeamSchema.slots.map((slot, idx) => `${idx + 1}. ${slot.id}: ${slot.question}`),
|
|
2313
|
+
'Reply by slot id, then I will write answers.json and run sks pipeline answer latest answers.json.'
|
|
2314
|
+
].join('\n');
|
|
2315
|
+
const visibleQuestionDecision = await evaluateStop(hookTeamTmp, hookTeamState, { last_assistant_message: visibleQuestionsBlock }, { noQuestion: false });
|
|
2316
|
+
if (!visibleQuestionDecision?.continue) throw new Error('selftest failed: visible Required questions block was not accepted by clarification stop gate');
|
|
2306
2317
|
const nonGoalsSlot = hookTeamSchema.slots.find((s) => s.id === 'NON_GOALS');
|
|
2307
2318
|
if (nonGoalsSlot && !nonGoalsSlot.allow_empty) throw new Error('selftest failed: NON_GOALS does not allow an empty array answer');
|
|
2308
2319
|
if (!nonGoalsSlot && !Array.isArray(hookTeamSchema.inferred_answers?.NON_GOALS)) throw new Error('selftest failed: NON_GOALS was neither asked nor inferred');
|
|
@@ -2700,7 +2711,10 @@ async function selftest() {
|
|
|
2700
2711
|
if (!roleTeamPlan.roster.debate_team.some((agent) => /inconvenience/.test(agent.persona))) throw new Error('selftest failed: user friction persona missing from debate team');
|
|
2701
2712
|
const cmuxTeam = await launchCmuxTeamView({ root: tmp, missionId: teamId, plan: roleTeamPlan, json: true });
|
|
2702
2713
|
if (!cmuxTeam.agents?.length || !cmuxTeam.agents.some((entry) => entry.agent === 'analysis_scout_1') || !cmuxTeam.agents.every((entry) => String(entry.command || '').includes('team lane') && String(entry.command || '').includes('--agent'))) throw new Error('selftest failed: Team cmux view did not expose agent live lanes');
|
|
2703
|
-
|
|
2714
|
+
if (!cmuxTeam.overview?.command?.includes('team watch') || !cmuxTeam.lanes?.some((entry) => entry.role === 'overview') || !cmuxTeam.lanes?.some((entry) => entry.agent === 'analysis_scout_1')) throw new Error('selftest failed: Team cmux view did not expose orchestration overview plus agent lanes');
|
|
2715
|
+
if (teamLaneStyle('analysis_scout_1').role !== 'scout' || teamLaneStyle('executor_1').role !== 'execution' || teamLaneStyle('reviewer_1').role !== 'review') throw new Error('selftest failed: Team cmux role palette did not classify lane roles');
|
|
2716
|
+
if (cmuxTeam.cleanup_policy !== 'collapse-agent-lanes-to-overview' || !cmuxTeam.lanes.every((entry) => entry.style?.color && entry.title)) throw new Error('selftest failed: Team cmux view did not expose color/title metadata and cleanup policy');
|
|
2717
|
+
const cmuxTeamWorkspaceArgs = buildCmuxNewWorkspaceArgs({ root: tmp, workspace: `sks-team-${teamId}` }, cmuxTeam.overview.command);
|
|
2704
2718
|
if (!cmuxTeamWorkspaceArgs.includes('--name') || !cmuxTeamWorkspaceArgs.includes(`sks-team-${teamId}`)) throw new Error('selftest failed: Team cmux workspace is not named for visibility');
|
|
2705
2719
|
if (routeReasoning(routePrompt('$Research frontier idea'), '$Research frontier idea').effort !== 'xhigh') throw new Error('selftest failed: research reasoning not xhigh');
|
|
2706
2720
|
if (routeReasoning(routePrompt('$From-Chat-IMG 채팅 이미지 작업'), '$From-Chat-IMG 채팅 이미지 작업').effort !== 'xhigh') throw new Error('selftest failed: From-Chat-IMG reasoning not xhigh');
|
|
@@ -2761,7 +2775,7 @@ async function selftest() {
|
|
|
2761
2775
|
if (teamDashboard?.agent_session_count !== 5 || teamDashboard?.role_counts?.executor !== 5) throw new Error('selftest failed: team dashboard session/role budget missing');
|
|
2762
2776
|
await writeTeamDashboardState(teamDir, { missionId: teamId, mission: { id: teamId, mode: 'team' }, effort: 'high', phase: 'verification' });
|
|
2763
2777
|
const teamDashboardState = await readJson(path.join(teamDir, ARTIFACT_FILES.team_dashboard_state), {});
|
|
2764
|
-
if (!validateTeamDashboardState(teamDashboardState).ok || !renderTeamDashboardState(teamDashboardState).includes('Mission
|
|
2778
|
+
if (!validateTeamDashboardState(teamDashboardState).ok || !renderTeamDashboardState(teamDashboardState).includes('Mission / Goal View')) throw new Error('selftest failed: Team dashboard state missing required cockpit panes');
|
|
2765
2779
|
if (teamDashboard?.context_tracking?.ssot !== 'triwiki') throw new Error('selftest failed: team dashboard missing TriWiki context tracking');
|
|
2766
2780
|
if (!teamDashboard?.phases?.includes('parallel_analysis_scouting')) throw new Error('selftest failed: team dashboard missing analysis scout phase');
|
|
2767
2781
|
if (!teamDashboard?.latest_messages?.some((entry) => entry.agent === 'analysis_scout_1')) throw new Error('selftest failed: team live dashboard missing analysis scout event');
|
|
@@ -2817,6 +2831,9 @@ async function selftest() {
|
|
|
2817
2831
|
if (!evalReport.comparison.meaningful_improvement) throw new Error('selftest failed: evaluation benchmark did not show meaningful improvement');
|
|
2818
2832
|
if (!evalReport.candidate.wiki?.valid) throw new Error('selftest failed: wiki coordinate index invalid in eval');
|
|
2819
2833
|
if (evalReport.candidate.wiki?.voxel_schema !== 'sks.wiki-voxel.v1' || evalReport.candidate.wiki?.voxel_rows < 1) throw new Error('selftest failed: eval did not include voxel overlay metrics');
|
|
2834
|
+
const harnessReport = harnessGrowthReport({});
|
|
2835
|
+
if (!harnessReport.forgetting.fixture.passed || !harnessReport.cmux.views.includes('Harness Experiments View') || !harnessReport.reliability.tool_error_taxonomy.includes('Unknown')) throw new Error('selftest failed: harness growth fixture incomplete');
|
|
2836
|
+
if (classifyToolError({ message: 'operation timed out' }) !== 'Timeout' || classifyToolError({ message: 'unclassified weirdness' }) !== 'Unknown') throw new Error('selftest failed: tool error taxonomy classification');
|
|
2820
2837
|
const coord = rgbaToWikiCoord({ r: 12, g: 34, b: 56, a: 255 });
|
|
2821
2838
|
if (coord.schema !== 'sks.wiki-coordinate.v1' || coord.xyzw.length !== 4) throw new Error('selftest failed: RGBA wiki coordinate conversion');
|
|
2822
2839
|
await writeTextAtomic(path.join(tmp, '.sneakoscope', 'memory', 'q2_facts', 'selftest.md'), '- claim: Selftest memory claim must be selected before lower-weight mission notes. | id: selftest-memory-priority | source: src/cli/main.mjs | risk: high | status: supported | evidence_count: 3 | required_weight: 1.0 | trust_score: 0.9\n');
|
|
package/src/cli/maintenance-commands.mjs
CHANGED
|
@@ -17,7 +17,7 @@ import { contextCapsule } from '../core/triwiki-attention.mjs';
|
|
|
17
17
|
import { rgbaKey, rgbaToWikiCoord, validateWikiCoordinateIndex } from '../core/wiki-coordinate.mjs';
|
|
18
18
|
import { ALLOWED_REASONING_EFFORTS, FROM_CHAT_IMG_CHECKLIST_ARTIFACT, FROM_CHAT_IMG_COVERAGE_ARTIFACT, FROM_CHAT_IMG_QA_LOOP_ARTIFACT, FROM_CHAT_IMG_SOURCE_INVENTORY_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT, FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS, FROM_CHAT_IMG_VISUAL_MAP_ARTIFACT, FROM_CHAT_IMG_WORK_ORDER_ARTIFACT, ROUTES, hasFromChatImgSignal, routePrompt, stackCurrentDocsPolicy, triwikiContextTracking } from '../core/routes.mjs';
|
|
19
19
|
import { TEAM_DECOMPOSITION_ARTIFACT, TEAM_GRAPH_ARTIFACT, TEAM_INBOX_DIR, TEAM_RUNTIME_TASKS_ARTIFACT, teamRuntimePlanMetadata, teamRuntimeRequiredArtifacts, writeTeamRuntimeArtifacts } from '../core/team-dag.mjs';
|
|
20
|
-
import { appendTeamEvent, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane } from '../core/team-live.mjs';
|
|
20
|
+
import { appendTeamEvent, formatRoleCounts, initTeamLive, normalizeTeamSpec, parseTeamSpecArgs, readTeamDashboard, readTeamLive, readTeamTranscriptTail, renderTeamAgentLane, renderTeamWatch } from '../core/team-live.mjs';
|
|
21
21
|
import { ARTIFACT_FILES, writeValidationReport } from '../core/artifact-schemas.mjs';
|
|
22
22
|
import { writeEffortDecision } from '../core/effort-orchestrator.mjs';
|
|
23
23
|
import { createWorkOrderLedger, writeWorkOrderLedger } from '../core/work-order-ledger.mjs';
|
|
@@ -27,10 +27,11 @@ import { runPerfBench } from '../core/perf-bench.mjs';
|
|
|
27
27
|
import { GOAL_BRIDGE_ARTIFACT, GOAL_WORKFLOW_ARTIFACT, updateGoalWorkflow, writeGoalWorkflow } from '../core/goal-workflow.mjs';
|
|
28
28
|
import { scanCodeStructure, writeCodeStructureReport } from '../core/code-structure.mjs';
|
|
29
29
|
import { writeMemorySweepReport } from '../core/memory-governor.mjs';
|
|
30
|
-
import { launchCmuxTeamView } from '../core/cmux-ui.mjs';
|
|
30
|
+
import { cleanupCmuxTeamView, launchCmuxTeamView } from '../core/cmux-ui.mjs';
|
|
31
31
|
import { writeSkillForgeReport } from '../core/skill-forge.mjs';
|
|
32
32
|
import { writeMistakeMemoryReport } from '../core/mistake-memory.mjs';
|
|
33
33
|
import { scanDbSafety } from '../core/db-safety.mjs';
|
|
34
|
+
import { harnessGrowthReport, writeHarnessGrowthReport } from '../core/evaluation.mjs';
|
|
34
35
|
|
|
35
36
|
const flag = (args, name) => args.includes(name);
|
|
36
37
|
const promptOf = (args) => args.filter((x) => !String(x).startsWith('--')).join(' ').trim();
|
|
@@ -417,6 +418,25 @@ export async function perfCommand(sub, args = []) {
|
|
|
417
418
|
console.log(`Report: ${path.relative(root, outPath)}`);
|
|
418
419
|
}
|
|
419
420
|
|
|
421
|
+
export async function harnessCommand(sub, args = []) {
|
|
422
|
+
const action = sub || 'fixture';
|
|
423
|
+
if (!['fixture', 'review'].includes(action)) {
|
|
424
|
+
console.error('Usage: sks harness fixture|review [--json]');
|
|
425
|
+
process.exitCode = 1;
|
|
426
|
+
return;
|
|
427
|
+
}
|
|
428
|
+
const root = await sksRoot();
|
|
429
|
+
const report = action === 'review'
|
|
430
|
+
? await writeHarnessGrowthReport(root, path.join(root, '.sneakoscope', 'reports'), {})
|
|
431
|
+
: harnessGrowthReport({});
|
|
432
|
+
if (flag(args, '--json')) return console.log(JSON.stringify(report, null, 2));
|
|
433
|
+
console.log('SKS Harness Growth');
|
|
434
|
+
console.log(`Forgetting fixture: ${report.forgetting.fixture.passed ? 'pass' : 'fail'}`);
|
|
435
|
+
console.log(`Cmux views: ${report.cmux.views.length}`);
|
|
436
|
+
console.log(`Tool taxonomy: ${report.reliability.tool_error_taxonomy.join(', ')}`);
|
|
437
|
+
console.log(`Unknown errors recorded as bugs: ${report.reliability.unknown_errors_are_bugs ? 'yes' : 'no'}`);
|
|
438
|
+
}
|
|
439
|
+
|
|
420
440
|
export async function codeStructureCommand(sub, args = []) {
|
|
421
441
|
const action = sub || 'scan';
|
|
422
442
|
if (action !== 'scan') {
|
|
@@ -1171,13 +1191,13 @@ export async function gxCommand(sub, args) {
|
|
|
1171
1191
|
}
|
|
1172
1192
|
|
|
1173
1193
|
export async function team(args) {
|
|
1174
|
-
const teamSubcommands = new Set(['log', 'tail', 'watch', 'lane', 'status', 'dashboard', 'event']);
|
|
1194
|
+
const teamSubcommands = new Set(['log', 'tail', 'watch', 'lane', 'status', 'dashboard', 'event', 'cleanup-cmux']);
|
|
1175
1195
|
if (teamSubcommands.has(args[0])) return teamCommand(args[0], args.slice(1));
|
|
1176
1196
|
const opts = parseTeamCreateArgs(args);
|
|
1177
1197
|
const { prompt, agentSessions, roleCounts, roster } = opts;
|
|
1178
1198
|
if (!prompt) {
|
|
1179
1199
|
console.error('Usage: sks team "task" [executor:5 reviewer:2 user:1] [--agents N] [--json]');
|
|
1180
|
-
console.error(' sks team log|tail|watch|lane|status [mission-id|latest]');
|
|
1200
|
+
console.error(' sks team log|tail|watch|lane|status|cleanup-cmux [mission-id|latest]');
|
|
1181
1201
|
console.error(' sks team event [mission-id|latest] --agent <name> --phase <phase> --message "..."');
|
|
1182
1202
|
process.exitCode = 1;
|
|
1183
1203
|
return;
|
|
@@ -1211,6 +1231,7 @@ export async function team(args) {
|
|
|
1211
1231
|
});
|
|
1212
1232
|
await writeWorkOrderLedger(dir, workOrder);
|
|
1213
1233
|
if (fromChatImgRequired) await writeFromChatImgArtifacts(dir, { missionId: id, requests: [{ verbatim: prompt }], ambiguities: ['image source inventory must be completed before implementation'] });
|
|
1234
|
+
await writeHarnessGrowthReport(root, dir, {});
|
|
1214
1235
|
let dashboardState = await writeTeamDashboardState(dir, { missionId: id, mission: { id, mode: 'team' }, effort: effortDecision.selected_effort, phase: 'intake', next_action: fromChatImgRequired ? 'complete visual source inventory and work-order mapping' : 'run Team analysis scouts' });
|
|
1215
1236
|
await writeJsonAtomic(path.join(dir, 'team-gate.json'), { passed: false, team_roster_confirmed: true, analysis_artifact: false, triwiki_refreshed: false, triwiki_validated: false, consensus_artifact: false, ...runtime.gate_fields, implementation_team_fresh: false, review_artifact: false, integration_evidence: false, session_cleanup: false, context7_evidence: false, ...(fromChatImgRequired ? { from_chat_img_required: true, from_chat_img_request_coverage: false } : {}) });
|
|
1216
1237
|
dashboardState = await writeTeamDashboardState(dir, { missionId: id, mission: { id, mode: 'team' }, effort: effortDecision.selected_effort, phase: 'intake', next_action: fromChatImgRequired ? 'complete visual source inventory and work-order mapping' : 'run Team analysis scouts' });
|
|
@@ -1507,15 +1528,35 @@ async function teamCommand(sub, args) {
|
|
|
1507
1528
|
process.exitCode = 1;
|
|
1508
1529
|
return;
|
|
1509
1530
|
}
|
|
1531
|
+
const phase = readFlagValue(args, '--phase', 'general');
|
|
1510
1532
|
const record = await appendTeamEvent(dir, {
|
|
1511
1533
|
agent: readFlagValue(args, '--agent', 'parent_orchestrator'),
|
|
1512
|
-
phase: readFlagValue(args, '--phase', 'general'),
|
|
1534
|
+
phase,
|
|
1513
1535
|
type: readFlagValue(args, '--type', 'status'),
|
|
1514
1536
|
artifact: readFlagValue(args, '--artifact', ''),
|
|
1515
1537
|
message
|
|
1516
1538
|
});
|
|
1539
|
+
const cmuxCleanup = /^session_cleanup$|^team_cleanup$|^cleanup$/i.test(String(phase || ''))
|
|
1540
|
+
? await cleanupCmuxTeamView({ root, missionId: id, closeWorkspace: flag(args, '--close-workspace') }).catch((err) => ({ ok: false, reason: err.message || 'cmux cleanup failed' }))
|
|
1541
|
+
: null;
|
|
1517
1542
|
if (flag(args, '--json')) return console.log(JSON.stringify(record, null, 2));
|
|
1518
1543
|
console.log(`${record.ts} [${record.phase}] ${record.agent}: ${record.message}`);
|
|
1544
|
+
if (cmuxCleanup) {
|
|
1545
|
+
if (cmuxCleanup.ok) console.log(`cmux cleanup: collapsed ${cmuxCleanup.closed_surfaces || 0} agent pane(s), kept overview ${cmuxCleanup.kept_surface || cmuxCleanup.workspace_ref}`);
|
|
1546
|
+
else console.log(`cmux cleanup: skipped (${cmuxCleanup.reason || 'not available'})`);
|
|
1547
|
+
}
|
|
1548
|
+
return;
|
|
1549
|
+
}
|
|
1550
|
+
if (sub === 'cleanup-cmux') {
|
|
1551
|
+
const cleanup = await cleanupCmuxTeamView({ root, missionId: id, closeWorkspace: flag(args, '--close-workspace') || flag(args, '--close') });
|
|
1552
|
+
if (flag(args, '--json')) return console.log(JSON.stringify(cleanup, null, 2));
|
|
1553
|
+
if (!cleanup.ok) {
|
|
1554
|
+
console.error(`cmux cleanup skipped: ${cleanup.reason || 'not available'}`);
|
|
1555
|
+
process.exitCode = cleanup.skipped ? 0 : 2;
|
|
1556
|
+
return;
|
|
1557
|
+
}
|
|
1558
|
+
if (cleanup.close_workspace) console.log(`cmux cleanup: closed Team workspace ${cleanup.workspace_ref}`);
|
|
1559
|
+
else console.log(`cmux cleanup: collapsed ${cleanup.closed_surfaces}/${cleanup.requested_close_surfaces} agent pane(s), kept overview ${cleanup.kept_surface || cleanup.workspace_ref}`);
|
|
1519
1560
|
return;
|
|
1520
1561
|
}
|
|
1521
1562
|
if (sub === 'status') {
|
|
@@ -1573,15 +1614,26 @@ async function teamCommand(sub, args) {
|
|
|
1573
1614
|
if (sub === 'tail' || sub === 'watch') {
|
|
1574
1615
|
const lines = readFlagValue(args, '--lines', '20');
|
|
1575
1616
|
const printTail = async () => {
|
|
1617
|
+
if (sub === 'watch' && !flag(args, '--raw')) {
|
|
1618
|
+
if (flag(args, '--follow') && process.stdout.isTTY) console.clear();
|
|
1619
|
+
console.log(await renderTeamWatch(dir, { missionId: id, lines: Number(lines) }));
|
|
1620
|
+
return;
|
|
1621
|
+
}
|
|
1576
1622
|
for (const line of await readTeamTranscriptTail(dir, Number(lines))) console.log(line);
|
|
1577
1623
|
};
|
|
1578
1624
|
await printTail();
|
|
1579
1625
|
if (sub === 'watch' && flag(args, '--follow')) {
|
|
1580
|
-
let last = (await readTeamTranscriptTail(dir, Number(lines))).join('\n');
|
|
1626
|
+
let last = flag(args, '--raw')
|
|
1627
|
+
? (await readTeamTranscriptTail(dir, Number(lines))).join('\n')
|
|
1628
|
+
: await renderTeamWatch(dir, { missionId: id, lines: Number(lines) });
|
|
1581
1629
|
for (;;) {
|
|
1582
1630
|
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
1583
|
-
const next = (await readTeamTranscriptTail(dir, Number(lines))).join('\n');
|
|
1631
|
+
const next = flag(args, '--raw')
|
|
1632
|
+
? (await readTeamTranscriptTail(dir, Number(lines))).join('\n')
|
|
1633
|
+
: await renderTeamWatch(dir, { missionId: id, lines: Number(lines) });
|
|
1584
1634
|
if (next !== last) {
|
|
1635
|
+
if (process.stdout.isTTY) console.clear();
|
|
1636
|
+
else console.log('\n--- team watch update ---\n');
|
|
1585
1637
|
console.log(next);
|
|
1586
1638
|
last = next;
|
|
1587
1639
|
}
|
|
package/src/core/artifact-schemas.mjs
CHANGED
|
@@ -17,6 +17,7 @@ export const ARTIFACT_FILES = {
|
|
|
17
17
|
memory_sweep_report: 'memory-sweep-report.json',
|
|
18
18
|
skill_forge_report: 'skill-forge-report.json',
|
|
19
19
|
mistake_memory_report: 'mistake-memory-report.json',
|
|
20
|
+
harness_growth_report: 'harness-growth-report.json',
|
|
20
21
|
code_structure_report: 'code-structure-report.json',
|
|
21
22
|
team_dashboard_state: 'team-dashboard-state.json',
|
|
22
23
|
cmux_pane_plan: 'cmux-pane-plan.json',
|
|
@@ -163,6 +164,21 @@ export function validateMistakeMemoryReport(data = {}) {
|
|
|
163
164
|
return validationResult('MistakeMemoryReport', errors);
|
|
164
165
|
}
|
|
165
166
|
|
|
167
|
+
export function validateHarnessGrowthReport(data = {}) {
|
|
168
|
+
const errors = [];
|
|
169
|
+
pushMissing(errors, isObj(data.forgetting), 'forgetting_missing');
|
|
170
|
+
pushMissing(errors, isObj(data.skills), 'skills_missing');
|
|
171
|
+
pushMissing(errors, isObj(data.experiments), 'experiments_missing');
|
|
172
|
+
pushMissing(errors, isObj(data.codex_native), 'codex_native_missing');
|
|
173
|
+
pushMissing(errors, isObj(data.cmux), 'cmux_missing');
|
|
174
|
+
pushMissing(errors, isObj(data.reliability), 'reliability_missing');
|
|
175
|
+
if (data.forgetting?.fixture?.passed !== true) errors.push('forgetting_fixture_failed');
|
|
176
|
+
if (!Array.isArray(data.reliability?.tool_error_taxonomy) || !data.reliability.tool_error_taxonomy.includes('Unknown')) errors.push('tool_error_taxonomy_missing_unknown');
|
|
177
|
+
if (data.reliability?.unknown_errors_are_bugs !== true) errors.push('unknown_errors_not_marked_bug');
|
|
178
|
+
if (!Array.isArray(data.cmux?.views) || data.cmux.views.length < 10) errors.push('cmux_views_incomplete');
|
|
179
|
+
return validationResult('HarnessGrowthReport', errors);
|
|
180
|
+
}
|
|
181
|
+
|
|
166
182
|
export function validateCodeStructureReport(data = {}) {
|
|
167
183
|
const errors = [];
|
|
168
184
|
pushMissing(errors, isObj(data.thresholds), 'thresholds_missing');
|
|
@@ -179,7 +195,7 @@ export function validateTeamDashboardState(data = {}) {
|
|
|
179
195
|
pushMissing(errors, Array.isArray(data.gates), 'gates_not_array');
|
|
180
196
|
pushMissing(errors, Array.isArray(data.agents), 'agents_not_array');
|
|
181
197
|
pushMissing(errors, Array.isArray(data.tasks), 'tasks_not_array');
|
|
182
|
-
for (const pane of ['Mission
|
|
198
|
+
for (const pane of ['Mission / Goal View', 'Agent Grid View', 'MultiAgentV2 Graph View', 'Work Order Ledger View', 'Skill Autopilot View', 'TriWiki Memory Health View', 'Forget Queue', 'Mistake Immunity', 'Tool Reliability View', 'Harness Experiments View', 'Dogfood Evidence View', 'Code Structure']) {
|
|
183
199
|
if (!arr(data.panes).includes(pane)) errors.push(`pane_missing:${pane}`);
|
|
184
200
|
}
|
|
185
201
|
if (arr(data.gates).some((gate) => !GATE_STATUSES.has(gate.status))) errors.push('gate_status_invalid');
|
|
@@ -214,6 +230,7 @@ export const ARTIFACT_VALIDATORS = {
|
|
|
214
230
|
memory_sweep_report: validateMemorySweepReport,
|
|
215
231
|
skill_forge_report: validateSkillForgeReport,
|
|
216
232
|
mistake_memory_report: validateMistakeMemoryReport,
|
|
233
|
+
harness_growth_report: validateHarnessGrowthReport,
|
|
217
234
|
code_structure_report: validateCodeStructureReport,
|
|
218
235
|
team_dashboard_state: validateTeamDashboardState,
|
|
219
236
|
cmux_pane_plan: validateCmuxPanePlan,
|