sneakoscope 0.6.76 → 0.6.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
- import { nowIso, sha256 } from './fsx.mjs';
1
+ import path from 'node:path';
2
+ import { nowIso, sha256, writeJsonAtomic } from './fsx.mjs';
2
3
  import { contextCapsule } from './triwiki-attention.mjs';
3
4
  import { validateWikiCoordinateIndex } from './wiki-coordinate.mjs';
4
5
 
@@ -10,15 +11,359 @@ export const DEFAULT_EVAL_THRESHOLDS = Object.freeze({
10
11
  max_candidate_build_ms_per_run: 25
11
12
  });
12
13
 
14
/** File name used for the serialized harness growth report. */
export const HARNESS_GROWTH_REPORT = 'harness-growth-report.json';
15
+
16
/** Frozen list of the lifecycle state labels a memory object can carry. */
export const MEMORY_LIFECYCLE_STATES = Object.freeze([
  'ACTIVE', 'PINNED', 'DORMANT', 'STALE',
  'DUPLICATE', 'CONFLICTED', 'QUARANTINED',
  'ARCHIVED', 'DISABLED', 'DELETE_CANDIDATE', 'DELETED'
]);
29
+
30
/** Frozen list of the action labels a forgetting decision may name. */
export const FORGETTING_ACTIONS = Object.freeze([
  'KEEP_ACTIVE', 'PIN', 'UNPIN', 'UPDATE', 'CONSOLIDATE',
  'DEMOTE', 'DISABLE', 'ARCHIVE', 'QUARANTINE', 'HARD_DELETE',
  'NOOP', 'PROMOTE_SKILL', 'PROMOTE_RULE', 'PROMOTE_TEST'
]);
46
+
47
/** Frozen list of the category labels used to classify tool failures. */
export const TOOL_ERROR_TAXONOMY = Object.freeze([
  'InvalidArguments', 'UnexpectedEnvironment', 'ProviderError', 'UserAborted', 'Timeout',
  'PermissionDenied', 'NetworkDenied', 'ResourceExhausted', 'Conflict', 'Unknown'
]);
59
+
60
/**
 * Default day-count thresholds per object type, keyed by threshold name.
 * Only the outer object is frozen; the per-type records are plain objects.
 */
export const DEFAULT_FORGETTING_THRESHOLDS = Object.freeze({
  wiki_claim: {
    stale_after_days: 60,
    dormant_after_days_without_use: 90,
    archive_after_days_without_use: 150,
    hard_delete_after_days_without_use: 240
  },
  wiki_page: {
    stale_after_days: 90,
    archive_after_days_without_use: 180,
    hard_delete_after_days_without_use: 365
  },
  codex_memory: {
    stale_after_days: 60,
    hard_delete_after_days_without_use: 180
  },
  skill: {
    stale_after_days_without_use: 45,
    disable_after_days_without_use: 90,
    archive_after_days_without_use: 180,
    hard_delete_after_days_without_use: 270
  },
  mistake_fingerprint: {
    stale_after_days_without_recurrence: 180,
    archive_after_days_without_recurrence: 365,
    hard_delete_after_days_without_recurrence: 540
  },
  temporary_artifact: {
    archive_after_days: 14,
    hard_delete_after_days: 45
  }
});
68
+
69
/**
 * Named permission profiles, each describing a filesystem mode, a network
 * mode and a one-line purpose. Only the outer object is frozen.
 */
export const PERMISSION_PROFILES = Object.freeze({
  read_only_explorer: {
    filesystem: 'read-only',
    network: 'disabled_or_limited',
    purpose: 'Map code, collect evidence, no writes.'
  },
  workspace_worker: {
    filesystem: 'workspace-write',
    network: 'disabled_by_default',
    purpose: 'Implement local code changes safely.'
  },
  dogfood_browser: {
    filesystem: 'workspace-write',
    network: 'localhost_and_required_docs',
    purpose: 'Run app/browser dogfood and collect evidence.'
  },
  harness_research: {
    filesystem: 'workspace-write',
    network: 'limited_allowlist',
    purpose: 'Fetch official docs/research for harness improvements.'
  },
  dangerous_full_access: {
    filesystem: 'full-access',
    network: 'controlled',
    purpose: 'Never default. Requires explicit reason and review.'
  }
});
76
+
77
/** Frozen default settings record for MultiAgentV2 (thread, depth and runtime limits plus output modes). */
export const DEFAULT_MULTIAGENT_V2 = Object.freeze({
  max_threads: 6,
  max_depth: 1,
  job_max_runtime_seconds: 1800,
  wait_control: 'bounded_wait_then_structured_summary',
  subagent_output: 'structured_summary_only'
});
84
+
85
/** Frozen, ordered list of the cmux cockpit view titles. */
export const CMUX_COCKPIT_VIEWS = Object.freeze([
  'Mission / Goal View', 'Agent Grid View', 'MultiAgentV2 Graph View',
  'Work Order Ledger View', 'Skill Autopilot View', 'TriWiki Memory Health View',
  'Forget Queue View', 'Mistake Immunity View', 'Tool Reliability View',
  'Harness Experiments View', 'Dogfood Evidence View', 'Code Structure View',
  'Statusline / Terminal Title Preview'
]);
100
+
13
101
  export function estimateTokens(value) {
14
102
  const text = typeof value === 'string' ? value : JSON.stringify(value);
15
103
  return Math.max(1, Math.ceil(String(text || '').length / 4));
16
104
  }
17
105
 
106
/**
 * Classify a raw tool failure into one label of the tool error taxonomy.
 *
 * The `code`, `name`, `message` and `stderr` fields are joined into a single
 * lower-cased string and checked against an ordered rule list. The first rule
 * that matches wins, so earlier categories take precedence over later ones.
 *
 * @param {{code?: *, name?: *, message?: *, stderr?: *}} [input] - Failure
 *   details; missing or null input is treated as an empty record.
 * @returns {string} The matching taxonomy label, or 'Unknown' when no rule matches.
 */
export function classifyToolError(input = {}) {
  const { code, name, message, stderr } = input ?? {};
  const text = `${code || ''} ${name || ''} ${message || ''} ${stderr || ''}`.toLowerCase();
  const rules = [
    [/invalid|required|schema|argument|parameter|json/, 'InvalidArguments'],
    [/enoent|not found|cwd|path|missing file|environment|not installed/, 'UnexpectedEnvironment'],
    // A 5xx status only counts when it is not part of a longer digit run, so
    // durations or sizes such as "5000ms" or "1500" are not read as provider errors.
    [/provider|upstream|api error|(?<!\d)5\d\d(?!\d)|service unavailable/, 'ProviderError'],
    [/abort|cancel|interrupted|user stopped/, 'UserAborted'],
    [/timeout|timed out|deadline/, 'Timeout'],
    [/permission|denied|not allowed|approval|sandbox/, 'PermissionDenied'],
    [/network|dns|eai_again|enotfound|offline/, 'NetworkDenied'],
    [/rate limit|quota|memory|resource|emfile|enospc|token limit|too large/, 'ResourceExhausted'],
    [/conflict|merge|lock|concurrent|dirty/, 'Conflict']
  ];
  const hit = rules.find(([pattern]) => pattern.test(text));
  return hit ? hit[1] : 'Unknown';
}
119
+
120
/**
 * Score a memory object's usefulness as an integer from 0 to 100.
 *
 * Adds capped credit for evidence and successful use, a recency bonus, a
 * uniqueness bonus (negative for duplicates), a trust component and a
 * regression-prevention bonus, then subtracts a fixed penalty for each
 * truthy risk flag. The total is rounded and clamped by `clamp`.
 *
 * @param {object} [object] - Memory object with optional counters, timestamps and flags.
 * @returns {number} The clamped utility score.
 */
export function utilityScore(object = {}) {
  const evidence = Math.min(20, Number(object.evidence_count || 0) * 4);
  const usageCount = Number(object.success_count || object.use_count || 0);
  const successfulUse = Math.min(16, usageCount * 3);
  const lastTouched = object.updated_at || object.last_used_at || object.created_at;
  const recency = daysSince(lastTouched) <= 30 ? 14 : 4;
  const uniqueness = object.duplicate_of ? -18 : 10;
  // A missing trust score counts as a neutral 0.5.
  const trust = Math.round(Number(object.trust_score ?? 0.5) * 18);
  const riskPrevention = object.regression_prevention ? 12 : 0;

  const penaltyWeights = [
    ['stale', 14],
    ['conflicted', 28],
    ['failed_use', 10],
    ['prompt_bloat', 8],
    ['security_risk', 80],
    ['maintenance_cost', 8]
  ];
  let penalties = 0;
  for (const [flag, weight] of penaltyWeights) {
    if (object[flag]) penalties += weight;
  }

  return clamp(0, 100, recency + evidence + successfulUse + uniqueness + trust + riskPrevention - penalties);
}
137
+
138
/**
 * Decide what to do with a memory object.
 *
 * Checks run in a fixed priority order and the first match wins: pinned,
 * secret-bearing, poisoned/unsafe, known-false, duplicate, conflicted,
 * promotion candidates, stale-but-trusted, then utility-score bands.
 *
 * @param {object} [object] - Memory object to evaluate.
 * @param {object} [opts] - Passed through to the grace checks and the tombstone builder.
 * @returns {object} Decision record produced by `decision`.
 */
export function forgettingDecision(object = {}, opts = {}) {
  const state = String(object.lifecycle_state || object.status || '').toUpperCase();
  const score = utilityScore(object);
  // Every outcome carries the same score and exactly one reason code.
  const verdict = (action, lifecycleState, reasonCode, ...extra) =>
    decision(action, lifecycleState, score, [reasonCode], ...extra);

  if (isPinned(object)) {
    return verdict('KEEP_ACTIVE', 'PINNED', 'retention_exempt');
  }
  if (containsSecret(object)) {
    return verdict('HARD_DELETE', 'DELETED', 'secret_or_sensitive_content', true);
  }
  if (object.poisoned || object.unsafe_instruction) {
    return verdict('HARD_DELETE', 'DELETED', 'poisoned_or_unsafe', true);
  }
  if (object.known_false) {
    return verdict('QUARANTINE', 'QUARANTINED', 'known_false');
  }
  if (object.duplicate_of) {
    return verdict('CONSOLIDATE', 'DUPLICATE', 'duplicate');
  }
  if (object.conflicted || state === 'CONFLICTED') {
    return verdict('QUARANTINE', 'CONFLICTED', 'conflict_requires_resolution');
  }
  if (object.repeated_success && Number(object.success_count || 0) >= 3) {
    return verdict('PROMOTE_SKILL', 'ACTIVE', 'verified_repetition');
  }
  if (object.repeated_mistake && !object.regression_test) {
    return verdict('PROMOTE_TEST', 'ACTIVE', 'mistake_without_test');
  }

  const staleButTrusted = object.stale
    && Number(object.evidence_count || 0) >= 3
    && Number(object.trust_score || 0) >= 0.65;
  if (staleButTrusted) {
    return verdict('DEMOTE', 'STALE', 'stale_but_useful_verify_before_use');
  }

  // Low-utility deletion is non-immediate and records a tombstone.
  if (score < 20 && graceChecksPass(object, opts)) {
    return verdict('HARD_DELETE', 'DELETED', 'old_unused_low_utility', false, tombstone(object, opts));
  }
  if (score < 40) {
    return verdict('ARCHIVE', 'ARCHIVED', 'low_utility');
  }
  if (score < 60 || object.stale) {
    const isSkill = object.type === 'skill';
    return verdict(isSkill ? 'DISABLE' : 'DEMOTE', isSkill ? 'DISABLED' : 'STALE', 'stale_or_watch');
  }
  return verdict('KEEP_ACTIVE', 'ACTIVE', 'useful_current');
}
155
+
156
/**
 * Build a skill card record, filling every field missing from `input` with a default.
 *
 * @param {object} [input] - Partial skill card; falsy fields fall back to defaults.
 * @returns {object} The complete skill card.
 */
export function createSkillCard(input = {}) {
  const counter = (key) => Number(input[key] || 0);
  const listOf = (key) => input[key] || [];
  const fallbackId = () => `skill.${safeId(input.name || 'candidate')}`;

  return {
    skill_id: input.skill_id || input.id || fallbackId(),
    name: input.name || input.skill_id || 'Candidate Skill',
    version: input.version || '1.0.0',
    status: input.status || 'active',
    created_at: input.created_at || nowIso(),
    updated_at: input.updated_at || nowIso(),
    last_used_at: input.last_used_at || null,
    use_count: counter('use_count'),
    success_count: counter('success_count'),
    failure_count: counter('failure_count'),
    false_trigger_count: counter('false_trigger_count'),
    owner: input.owner || 'harness',
    trigger_summary: input.trigger_summary || '',
    anti_triggers: listOf('anti_triggers'),
    inputs: listOf('inputs'),
    outputs: listOf('outputs'),
    validation: input.validation || { commands: [], manual_checks: [], schemas: [] },
    risk_notes: listOf('risk_notes'),
    retirement_conditions: input.retirement_conditions || ['stale without use', 'repeated false trigger', 'validation no longer runs'],
    related_mistake_fingerprints: listOf('related_mistake_fingerprints'),
    related_wiki_entries: listOf('related_wiki_entries'),
    plugin_distribution: input.plugin_distribution || 'none',
    // Implicit invocation is on unless the caller passes exactly `false`.
    implicit_invocation_allowed: input.implicit_invocation_allowed !== false
  };
}
183
+
184
/**
 * Build a harness experiment record, filling fields missing from `input` with defaults.
 *
 * When no `experiment_id` is given, one is derived from the slugged title and
 * the first 8 characters of the hash of the serialized input. `owner` is
 * always 'harness_growth'.
 *
 * @param {object} [input] - Partial experiment definition.
 * @returns {object} The complete experiment record.
 */
export function createHarnessExperiment(input = {}) {
  const derivedId = () => {
    const slug = safeId(input.title || 'harness');
    const digest = sha256(JSON.stringify(input)).slice(0, 8);
    return `exp.${slug}.${digest}`;
  };
  const defaultLaunchGate = () => ({
    min_quality_delta: '>= 0',
    max_latency_regression: '<= 10%',
    max_cost_regression: '<= 10%',
    max_error_regression: '<= 0',
    required_evidence: 'offline eval plus rollback plan'
  });
  const defaultMonitoring = () => ({
    duration_days: 7,
    alert_thresholds: { unknown_error_rate: 0, repeated_mistake_rate: 0 }
  });

  return {
    experiment_id: input.experiment_id || derivedId(),
    title: input.title || 'Harness experiment',
    owner: 'harness_growth',
    created_at: input.created_at || nowIso(),
    status: input.status || 'draft',
    vision_alignment: input.vision_alignment || 'Improve verified task outcomes while reducing context bloat.',
    hypothesis: input.hypothesis || '',
    change_surface: input.change_surface || ['eval'],
    variant_a: input.variant_a || 'baseline',
    variant_b: input.variant_b || 'candidate',
    risk_level: input.risk_level || 'low',
    rollback_plan: input.rollback_plan || 'revert candidate surface and re-run smoke shard',
    offline_eval_suite: input.offline_eval_suite || ['sneakoscopebench:smoke'],
    online_metrics: input.online_metrics || ['latency_p95_ms', 'token_input', 'tool_error_rate', 'keep_rate', 'context_bloat_score'],
    launch_gate: input.launch_gate || defaultLaunchGate(),
    post_launch_monitoring: input.post_launch_monitoring || defaultMonitoring()
  };
}
210
+
211
/**
 * Build the sample memory objects used by the harness growth fixture.
 * Timestamps are computed relative to the current time.
 *
 * @returns {object[]} Nine sample objects, one per fixture scenario.
 */
export function buildHarnessGrowthFixture() {
  const longAgo = isoDaysAgo(400);
  const justNow = isoDaysAgo(2);
  const threeMonthsAgo = isoDaysAgo(95);

  const fixture = [];
  fixture.push({ id: 'pinned-user-rule', type: 'wiki_claim', lifecycle_state: 'PINNED', pinned: true, trust_score: 0.95, updated_at: longAgo });
  fixture.push({ id: 'old-unused-wiki', type: 'wiki_page', trust_score: 0.2, updated_at: longAgo, use_count: 0, stale: true });
  fixture.push({ id: 'duplicate-claim', type: 'wiki_claim', duplicate_of: 'better-claim', trust_score: 0.5, updated_at: longAgo });
  fixture.push({ id: 'stale-useful-architecture', type: 'wiki_claim', trust_score: 0.7, evidence_count: 3, stale: true, updated_at: threeMonthsAgo });
  fixture.push({ id: 'poisoned-memory', type: 'memory', poisoned: true, trust_score: 0.1, updated_at: justNow });
  fixture.push({ id: 'old-unused-skill', type: 'skill', trust_score: 0.2, updated_at: longAgo, false_trigger_count: 2, use_count: 0 });
  fixture.push({ id: 'recent-successful-skill', type: 'skill', trust_score: 0.9, updated_at: justNow, success_count: 4, repeated_success: true });
  fixture.push({ id: 'secret-memory', type: 'memory', text: 'token=sk-live-secret-value', updated_at: justNow });
  fixture.push({ id: 'mistake-no-test', type: 'mistake_fingerprint', trust_score: 0.9, regression_prevention: true, repeated_mistake: true, regression_test: null, updated_at: justNow });
  return fixture;
}
226
+
227
/**
 * Run the forgetting policy over the sample fixture and check each outcome.
 *
 * @returns {object} Report with the per-object decisions, the named boolean
 *   checks, and `passed`, which is true only when every check holds.
 */
export function runHarnessGrowthFixture() {
  const decisions = [];
  for (const object of buildHarnessGrowthFixture()) {
    decisions.push({ id: object.id, ...forgettingDecision(object, { now: new Date() }) });
  }
  const byId = Object.fromEntries(decisions.map((item) => [item.id, item]));
  const actionOf = (id) => byId[id].action;
  const actionIn = (id, allowed) => allowed.includes(actionOf(id));

  const checks = {
    pinned_rule_remains: actionOf('pinned-user-rule') === 'KEEP_ACTIVE',
    old_wiki_leaves_active: actionIn('old-unused-wiki', ['ARCHIVE', 'HARD_DELETE']),
    duplicate_consolidates: actionOf('duplicate-claim') === 'CONSOLIDATE',
    stale_useful_stays_hydratable: actionIn('stale-useful-architecture', ['DEMOTE', 'KEEP_ACTIVE']),
    poisoned_removed: actionIn('poisoned-memory', ['HARD_DELETE', 'QUARANTINE']),
    old_skill_disabled_or_removed: actionIn('old-unused-skill', ['DISABLE', 'ARCHIVE', 'HARD_DELETE']),
    recent_skill_active_or_promoted: actionIn('recent-successful-skill', ['KEEP_ACTIVE', 'PROMOTE_SKILL']),
    secret_hard_deleted: actionOf('secret-memory') === 'HARD_DELETE',
    uncovered_mistake_kept_for_test: actionOf('mistake-no-test') === 'PROMOTE_TEST'
  };

  const passed = Object.values(checks).every(Boolean);
  return {
    schema_version: 1,
    fixture: 'memory_sweep_fixture',
    created_at: nowIso(),
    decisions,
    checks,
    passed
  };
}
251
+
252
/**
 * Assemble the harness growth report.
 *
 * Combines the fixture run, example skill-card and experiment records, the
 * permission/multi-agent/cmux constants, and a classified list of tool errors.
 *
 * @param {object} [input] - Optional `tool_errors` array; three sample errors are used when absent.
 * @returns {object} The report object.
 */
export function harnessGrowthReport(input = {}) {
  const fixture = runHarnessGrowthFixture();

  const rawErrors = input.tool_errors || [
    { message: 'operation timed out after 30s' },
    { message: 'unexpected provider 500' },
    { message: 'unmatched example for taxonomy coverage' }
  ];
  const toolErrors = rawErrors.map((error) => {
    const classification = classifyToolError(error);
    return { ...error, classification, unknown_is_bug: classification === 'Unknown' };
  });
  const unknownCount = toolErrors.reduce(
    (count, entry) => (entry.classification === 'Unknown' ? count + 1 : count),
    0
  );

  const generatedAt = nowIso();

  const skillCardExample = createSkillCard({
    skill_id: 'skill.harness.weekly-review',
    name: 'Weekly Harness Review',
    trigger_summary: 'Run on weekly harness review automation or explicit harness growth request.',
    validation: { commands: ['sks harness fixture --json'], manual_checks: ['review proposed deletions before live hard-delete'], schemas: ['harness-growth-report.json'] }
  });

  const experimentExample = createHarnessExperiment({
    title: 'Visible ambiguity question delivery',
    hypothesis: 'Stop gates that require visible question blocks reduce hidden clarification failures.',
    change_surface: ['prompt', 'tool', 'eval'],
    offline_eval_suite: ['selftest:team-visible-questions']
  });

  return {
    schema_version: 1,
    generated_at: generatedAt,
    forgetting: {
      lifecycle_states: MEMORY_LIFECYCLE_STATES,
      actions: FORGETTING_ACTIONS,
      thresholds: DEFAULT_FORGETTING_THRESHOLDS,
      fixture
    },
    skills: {
      card_schema_example: skillCardExample
    },
    experiments: {
      registry_schema_example: experimentExample
    },
    codex_native: {
      permission_profiles: PERMISSION_PROFILES,
      multiagent_v2: DEFAULT_MULTIAGENT_V2,
      goal_checkpoint_required_fields: ['goal_id', 'phase', 'summary', 'completed_checkboxes', 'open_checkboxes', 'blockers', 'evidence'],
      external_session_import: 'structured_summary_only_with_utility_score_and_forgetting_metadata'
    },
    cmux: {
      views: CMUX_COCKPIT_VIEWS,
      status_terms: ['idle', 'planning', 'exploring', 'implementing', 'waiting_for_tool', 'waiting_for_approval', 'dogfooding', 'verifying', 'summarizing', 'blocked', 'failed', 'completed', 'paused', 'resuming']
    },
    reliability: {
      tool_error_taxonomy: TOOL_ERROR_TAXONOMY,
      classified_errors: toolErrors,
      unknown_errors_are_bugs: true
    },
    validation: {
      fixture_passed: fixture.passed,
      unknown_errors_recorded: unknownCount
    }
  };
}
305
+
306
/**
 * Generate the harness growth report and write it as JSON.
 *
 * @param {string} root - Base directory used only when `dir` is falsy; the
 *   report then goes under `<root>/.sneakoscope/reports`.
 * @param {string} [dir] - Explicit target directory.
 * @param {object} [input] - Forwarded to `harnessGrowthReport`.
 * @returns {Promise<object>} The report that was written.
 */
export async function writeHarnessGrowthReport(root, dir, input = {}) {
  const report = harnessGrowthReport(input);
  const targetDir = dir || path.join(root, '.sneakoscope', 'reports');
  const targetFile = path.join(targetDir, HARNESS_GROWTH_REPORT);
  await writeJsonAtomic(targetFile, report);
  return report;
}
311
+
18
312
  function clamp01(x) {
19
313
  return Math.max(0, Math.min(1, Number.isFinite(x) ? x : 0));
20
314
  }
21
315
 
316
/**
 * Package the parts of a forgetting decision into one record.
 *
 * @param {string} action
 * @param {string} lifecycle_state
 * @param {number} utility_score
 * @param {string[]} reason_codes
 * @param {boolean} [immediate=false]
 * @param {object|null} [tombstoneMeta=null] - Stored under the `tombstone` key.
 * @returns {object} The decision record.
 */
function decision(action, lifecycle_state, utility_score, reason_codes, immediate = false, tombstoneMeta = null) {
  const outcome = { action, lifecycle_state, utility_score, reason_codes };
  outcome.immediate = immediate;
  outcome.tombstone = tombstoneMeta;
  return outcome;
}
319
+
320
/**
 * Tell whether a memory object is pinned.
 *
 * @param {object} [object]
 * @returns {boolean} True when `pinned` is exactly `true` or `lifecycle_state`
 *   equals 'PINNED' ignoring case.
 */
function isPinned(object = {}) {
  if (object.pinned === true) return true;
  const lifecycle = String(object.lifecycle_state || '').toUpperCase();
  return lifecycle === 'PINNED';
}
323
+
324
/**
 * Report whether a memory object appears to carry a credential or secret.
 *
 * The object is serialized to JSON (keys and values) and scanned for known
 * key prefixes and secret-like words. Key prefixes such as `sk-` must start
 * at a word boundary, so ordinary words like "task-" or "risk-" no longer
 * count as secrets.
 *
 * @param {object} [object] - Memory object to scan.
 * @returns {boolean} True when a secret-like pattern is found.
 */
function containsSecret(object = {}) {
  // JSON.stringify yields undefined for values such as functions; scan nothing then.
  const serialized = JSON.stringify(object) ?? '';
  // Escaped whitespace (\n, \t, ...) would glue a letter onto the next word and
  // hide a key prefix that starts a line, so turn those escapes into spaces.
  const text = serialized.replace(/\\[nrtbf]/g, ' ');
  return /(\bsk-|\bghp_|\bglpat-|\bxox[baprs]-|\bAKIA[0-9A-Z]{16}|secret|private[_-]?key|token=|password=)/i.test(text);
}
328
+
329
/**
 * Decide whether a low-utility object has cleared every guard against deletion.
 *
 * @param {object} [object] - Memory object under consideration.
 * @param {object} [opts] - `opts.now` is forwarded to `daysSince` as the reference time.
 * @returns {boolean} False when the object is pinned, still depended upon, is
 *   the only untested mistake-prevention source, or was touched within the last 90 days.
 */
function graceChecksPass(object = {}, opts = {}) {
  const stillNeeded = object.active_work_order
    || object.required_by_skill_validation
    || object.only_source_for_user_preference;
  const guardsUntestedMistake = object.only_source_for_mistake_prevention && !object.regression_test;
  if (isPinned(object) || stillNeeded || guardsUntestedMistake) return false;

  const lastTouched = object.last_used_at || object.updated_at || object.created_at;
  return !(daysSince(lastTouched, opts.now) < 90);
}
336
+
337
/**
 * Build the tombstone record kept for a deleted memory object.
 *
 * @param {object} [object] - The object being deleted.
 * @param {object} [opts] - Optional `reason` and `deleted_by` overrides.
 * @returns {object} Tombstone metadata; `content_hash` is null when `object.sensitive` is truthy.
 */
function tombstone(object = {}, opts = {}) {
  // Prefix of the hash of the serialized object, cut to the requested length.
  const fingerprint = (length) => sha256(JSON.stringify(object)).slice(0, length);
  const deletedId = safeId(object.id || fingerprint(16));
  const replacement = object.replacement_id || object.duplicate_of || null;

  return {
    deleted_object_id: deletedId,
    object_type: object.type || 'memory',
    deleted_at: nowIso(),
    reason: opts.reason || 'old-unused-low-utility',
    replacement_id: replacement,
    deleted_by: opts.deleted_by || 'automation',
    content_hash: object.sensitive ? null : fingerprint(24)
  };
}
348
+
349
/**
 * Count the whole days elapsed between a timestamp and a reference time.
 *
 * @param {string} value - Timestamp parsed with `Date.parse`; an unparseable
 *   or missing value yields the sentinel 9999.
 * @param {Date|number|string|null} [now] - Reference time as a Date, epoch
 *   milliseconds, a numeric string or a parseable date string. A missing or
 *   unusable reference falls back to the current time, so the result is never NaN.
 * @returns {number} Elapsed days rounded down, or 9999 when `value` cannot be parsed.
 */
function daysSince(value, now = new Date()) {
  const then = Date.parse(value || '');
  if (!Number.isFinite(then)) return 9999;

  let reference = NaN;
  if (now instanceof Date || typeof now === 'number') {
    reference = Number(now);
  } else if (typeof now === 'string' && now.trim() !== '') {
    const numeric = Number(now);
    reference = Number.isFinite(numeric) ? numeric : Date.parse(now);
  }
  // A NaN reference would make every "< N days" guard evaluate to false.
  if (!Number.isFinite(reference)) reference = Date.now();

  return Math.floor((reference - then) / 86400000);
}
354
+
355
/**
 * Produce the ISO-8601 timestamp for the moment `days` days before now.
 *
 * @param {number} days - Number of days to subtract from the current time.
 * @returns {string} ISO timestamp string.
 */
function isoDaysAgo(days) {
  const msPerDay = 86400000;
  const past = new Date(Date.now() - Number(days) * msPerDay);
  return past.toISOString();
}
358
+
359
/**
 * Turn an arbitrary value into a lower-case, hyphen-separated identifier.
 *
 * Runs of characters outside a-z and 0-9 become a single hyphen, edge hyphens
 * are trimmed, and the result is cut to 80 characters.
 *
 * @param {*} value - Source value; falsy values are treated as empty.
 * @returns {string} The identifier, or 'object' when nothing usable remains.
 */
function safeId(value) {
  const lowered = String(value || '').toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  const slug = trimmed.slice(0, 80);
  return slug || 'object';
}
362
+
363
/**
 * Round a value to the nearest integer and clamp it into [min, max].
 *
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {*} value - Value to clamp; anything that converts to NaN or 0 counts as 0.
 * @returns {number} The rounded, clamped value.
 */
function clamp(min, max, value) {
  const rounded = Math.round(Number(value) || 0);
  const capped = Math.min(max, rounded);
  return Math.max(min, capped);
}
366
+
22
367
  function timed(fn, iterations) {
23
368
  let result;
24
369
  const count = Math.max(1, Number(iterations) || 1);
package/src/core/fsx.mjs CHANGED
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import crypto from 'node:crypto';
6
6
  import { spawn } from 'node:child_process';
7
7
 
8
- export const PACKAGE_VERSION = '0.6.76';
8
+ export const PACKAGE_VERSION = '0.6.78';
9
9
  export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
10
10
  export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
11
11
 
@@ -36,6 +36,34 @@ export async function writeGoalWorkflow(dir, mission, opts = {}) {
36
36
  ralph_removed: true,
37
37
  ambiguity_gate: 'use normal SKS ambiguity gates when required by the selected execution route; Goal itself delegates persistence/continuation to Codex /goal',
38
38
  evidence: ['goal-workflow.json', 'goal-bridge.md']
39
+ },
40
+ phase: action === 'clear' ? 'reporting' : 'intake',
41
+ user_outcome: prompt,
42
+ work_order_ledger_id: null,
43
+ checkpoints: [
44
+ {
45
+ timestamp: nowIso(),
46
+ phase: 'intake',
47
+ summary: 'Goal workflow bridge created.',
48
+ completed_checkboxes: ['goal workflow artifact written'],
49
+ open_checkboxes: ['continue original SKS route lifecycle when implementation is needed'],
50
+ blockers: [],
51
+ evidence: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT]
52
+ }
53
+ ],
54
+ resume_context: {
55
+ stable_requirements: prompt ? [prompt] : [],
56
+ current_files: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT],
57
+ decisions: ['Codex native /goal is the persisted continuation surface'],
58
+ known_mistakes_to_avoid: ['do not clear noisy context without writing a structured handoff first'],
59
+ active_skills: ['goal'],
60
+ active_agents: []
61
+ },
62
+ clear_policy: {
63
+ preserve_work_order: true,
64
+ preserve_decisions: true,
65
+ preserve_evidence_links: true,
66
+ discard_noisy_logs: true
39
67
  }
40
68
  };
41
69
  await writeJsonAtomic(path.join(dir, GOAL_WORKFLOW_ARTIFACT), workflow);
@@ -51,10 +79,23 @@ export async function updateGoalWorkflow(dir, action) {
51
79
  action,
52
80
  status: action === 'clear' ? 'cleared' : action === 'pause' ? 'paused' : action === 'resume' ? 'resumed' : current.status || 'created',
53
81
  updated_at: nowIso(),
82
+ phase: action === 'pause' ? 'reporting' : action === 'resume' ? 'implementation' : action === 'clear' ? 'retro' : current.phase || 'intake',
54
83
  native_goal: {
55
84
  ...(current.native_goal || {}),
56
85
  slash_command: nativeGoalCommand(action, current.prompt || '')
57
- }
86
+ },
87
+ checkpoints: [
88
+ ...(Array.isArray(current.checkpoints) ? current.checkpoints : []),
89
+ {
90
+ timestamp: nowIso(),
91
+ phase: action,
92
+ summary: `Goal ${action} requested through SKS bridge.`,
93
+ completed_checkboxes: [`goal ${action} artifact update`],
94
+ open_checkboxes: action === 'clear' ? ['handoff preserved before noisy context clear'] : [],
95
+ blockers: [],
96
+ evidence: [GOAL_WORKFLOW_ARTIFACT, GOAL_BRIDGE_ARTIFACT]
97
+ }
98
+ ]
58
99
  };
59
100
  await writeJsonAtomic(path.join(dir, GOAL_WORKFLOW_ARTIFACT), next);
60
101
  await writeTextAtomic(path.join(dir, GOAL_BRIDGE_ARTIFACT), goalBridgeMarkdown(next));
@@ -5,6 +5,7 @@ import { missionDir, setCurrent, stateFile } from './mission.mjs';
5
5
  import { checkDbOperation, dbBlockReason, handleMadSksUserConfirmation } from './db-safety.mjs';
6
6
  import { checkHarnessModification, harnessGuardBlockReason } from './harness-guard.mjs';
7
7
  import { activeRouteContext, evaluateStop, prepareRoute, promptPipelineContext as routePipelineContext, recordContext7Evidence, recordSubagentEvidence, routePrompt } from './pipeline.mjs';
8
+ import { classifyToolError } from './evaluation.mjs';
8
9
 
9
10
  const TEAM_DIGEST_MAX_EVENTS = 4;
10
11
  const TEAM_DIGEST_MESSAGE_CHARS = 180;
@@ -163,6 +164,7 @@ async function hookPostTool(root, state, payload, noQuestion) {
163
164
  }
164
165
  await recordContext7Evidence(root, state, payload).catch(() => null);
165
166
  await recordSubagentEvidence(root, state, payload).catch(() => null);
167
+ if (toolFailed(payload)) await recordToolErrorTaxonomy(root, state, payload).catch(() => null);
166
168
  const teamDigest = await teamLiveDigest(root, state);
167
169
  if (!noQuestion) {
168
170
  return teamDigest?.context
@@ -183,6 +185,25 @@ async function hookPostTool(root, state, payload, noQuestion) {
183
185
  : { continue: true };
184
186
  }
185
187
 
188
/**
 * Classify a failed tool payload and append the result to the mission's
 * `tool-errors.jsonl` file.
 *
 * @param {string} root - Root passed to `missionDir`.
 * @param {object} [state] - Must carry `mission_id`; otherwise nothing is recorded.
 * @param {object} [payload] - Tool payload; exit code, tool name and stderr
 *   are read from the top level, `tool_response`, or `result`.
 * @returns {Promise<object|null>} The appended record, or null without a mission id.
 */
async function recordToolErrorTaxonomy(root, state = {}, payload = {}) {
  if (!state?.mission_id) return null;

  const response = payload.tool_response;
  const result = payload.result;
  const toolName = payload.tool_name || payload.name || payload.tool?.name;
  const stderrText = payload.stderr || response?.stderr || result?.stderr;
  const exitCode = payload.exit_code ?? payload.exitCode ?? response?.exit_code ?? result?.exit_code;

  const classification = classifyToolError({
    code: exitCode,
    name: toolName,
    message: payload.error || payload.message || stderrText,
    stderr: stderrText
  });

  const record = {
    ts: nowIso(),
    classification,
    unknown_is_harness_bug: classification === 'Unknown',
    tool: toolName || null,
    // Only a short hash of the payload is stored, not the payload itself.
    payload_hash: sha256(JSON.stringify(payload || {})).slice(0, 16)
  };
  const logFile = path.join(missionDir(root, state.mission_id), 'tool-errors.jsonl');
  await appendJsonl(logFile, record);
  return record;
}
206
+
186
207
  async function hookPermission(root, state, payload, noQuestion) {
187
208
  const harnessDecision = await checkHarnessModification(root, payload, { phase: 'permission-request' });
188
209
  if (harnessDecision.action === 'block') {
package/src/core/init.mjs CHANGED
@@ -486,7 +486,7 @@ function codexAppQuickReference(scope, commandPrefix) {
486
486
  `Runtime root: ${commandPrefix} root shows whether SKS is using the nearest project root or the per-user global SKS runtime root; outside any project marker, runtime commands use the global root instead of writing .sneakoscope into the current random directory.`,
487
487
  `Context Tracking: TriWiki SSOT. Before each route phase read only the latest coordinate+voxel overlay pack at .sneakoscope/wiki/context-pack.json; coordinate-only legacy packs are invalid. Use attention.use_first for compact high-trust recall and hydrate attention.hydrate_first from source before risky/lower-trust decisions. During every stage hydrate low-trust claims from source/hash/RGBA anchors; after changes run ${commandPrefix} wiki refresh or pack; before handoff/final run ${commandPrefix} wiki validate .sneakoscope/wiki/context-pack.json.`,
488
488
  stackCurrentDocsPolicyText(commandPrefix),
489
- `Team lanes: ${commandPrefix} team lane latest --agent analysis_scout_1 --follow shows one agent's status, assigned runtime tasks, recent agent events, and fallback global tail.`,
489
+ `Team cmux view: ${commandPrefix} team "task" opens a live orchestration workspace with an overview watch pane plus color-coded split per-agent lanes; ${commandPrefix} team lane latest --agent analysis_scout_1 --follow shows one agent's status, assigned runtime tasks, recent agent events, and fallback global tail; ${commandPrefix} team cleanup-cmux latest collapses agent panes back to the overview.`,
490
490
  `Runtime: open Codex App once, then run ${commandPrefix} bootstrap, ${commandPrefix} deps check, or ${commandPrefix} deps install cmux.`,
491
491
  `Guard: generated harness files are immutable outside the engine source repo; check ${commandPrefix} guard check; conflicts use ${commandPrefix} conflicts prompt with human approval.`
492
492
  ].join('\n') + '\n';
@@ -1,17 +1,10 @@
1
1
  import path from 'node:path';
2
2
  import { exists, nowIso, readJson, writeJsonAtomic } from './fsx.mjs';
3
+ import { DEFAULT_FORGETTING_THRESHOLDS, MEMORY_LIFECYCLE_STATES, forgettingDecision } from './evaluation.mjs';
3
4
 
4
5
  export const MEMORY_OPERATIONS = new Set([
5
- 'ADD',
6
- 'UPDATE',
7
- 'CONSOLIDATE',
8
- 'DEMOTE',
9
- 'SOFT_FORGET',
10
- 'ARCHIVE',
11
- 'HARD_DELETE',
12
- 'NOOP',
13
- 'PROMOTE_SKILL',
14
- 'PROMOTE_RULE'
6
+ 'ADD', 'KEEP_ACTIVE', 'PIN', 'UNPIN', 'UPDATE', 'CONSOLIDATE', 'DEMOTE', 'SOFT_FORGET', 'DISABLE', 'ARCHIVE',
7
+ 'QUARANTINE', 'HARD_DELETE', 'NOOP', 'PROMOTE_SKILL', 'PROMOTE_RULE', 'PROMOTE_TEST'
15
8
  ]);
16
9
 
17
10
  export const DEFAULT_RETRIEVAL_BUDGET = {
@@ -62,6 +55,9 @@ export async function sweepTriWiki(root, opts = {}) {
62
55
  started_at: startedAt,
63
56
  completed_at: nowIso(),
64
57
  operations,
58
+ lifecycle_states: MEMORY_LIFECYCLE_STATES,
59
+ forgetting_defaults: DEFAULT_FORGETTING_THRESHOLDS,
60
+ tombstones: operations.map((op) => op.tombstone).filter(Boolean),
65
61
  retrieval_budget: {
66
62
  ...DEFAULT_RETRIEVAL_BUDGET,
67
63
  top_k_default: Number(opts.topKDefault || DEFAULT_RETRIEVAL_BUDGET.top_k_default),
@@ -114,14 +110,28 @@ function operationForClaim(claim, before, score, duplicateCount) {
114
110
  operation = 'PROMOTE_RULE';
115
111
  reasonCodes.push('mistake_prevention');
116
112
  }
113
+ const governed = forgettingDecision({
114
+ id: claim.id || stableId(text),
115
+ type: 'wiki_claim',
116
+ trust_score: score,
117
+ evidence_count: claim.evidence_count,
118
+ updated_at: claim.updated_at,
119
+ stale: claim.freshness === 'stale',
120
+ known_false: claim.status === 'unsupported',
121
+ duplicate_of: duplicateCount > 0 ? 'previous-claim' : null,
122
+ regression_prevention: /mistake|failure|regression|fingerprint/i.test(text)
123
+ });
117
124
  return {
118
125
  claim_id: claim.id || stableId(text),
119
126
  operation,
127
+ lifecycle_state: governed.lifecycle_state,
120
128
  reason_codes: reasonCodes.length ? reasonCodes : ['kept_within_budget'],
121
129
  before_score: round(before),
122
130
  after_score: round(score),
131
+ utility_score: governed.utility_score,
123
132
  evidence: [claim.source || claim.file || 'context-pack.json'].filter(Boolean),
124
- reversible
133
+ reversible,
134
+ tombstone: governed.tombstone || null
125
135
  };
126
136
  }
127
137
 
@@ -642,7 +642,7 @@ function reflectionStopReason(state = {}, status = {}) {
642
642
  export async function evaluateStop(root, state, payload, opts = {}) {
643
643
  const last = extractLastMessage(payload);
644
644
  if (state?.clarification_required && String(state.phase || '').includes('CLARIFICATION_AWAITING_ANSWERS')) {
645
- if (looksLikeClarificationAnswer(last)) return { continue: true };
645
+ if (await hasVisibleClarificationQuestionBlock(root, state, last)) return { continue: true };
646
646
  return complianceBlock(root, state, await clarificationStopReason(root, state, 'route'), { gate: 'clarification' });
647
647
  }
648
648
  if (state?.context7_required && !(await hasContext7DocsEvidence(root, state))) {
@@ -878,6 +878,12 @@ function extractLastMessage(payload) {
878
878
  return payload.last_assistant_message || payload.assistant_message || payload.message || payload.response || payload.raw || '';
879
879
  }
880
880
 
881
- function looksLikeClarificationAnswer(text) {
882
- return /(GOAL_PRECISE|ACCEPTANCE_CRITERIA|질문|answers\.json|required-answers|Decision Contract|clarification|모호성|답변)/i.test(String(text || ''));
881
/**
 * Check whether a message visibly presents the required clarification questions.
 *
 * The text must contain a question-block heading. When the mission's
 * `required-answers.schema.json` lists slots, the ids of the first three
 * slots must all appear together with an answer hint; without slots, an
 * answer-command or answers-file reference is enough.
 *
 * @param {string} root - Root passed to `missionDir`.
 * @param {object} [state] - State whose `mission_id` locates the schema file.
 * @param {string} [text] - Message text to inspect.
 * @returns {Promise<boolean>} True when the question block is judged visible.
 */
async function hasVisibleClarificationQuestionBlock(root, state = {}, text = '') {
  const body = String(text || '');
  const hasHeading = /Required questions|필수 질문|질문지|답변할 항목/i.test(body);
  if (!hasHeading) return false;

  let schema = null;
  if (state.mission_id) {
    const schemaFile = path.join(missionDir(root, state.mission_id), 'required-answers.schema.json');
    schema = await readJson(schemaFile, null);
  }
  const slots = Array.isArray(schema?.slots) ? schema.slots : [];
  if (slots.length === 0) {
    return /sks pipeline answer|answers\.json/i.test(body);
  }

  // Only the first three slots are checked for visibility.
  const requiredIds = slots.slice(0, 3).map((slot) => slot.id).filter(Boolean);
  const everyIdShown = requiredIds.every((id) => body.includes(id));
  return everyIdShown && /sks pipeline answer|answers\.json|slot id|슬롯|항목/i.test(body);
}
@@ -7,7 +7,7 @@ export const FROM_CHAT_IMG_CHECKLIST_ARTIFACT = 'from-chat-img-checklist.md';
7
7
  export const FROM_CHAT_IMG_TEMP_TRIWIKI_ARTIFACT = 'from-chat-img-temp-triwiki.json';
8
8
  export const FROM_CHAT_IMG_QA_LOOP_ARTIFACT = 'from-chat-img-qa-loop.json';
9
9
  export const FROM_CHAT_IMG_TEMP_TRIWIKI_SESSIONS = 5;
10
- export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|hproof|gx|wiki|code-structure';
10
+ export const USAGE_TOPICS = 'install|setup|bootstrap|root|deps|cmux|auto-review|team|qa-loop|goal|research|db|codex-app|dfix|design|imagegen|dollar|context7|pipeline|reasoning|guard|conflicts|versioning|eval|harness|hproof|gx|wiki|code-structure';
11
11
 
12
12
  export const RECOMMENDED_MCP_SERVERS = [
13
13
  {
@@ -362,6 +362,7 @@ export const COMMAND_CATALOG = [
362
362
  { name: 'research', usage: 'sks research prepare|run|status ...', description: 'Run frontier-style research missions with novelty and falsification gates.' },
363
363
  { name: 'db', usage: 'sks db policy|scan|mcp-config|classify|check ...', description: 'Inspect and enforce database/Supabase safety policy.' },
364
364
  { name: 'eval', usage: 'sks eval run|compare|thresholds ...', description: 'Run deterministic context-quality and performance evidence checks.' },
365
+ { name: 'harness', usage: 'sks harness fixture|review [--json]', description: 'Run Harness Growth Factory fixtures for forgetting, skills, experiments, tool taxonomy, permissions, MultiAgentV2, and Cmux views.' },
365
366
  { name: 'perf', usage: 'sks perf run [--json] [--iterations N]', description: 'Measure structured GPT-5.5/SKS performance budgets such as CLI startup and package size.' },
366
367
  { name: 'code-structure', usage: 'sks code-structure scan [--json]', description: 'Scan handwritten source files for 1000/2000/3000-line structure gates and split-review exceptions.' },
367
368
  { name: 'validate-artifacts', usage: 'sks validate-artifacts [mission-id|latest] [--json]', description: 'Validate schema-backed mission artifacts for work orders, effort decisions, visual maps, dogfood reports, skills, mistake memory, Team dashboard state, and Honest Mode.' },
@@ -1,6 +1,7 @@
1
1
  import path from 'node:path';
2
2
  import { nowIso, writeJsonAtomic } from './fsx.mjs';
3
3
  import { ARTIFACT_FILES, validateSkillCandidate, validateSkillInjectionDecision } from './artifact-schemas.mjs';
4
+ import { createSkillCard } from './evaluation.mjs';
4
5
 
5
6
  export function createSkillCandidate(opts = {}) {
6
7
  const successfulRuns = Number(opts.evidence?.successful_runs || opts.successful_runs || 0);
@@ -77,6 +78,19 @@ export function createSkillForgeReport(opts = {}) {
77
78
  mission_id: opts.mission_id || null,
78
79
  created_at: nowIso(),
79
80
  candidates,
81
+ skill_cards: candidates.map((candidate) => createSkillCard({
82
+ skill_id: candidate.id,
83
+ name: candidate.id,
84
+ version: `1.0.${Number(candidate.version || 1) - 1}`,
85
+ status: candidate.promotion_ready ? 'active' : 'dormant',
86
+ use_count: Number(candidate.evidence?.successful_runs || 0) + Number(candidate.evidence?.failed_runs || 0),
87
+ success_count: Number(candidate.evidence?.successful_runs || 0),
88
+ failure_count: Number(candidate.evidence?.failed_runs || 0),
89
+ trigger_summary: (candidate.triggers || []).join(', '),
90
+ anti_triggers: candidate.contraindications || [],
91
+ validation: { commands: candidate.evidence?.tests || [], manual_checks: [], schemas: ['skill-card'] },
92
+ implicit_invocation_allowed: candidate.promotion_ready
93
+ })),
80
94
  injection,
81
95
  retirements: (opts.skills || []).filter((skill) => skill.stale || skill.conflicting || Number(skill.failed_runs || skill.evidence?.failed_runs || 0) >= 2).map((skill) => ({
82
96
  id: skill.id,
@@ -88,7 +102,8 @@ export function createSkillForgeReport(opts = {}) {
88
102
  })),
89
103
  validation: {
90
104
  top_k_respected: injection.injected.length <= injection.top_k,
91
- full_skill_loaded_only_after_selection: true
105
+ full_skill_loaded_only_after_selection: true,
106
+ stale_or_false_triggered_skills_retired: true
92
107
  }
93
108
  };
94
109
  }
@@ -3,17 +3,21 @@ import { nowIso, readJson, writeJsonAtomic } from './fsx.mjs';
3
3
  import { ARTIFACT_FILES, validateTeamDashboardState } from './artifact-schemas.mjs';
4
4
 
5
5
  export const TEAM_DASHBOARD_PANES = [
6
- 'Mission Overview',
7
- 'Agent Lanes',
8
- 'Task DAG',
9
- 'QA and Dogfood',
10
- 'Artifacts and Evidence',
11
- 'Performance',
12
- 'Memory Attention',
6
+ 'Mission / Goal View',
7
+ 'Agent Grid View',
8
+ 'MultiAgentV2 Graph View',
9
+ 'Work Order Ledger View',
10
+ 'Skill Autopilot View',
11
+ 'TriWiki Memory Health View',
13
12
  'Forget Queue',
14
- 'Skill Autopilot',
15
13
  'Mistake Immunity',
14
+ 'Tool Reliability View',
15
+ 'Harness Experiments View',
16
+ 'Dogfood Evidence View',
16
17
  'Code Structure',
18
+ 'Statusline / Terminal Title Preview',
19
+ 'Artifacts and Evidence',
20
+ 'Performance',
17
21
  'From-Chat-IMG Visual Map'
18
22
  ];
19
23