sneakoscope 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -275,7 +275,7 @@ For headless remotely controllable Codex App/server sessions on Codex CLI 0.130.
275
275
  sks codex-app remote-control -- --help
276
276
  ```
277
277
 
278
- `sks codex-app check` reports whether the installed Codex CLI is new enough, whether the required app flags are visible, whether Fast/speed-selector config is unlocked, and whether installed OpenAI default plugins such as Browser, Chrome, Computer Use, Documents, Presentations, Spreadsheets, and LaTeX are enabled. Codex CLI 0.130.0+ app-server/remote-control threads can pick up config changes live; older CLI/TUI sessions should still be restarted after `.codex/config.toml` or MCP/plugin changes.
278
+ `sks codex-app check` reports whether the installed Codex CLI is new enough, whether the required app flags are visible, whether Fast/speed-selector config is unlocked, and whether installed OpenAI default plugins such as Browser, Chrome, Computer Use, Documents, Presentations, Spreadsheets, and LaTeX are enabled. codex-lb can remain configured as a custom provider, but SKS keeps it off the top-level Codex App provider setting so native model, speed, and built-in feature UI stay visible. Codex CLI 0.130.0+ app-server/remote-control threads can pick up config changes live; older CLI/TUI sessions should still be restarted after `.codex/config.toml` or MCP/plugin changes.
279
279
 
280
280
  Then open Codex App and use prompt commands directly in the chat. Examples:
281
281
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sneakoscope",
3
3
  "displayName": "ㅅㅋㅅ",
4
- "version": "0.8.2",
4
+ "version": "0.8.4",
5
5
  "description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
6
6
  "type": "module",
7
7
  "homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
@@ -169,10 +169,10 @@ async function capturePostinstallCodexLbConfigSnapshot(home = process.env.HOME |
169
169
  async function restorePostinstallCodexLbConfigSnapshot(snapshot) {
170
170
  if (!snapshot?.base_url) return { status: 'skipped', reason: 'no_snapshot' };
171
171
  const current = await readText(snapshot.config_path, '');
172
- if (hasTopLevelCodexLbSelected(current) && codexLbProviderBaseUrl(current)) {
172
+ const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, snapshot.base_url));
173
+ if (next === ensureTrailingNewline(current) && codexLbProviderBaseUrl(current)) {
173
174
  return { status: 'present', config_path: snapshot.config_path };
174
175
  }
175
- const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, snapshot.base_url));
176
176
  await writeTextAtomic(snapshot.config_path, next);
177
177
  return { status: 'restored', config_path: snapshot.config_path };
178
178
  }
@@ -212,10 +212,10 @@ export async function codexLbStatus(opts = {}) {
212
212
  const envText = envExists ? await readText(envPath, '') : '';
213
213
  const envKeyConfigured = Boolean(parseCodexLbEnvKey(envText));
214
214
  const providerConfigured = /\[model_providers\.codex-lb\]/.test(config);
215
- const selected = /model_provider\s*=\s*"codex-lb"/.test(config);
215
+ const selected = hasTopLevelCodexLbSelected(config);
216
216
  const baseUrl = codexLbProviderBaseUrl(config) || parseCodexLbEnvBaseUrl(envText) || null;
217
217
  return {
218
- ok: selected && providerConfigured && envKeyConfigured && Boolean(baseUrl),
218
+ ok: providerConfigured && envKeyConfigured && Boolean(baseUrl),
219
219
  config_path: configPath,
220
220
  env_path: envPath,
221
221
  provider_configured: providerConfigured,
@@ -360,10 +360,10 @@ function codexLbProviderBaseUrl(text = '') {
360
360
  export async function repairCodexLbAuth(opts = {}) {
361
361
  let status = await codexLbStatus(opts);
362
362
  let configRepaired = false;
363
- if (!status.ok && status.env_key_configured && status.base_url) {
363
+ const currentConfig = await readText(status.config_path, '');
364
+ if (status.env_key_configured && status.base_url && (!status.ok || status.selected || hasTopLevelCodexModeLock(currentConfig))) {
364
365
  await ensureDir(path.dirname(status.config_path));
365
- const current = await readText(status.config_path, '');
366
- const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, status.base_url));
366
+ const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(currentConfig, status.base_url));
367
367
  await writeTextAtomic(status.config_path, next);
368
368
  configRepaired = true;
369
369
  status = await codexLbStatus(opts);
@@ -460,7 +460,7 @@ async function syncCodexApiKeyLogin(apiKey, opts = {}) {
460
460
  }
461
461
 
462
462
  function upsertCodexLbConfig(text = '', baseUrl) {
463
- let next = upsertTopLevelTomlString(text, 'model_provider', 'codex-lb');
463
+ let next = removeTopLevelTomlKeyIfValue(text, 'model_provider', 'codex-lb');
464
464
  const block = [
465
465
  '[model_providers.codex-lb]',
466
466
  'name = "OpenAI"',
@@ -547,6 +547,14 @@ function removeLegacyTopLevelCodexModeLocks(text = '') {
547
547
  }).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
548
548
  }
549
549
 
550
+ function removeTopLevelTomlKeyIfValue(text = '', key = '', value = '') {
551
+ const lines = String(text || '').split('\n');
552
+ const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
553
+ const end = firstTable === -1 ? lines.length : firstTable;
554
+ const keyPattern = new RegExp(`^\\s*${escapeRegExp(key)}\\s*=\\s*"${escapeRegExp(value)}"\\s*(?:#.*)?$`);
555
+ return lines.filter((line, index) => index >= end || !keyPattern.test(line)).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
556
+ }
557
+
550
558
  function removeTomlTableKey(text, table, key) {
551
559
  const lines = String(text || '').trimEnd().split('\n');
552
560
  if (lines.length === 1 && lines[0] === '') return '';
@@ -1095,18 +1103,18 @@ export async function selftestCodexLb(tmp) {
1095
1103
  const codexLbConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1096
1104
  const codexLbEnv = await safeReadText(path.join(codexLbHome, '.codex', 'sks-codex-lb.env'));
1097
1105
  const codexLbAuth = await safeReadText(path.join(codexLbHome, '.codex', 'auth.json'));
1098
- if (!codexLbSetupJson.ok || codexLbSetupJson.base_url !== 'https://lb.example.test/backend-api/codex' || !codexLbConfig.includes('model_provider = "codex-lb"') || !codexLbConfig.includes('[model_providers.codex-lb]') || !codexLbEnv.includes("CODEX_LB_BASE_URL='https://lb.example.test/backend-api/codex'") || !codexLbEnv.includes("CODEX_LB_API_KEY='sk-test'") || !/(\"auth_mode\"\s*:\s*\"apikey\")/.test(codexLbAuth)) throw new Error('selftest: codex-lb setup');
1106
+ if (!codexLbSetupJson.ok || codexLbSetupJson.base_url !== 'https://lb.example.test/backend-api/codex' || hasTopLevelCodexLbSelected(codexLbConfig) || !codexLbConfig.includes('[model_providers.codex-lb]') || !codexLbEnv.includes("CODEX_LB_BASE_URL='https://lb.example.test/backend-api/codex'") || !codexLbEnv.includes("CODEX_LB_API_KEY='sk-test'") || !/(\"auth_mode\"\s*:\s*\"apikey\")/.test(codexLbAuth)) throw new Error('selftest: codex-lb setup');
1099
1107
  if (!hasCodexUnstableFeatureWarningSuppression(codexLbConfig)) throw new Error('selftest: codex-lb setup did not suppress Codex unstable feature warning');
1100
1108
  await initProject(codexLbHome, { installScope: 'global', force: true, repair: true });
1101
1109
  const codexLbRepairSetupConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1102
- if (!codexLbRepairSetupConfig.includes('model_provider = "codex-lb"') || !codexLbRepairSetupConfig.includes('[model_providers.codex-lb]') || !codexLbRepairSetupConfig.includes('https://lb.example.test/backend-api/codex') || codexLbRepairSetupConfig.includes('sk-test')) throw new Error('selftest: init codex-lb');
1110
+ if (hasTopLevelCodexLbSelected(codexLbRepairSetupConfig) || !codexLbRepairSetupConfig.includes('[model_providers.codex-lb]') || !codexLbRepairSetupConfig.includes('https://lb.example.test/backend-api/codex') || codexLbRepairSetupConfig.includes('sk-test')) throw new Error('selftest: init codex-lb');
1103
1111
  if (!hasCodexUnstableFeatureWarningSuppression(codexLbRepairSetupConfig)) throw new Error('selftest: init codex-lb did not suppress Codex unstable feature warning');
1104
1112
  await writeTextAtomic(path.join(codexLbHome, '.codex', 'config.toml'), `${codexLbConfig}\n[mcp_servers.supabase]\nurl = "https://mcp.supabase.com/mcp?project_ref=ref&read_only=true&features=database,docs"\n`);
1105
1113
  const ptmp = path.join(tmp, 'codex-lb-project-config'), prevHome = process.env.HOME;
1106
1114
  try { process.env.HOME = codexLbHome; await initProject(ptmp, { installScope: 'global' }); }
1107
1115
  finally { if (prevHome === undefined) delete process.env.HOME; else process.env.HOME = prevHome; }
1108
1116
  const pcfg = await safeReadText(path.join(ptmp, '.codex', 'config.toml'));
1109
- if (!pcfg.includes('model_provider = "codex-lb"') || !pcfg.includes('[model_providers.codex-lb]') || !pcfg.includes('[mcp_servers.supabase]') || !pcfg.includes('read_only=true')) throw new Error('selftest: project codex-lb');
1117
+ if (hasTopLevelCodexLbSelected(pcfg) || !pcfg.includes('[model_providers.codex-lb]') || !pcfg.includes('[mcp_servers.supabase]') || !pcfg.includes('read_only=true')) throw new Error('selftest: project codex-lb');
1110
1118
  if (!hasCodexUnstableFeatureWarningSuppression(pcfg)) throw new Error('selftest: project codex-lb config did not suppress Codex unstable feature warning');
1111
1119
  await writeTextAtomic(path.join(codexLbHome, '.codex', 'auth.json'), '{"auth_mode":"browser"}\n');
1112
1120
  const codexLbRepair = await runProcess(process.execPath, [path.join(packageRoot(), 'bin', 'sks.mjs'), 'auth', 'repair', '--json'], { cwd: tmp, env: codexLbEnvForSelftest, timeoutMs: 15000, maxOutputBytes: 64 * 1024 });
@@ -1167,7 +1175,7 @@ export async function selftestCodexLb(tmp) {
1167
1175
  const codexLbPostBootstrapConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1168
1176
  const codexLbLoginCallsAfterBootstrap = (await safeReadText(path.join(codexLbHome, '.codex', 'login-calls.log'))).trim().split(/\r?\n/).filter(Boolean).length;
1169
1177
  if (!codexLbPostBootstrapAuth.includes('"auth_mode":"apikey"') || !codexLbPostBootstrapAuth.includes('sk-test') || codexLbLoginCallsAfterBootstrap <= codexLbLoginCallsBeforeBootstrap) throw new Error('selftest: postinstall drift auth');
1170
- if (!codexLbPostBootstrapConfig.includes('model_provider = "codex-lb"') || !codexLbPostBootstrapConfig.includes('[model_providers.codex-lb]') || !codexLbPostBootstrapConfig.includes('https://lb.example.test/backend-api/codex') || codexLbPostBootstrapConfig.includes('sk-test')) throw new Error('selftest: postinstall drift config');
1178
+ if (hasTopLevelCodexLbSelected(codexLbPostBootstrapConfig) || !codexLbPostBootstrapConfig.includes('[model_providers.codex-lb]') || !codexLbPostBootstrapConfig.includes('https://lb.example.test/backend-api/codex') || codexLbPostBootstrapConfig.includes('sk-test')) throw new Error('selftest: postinstall drift config');
1171
1179
  const doctorProject = tmpdir();
1172
1180
  await ensureDir(path.join(doctorProject, '.git'));
1173
1181
  await writeTextAtomic(path.join(doctorProject, 'package.json'), '{"name":"codex-lb-doctor-project","version":"0.0.0"}\n');
@@ -1184,7 +1192,7 @@ export async function selftestCodexLb(tmp) {
1184
1192
  const codexLbDoctorJson = JSON.parse(codexLbDoctorRepair.stdout);
1185
1193
  const codexLbDoctorAuth = await safeReadText(path.join(codexLbHome, '.codex', 'auth.json'));
1186
1194
  const codexLbDoctorConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1187
- if (!codexLbDoctorJson.repair?.codex_lb?.ok || !codexLbDoctorJson.repair.codex_lb.config_repaired || !codexLbDoctorJson.codex_lb?.ok || !codexLbDoctorAuth.includes('"auth_mode":"apikey"') || !codexLbDoctorAuth.includes('sk-test') || !codexLbDoctorConfig.includes('model_provider = "codex-lb"') || !codexLbDoctorConfig.includes('https://lb.example.test/backend-api/codex') || !hasCodexUnstableFeatureWarningSuppression(codexLbDoctorConfig)) throw new Error('selftest: doctor codex-lb');
1195
+ if (!codexLbDoctorJson.repair?.codex_lb?.ok || !codexLbDoctorJson.repair.codex_lb.config_repaired || !codexLbDoctorJson.codex_lb?.ok || !codexLbDoctorAuth.includes('"auth_mode":"apikey"') || !codexLbDoctorAuth.includes('sk-test') || hasTopLevelCodexLbSelected(codexLbDoctorConfig) || !codexLbDoctorConfig.includes('https://lb.example.test/backend-api/codex') || !hasCodexUnstableFeatureWarningSuppression(codexLbDoctorConfig)) throw new Error('selftest: doctor codex-lb');
1188
1196
  const codexLbContext7Bin = path.join(tmp, 'codex-lb-context7-bin');
1189
1197
  await ensureDir(codexLbContext7Bin);
1190
1198
  await writeTextAtomic(path.join(codexLbContext7Bin, 'codex'), '#!/bin/sh\nif [ "$1" = "--version" ]; then echo "codex-cli 99.0.0"; exit 0; fi\nif [ "$CODEX_LB_API_KEY" ]; then echo "context7 leaked CODEX_LB_API_KEY" >&2; exit 77; fi\nif [ "$1" = "mcp" ] && [ "$2" = "list" ]; then echo ""; exit 0; fi\nif [ "$1" = "mcp" ] && [ "$2" = "add" ]; then echo "context7 added"; exit 0; fi\necho "unexpected codex $*" >&2\nexit 2\n');
@@ -1326,7 +1334,7 @@ function hasTopLevelCodexModeLock(text = '') {
1326
1334
  const lines = String(text || '').split('\n');
1327
1335
  const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
1328
1336
  const top = (firstTable === -1 ? lines : lines.slice(0, firstTable)).join('\n');
1329
- return /(^|\n)\s*model\s*=\s*"codex-lb"\s*(\n|$)/.test(top) || /(^|\n)\s*model_provider\s*=\s*"openai"\s*(\n|$)/.test(top) || /(^|\n)\s*model_reasoning_effort\s*=/.test(top);
1337
+ return /(^|\n)\s*model_provider\s*=\s*"codex-lb"\s*(\n|$)/.test(top) || /(^|\n)\s*model_reasoning_effort\s*=/.test(top);
1330
1338
  }
1331
1339
 
1332
1340
  function hasDeprecatedCodexHooksFeatureFlag(text = '') {
package/src/cli/main.mjs CHANGED
@@ -22,7 +22,7 @@ import { bumpProjectVersion, disableVersionGitHook, runVersionPreCommit, version
22
22
  import { rustInfo } from '../core/rust-accelerator.mjs';
23
23
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
24
24
  import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evaluation.mjs';
25
- import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
25
+ import { buildResearchPrompt, evaluateResearchGate, isDatedResearchPaperArtifact, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
26
26
  import { evaluateRecallPulseFixtures, readMissionStatusLedger, writeRecallPulseArtifacts } from '../core/recallpulse.mjs';
27
27
  import {
28
28
  PPT_AUDIENCE_STRATEGY_ARTIFACT,
@@ -155,11 +155,19 @@ function codexLbImmediateLaunchOpts(args = [], lb = {}, opts = {}) {
155
155
  return { ...opts, session, codexArgs: [...(opts.codexArgs || []), '-c', 'model_provider="openai"'], codexLbBypassed: true };
156
156
  }
157
157
  if (!lb?.ok) return opts;
158
- if (explicitSession) return opts;
158
+ const nextOpts = withCodexLbProviderArgs(opts);
159
+ if (explicitSession) return nextOpts;
159
160
  const session = sanitizeTmuxSessionName(`sks-codex-lb-${Date.now().toString(36)}-${defaultTmuxSessionName(root)}`);
160
161
  console.log(`codex-lb active for this launch: ${lb.env_path || lb.base_url || 'configured'}`);
161
162
  console.log(`Using fresh tmux session: ${session}`);
162
- return { ...opts, session, codexLbFreshSession: true };
163
+ return { ...nextOpts, session, codexLbFreshSession: true };
164
+ }
165
+
166
+ function withCodexLbProviderArgs(opts = {}) {
167
+ const codexArgs = [...(opts.codexArgs || [])];
168
+ const hasProviderOverride = codexArgs.some((arg) => /model_provider\s*=/.test(String(arg || '')));
169
+ if (!hasProviderOverride) codexArgs.push('-c', 'model_provider="codex-lb"');
170
+ return { ...opts, codexArgs };
163
171
  }
164
172
 
165
173
  function help(args = []) {
@@ -3946,17 +3954,21 @@ async function selftest() {
3946
3954
  const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
3947
3955
  if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'layered_source_retrieval_and_triangulation') throw new Error('selftest: research plan contract');
3948
3956
  if (researchPlan.execution_policy?.default_max_cycles !== 12 || researchPlan.mutation_policy?.implementation_allowed !== false || !String(researchPlan.research_council?.debate_policy?.rule || '').includes('every scout records final agreement')) throw new Error('selftest: research consensus/no-code contract');
3949
- if (!researchPlan.research_council?.scouts?.every((scout) => scout.display_name && scout.persona && scout.persona_boundary && scout.reasoning_effort === 'xhigh')) throw new Error('selftest: research scout persona contract missing from plan');
3957
+ if (!researchPlan.research_council?.scouts?.every((scout) => scout.agent_name && scout.display_name && scout.persona && scout.persona_boundary && scout.reasoning_effort === 'xhigh') || !researchPlan.research_council.scouts.some((scout) => scout.agent_name === 'Einstein Scout')) throw new Error('selftest: research scout persona contract missing from plan');
3958
+ const researchPaperArtifact = researchPlan.artifacts?.research_paper;
3959
+ if (!isDatedResearchPaperArtifact(researchPaperArtifact) || researchPaperArtifact === 'research-paper.md') throw new Error('selftest: research paper artifact filename is not dated and titled');
3950
3960
  const researchPrompt = buildResearchPrompt({ id: researchMission.id, mission: researchMission, plan: researchPlan, cycle: 1, previous: '' });
3951
- if (!researchPrompt.includes('NO-CODE-MUTATION POLICY') || !researchPrompt.includes('not a fixed three-cycle run') || !researchPrompt.includes('unanimous_consensus=true')) throw new Error('selftest: research prompt missing no-code unanimous consensus policy');
3961
+ if (!researchPrompt.includes('NO-CODE-MUTATION POLICY') || !researchPrompt.includes('not a fixed three-cycle run') || !researchPrompt.includes('unanimous_consensus=true') || !researchPrompt.includes('agent_name') || !researchPrompt.includes(researchPaperArtifact)) throw new Error('selftest: research prompt missing no-code unanimous consensus policy');
3952
3962
  const rArts = researchPlan.required_artifacts || [];
3953
3963
  for (const a of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
3954
- if (!rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
3964
+ if (!rArts.includes(researchPaperArtifact) || rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
3955
3965
  const initialResearchGate = await evaluateResearchGate(researchDir);
3956
3966
  if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing', 'consensus_iteration_missing', 'unanimous_consensus_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
3957
3967
  const researchGate = await writeMockResearchResult(researchDir, researchPlan);
3958
3968
  if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
3969
+ if (!(await exists(path.join(researchDir, researchPaperArtifact))) || await exists(path.join(researchDir, 'research-paper.md'))) throw new Error('selftest: mock research paper filename did not use dated title artifact');
3959
3970
  const rm = researchGate.metrics || {};
3971
+ if (rm.research_paper_artifact !== researchPaperArtifact) throw new Error('selftest: research gate did not report dated paper artifact');
3960
3972
  if (rm.scout_persona_contract_ok !== true || (rm.scout_persona_issues || []).length) throw new Error('selftest: research scout persona contract did not pass');
3961
3973
  if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants', 'genius_opinion_summaries'].some((m) => rm[m] < 5) || ['counterevidence_sources', 'falsification_cases', 'triangulation_checks'].some((m) => rm[m] < 1) || rm.paper_sections < 8 || rm.citation_coverage !== true || rm.source_layers_covered < 7 || rm.consensus_iterations < 1 || rm.unanimous_consensus !== true || rm.consensus_agreed_scouts < 5) throw new Error('selftest: research metrics');
3962
3974
  await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
@@ -9,7 +9,7 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
9
9
  import { sealContract } from '../core/decision-contract.mjs';
10
10
  import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
11
11
  import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
12
- import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_PAPER_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
12
+ import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, findResearchPaperArtifact, researchPaperArtifactForPlan, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
13
13
  import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
14
14
  import { evaluateDoneGate } from '../core/hproof.mjs';
15
15
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
@@ -76,11 +76,19 @@ function codexLbImmediateLaunchOpts(args = [], lb = {}, opts = {}) {
76
76
  return { ...opts, session, codexArgs: [...(opts.codexArgs || []), '-c', 'model_provider="openai"'], codexLbBypassed: true };
77
77
  }
78
78
  if (!lb?.ok) return opts;
79
- if (explicitSession) return opts;
79
+ const nextOpts = withCodexLbProviderArgs(opts);
80
+ if (explicitSession) return nextOpts;
80
81
  const session = sanitizeTmuxSessionName(`sks-codex-lb-${Date.now().toString(36)}-${defaultTmuxSessionName(root)}`);
81
82
  console.log(`codex-lb active for this launch: ${lb.env_path || lb.base_url || 'configured'}`);
82
83
  console.log(`Using fresh tmux session: ${session}`);
83
- return { ...opts, session, codexLbFreshSession: true };
84
+ return { ...nextOpts, session, codexLbFreshSession: true };
85
+ }
86
+
87
+ function withCodexLbProviderArgs(opts = {}) {
88
+ const codexArgs = [...(opts.codexArgs || [])];
89
+ const hasProviderOverride = codexArgs.some((arg) => /model_provider\s*=/.test(String(arg || '')));
90
+ if (!hasProviderOverride) codexArgs.push('-c', 'model_provider="codex-lb"');
91
+ return { ...opts, codexArgs };
84
92
  }
85
93
 
86
94
  export async function madHighCommand(args = [], deps = {}) {
@@ -491,7 +499,7 @@ async function researchPrepare(args) {
491
499
  console.log(`Methodology: ${plan.methodology}`);
492
500
  console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
493
501
  console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
494
- console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
502
+ console.log(`Paper: ${researchPaperArtifactForPlan(plan)}`);
495
503
  console.log(`Genius summary: ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}`);
496
504
  console.log(`Source skill: ${RESEARCH_SOURCE_SKILL_ARTIFACT}`);
497
505
  console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
@@ -612,7 +620,9 @@ async function researchStatus(args) {
612
620
  const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
613
621
  const sourceSkillText = await readText(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), '');
614
622
  const geniusSummaryText = await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '');
615
- const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
623
+ const plan = await readJson(path.join(dir, 'research-plan.json'), null);
624
+ const paperArtifact = await findResearchPaperArtifact(dir, plan);
625
+ const paperText = paperArtifact.exists ? await readText(paperArtifact.path, '') : '';
616
626
  const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
617
627
  const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
618
628
  const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
@@ -635,6 +645,7 @@ async function researchStatus(args) {
635
645
  unanimous_consensus: gate?.metrics?.unanimous_consensus ?? gate?.unanimous_consensus ?? debateLedger?.unanimous_consensus ?? false,
636
646
  research_source_skill_present: Boolean(sourceSkillText.trim()),
637
647
  genius_opinion_summary_present: Boolean(geniusSummaryText.trim()),
648
+ research_paper_artifact: paperArtifact.name,
638
649
  paper_present: Boolean(paperText.trim()),
639
650
  paper_sections: countResearchPaperSections(paperText),
640
651
  falsification_cases: falsificationLedger?.cases?.length ?? null
@@ -401,6 +401,7 @@ async function codexFastModeConfigStatus(opts = {}) {
401
401
  if (!config.text) continue;
402
402
  const topLevel = topLevelToml(config.text);
403
403
  if (/(^|\n)\s*model_reasoning_effort\s*=/.test(topLevel)) blockers.push(`${config.scope}:top_level_model_reasoning_effort`);
404
+ if (/(^|\n)\s*model_provider\s*=\s*"codex-lb"\s*(?:#.*)?(?=\n|$)/.test(topLevel)) blockers.push(`${config.scope}:top_level_codex_lb_provider`);
404
405
  if (/(^|\n)\s*fast_default_opt_out\s*=\s*true\s*(?:#.*)?(?=\n|$)/.test(tomlTable(config.text, 'notice'))) blockers.push(`${config.scope}:fast_default_opt_out`);
405
406
  }
406
407
  const merged = configs.map((config) => config.text).join('\n');
package/src/core/fsx.mjs CHANGED
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import crypto from 'node:crypto';
6
6
  import { spawn } from 'node:child_process';
7
7
 
8
- export const PACKAGE_VERSION = '0.8.2';
8
+ export const PACKAGE_VERSION = '0.8.4';
9
9
  export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
10
10
  export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
11
11
 
package/src/core/init.mjs CHANGED
@@ -48,7 +48,7 @@ export function hasTopLevelCodexModeLock(text = '') {
48
48
  const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
49
49
  const top = (firstTable === -1 ? lines : lines.slice(0, firstTable)).join('\n');
50
50
  const model = top.match(/^model\s*=\s*"([^"]+)"/m)?.[1];
51
- return (Boolean(model) && model !== 'gpt-5.5') || /^model_reasoning_effort\s*=/m.test(top);
51
+ return (Boolean(model) && model !== 'gpt-5.5') || /^model_reasoning_effort\s*=/m.test(top) || /^model_provider\s*=\s*"codex-lb"/m.test(top);
52
52
  }
53
53
 
54
54
  export function hasDeprecatedCodexHooksFeatureFlag(text = '') {
@@ -502,6 +502,7 @@ function installPolicy(scope, commandPrefix) {
502
502
 
503
503
  function mergeManagedCodexConfigToml(existingContent = '') {
504
504
  let next = removeLegacyTopLevelCodexModeLocks(String(existingContent || '').trimEnd());
505
+ next = removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb');
505
506
  next = removeTomlTableKey(next, 'notice', 'fast_default_opt_out');
506
507
  next = removeTomlTableKey(next, 'features', 'codex_hooks');
507
508
  next = upsertTopLevelTomlString(next, 'model', 'gpt-5.5');
@@ -546,13 +547,15 @@ async function mergeGlobalCodexConfigIfAvailable(configText = '', configPath = '
546
547
  const globalConfig = await readText(globalConfigPath, '');
547
548
  let next = mergeGlobalMcpServers(configText, globalConfig);
548
549
  next = mergeGlobalCodexAppRuntimeTables(next, globalConfig);
549
- if (selectedRe.test(next) && /\[model_providers\.codex-lb\]/.test(next)) return `${String(next || '').trim()}\n`;
550
+ if (selectedRe.test(next) && /\[model_providers\.codex-lb\]/.test(next)) {
551
+ return `${removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb').trim()}\n`;
552
+ }
550
553
  const envPath = path.join(home, '.codex', 'sks-codex-lb.env');
551
554
  if (!(await exists(envPath))) return next;
552
555
  const envText = await readText(envPath, '');
553
556
  const baseUrl = globalConfig.match(/(^|\n)\[model_providers\.codex-lb\][\s\S]*?\n\s*base_url\s*=\s*"([^"]+)"/)?.[2] || parseCodexLbEnvBaseUrl(envText);
554
557
  if (!parseCodexLbEnvKey(envText) || !baseUrl || (!selectedRe.test(globalConfig) && !parseCodexLbEnvBaseUrl(envText))) return next;
555
- next = upsertTopLevelTomlString(next, 'model_provider', 'codex-lb');
558
+ next = removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb');
556
559
  next = upsertTomlTable(next, 'model_providers.codex-lb', `[model_providers.codex-lb]\nname = "OpenAI"\nbase_url = "${baseUrl}"\nwire_api = "responses"\nenv_key = "CODEX_LB_API_KEY"\nsupports_websockets = true\nrequires_openai_auth = true`);
557
560
  return `${next.trim()}\n`;
558
561
  }
@@ -612,6 +615,14 @@ function removeLegacyTopLevelCodexModeLocks(text = '') {
612
615
  }).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
613
616
  }
614
617
 
618
+ function removeTopLevelTomlKeyIfValue(text = '', key = '', value = '') {
619
+ const lines = String(text || '').split('\n');
620
+ const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
621
+ const end = firstTable === -1 ? lines.length : firstTable;
622
+ const keyPattern = new RegExp(`^\\s*${escapeRegExp(key)}\\s*=\\s*"${escapeRegExp(value)}"\\s*(?:#.*)?$`);
623
+ return lines.filter((line, index) => index >= end || !keyPattern.test(line)).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
624
+ }
625
+
615
626
  function upsertTopLevelTomlString(text, key, value) {
616
627
  const line = `${key} = "${value}"`;
617
628
  const lines = String(text || '').split('\n');
@@ -17,11 +17,62 @@ export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
17
17
  ['references', 'sources']
18
18
  ]);
19
19
 
20
- export const RESEARCH_SCOUT_COUNCIL = Object.freeze(RESEARCH_SCOUT_PERSONA_CONTRACT.map((scout) => Object.freeze({
21
- ...scout,
22
- label: scout.display_name,
23
- required_outputs: scout.required_outputs
24
- })));
20
+ function cleanResearchArtifactDate(value = '') {
21
+ const match = String(value || '').match(/\d{4}-\d{2}-\d{2}/);
22
+ return match ? match[0] : nowIso().slice(0, 10);
23
+ }
24
+
25
+ function researchTitleSlug(prompt = '') {
26
+ const cleaned = String(prompt || '')
27
+ .normalize('NFKC')
28
+ .replace(/[`"'<>]/g, ' ')
29
+ .replace(/[^\p{L}\p{N}]+/gu, '-')
30
+ .replace(/^-+|-+$/g, '')
31
+ .toLowerCase();
32
+ const slug = cleaned.split('-').filter(Boolean).slice(0, 10).join('-').slice(0, 90).replace(/-+$/g, '');
33
+ return slug || 'research';
34
+ }
35
+
36
+ export function researchPaperArtifactName(prompt = '', createdAt = nowIso(), opts = {}) {
37
+ const titleSource = opts.title || opts.paperTitle || prompt;
38
+ return `${cleanResearchArtifactDate(createdAt)}-${researchTitleSlug(titleSource)}-research-paper.md`;
39
+ }
40
+
41
+ export function isDatedResearchPaperArtifact(name = '') {
42
+ return /^\d{4}-\d{2}-\d{2}-[^\s/\\]+-research-paper\.md$/u.test(String(name || ''));
43
+ }
44
+
45
+ export function researchPaperArtifactForPlan(plan = null) {
46
+ const artifact = plan?.artifacts?.research_paper || plan?.paper_artifact;
47
+ return artifact ? path.basename(String(artifact)) : RESEARCH_PAPER_ARTIFACT;
48
+ }
49
+
50
+ export async function findResearchPaperArtifact(dir, plan = null, opts = {}) {
51
+ const preferred = researchPaperArtifactForPlan(plan);
52
+ const allowLegacyFallback = opts.allowLegacyFallback === true || preferred === RESEARCH_PAPER_ARTIFACT;
53
+ const names = [...new Set([preferred, allowLegacyFallback ? RESEARCH_PAPER_ARTIFACT : null].filter(Boolean))];
54
+ for (const name of names) {
55
+ const file = path.join(dir, name);
56
+ if (await exists(file)) return { name, path: file, exists: true, preferred: name === preferred, legacy: name === RESEARCH_PAPER_ARTIFACT };
57
+ }
58
+ return { name: preferred, path: path.join(dir, preferred), exists: false, preferred: true, legacy: false };
59
+ }
60
+
61
+ export function researchScoutAgentName(scout = {}) {
62
+ return String(scout.agent_name || scout.display_name || scout.label || scout.id || 'Research Scout').trim();
63
+ }
64
+
65
+ export const RESEARCH_SCOUT_COUNCIL = Object.freeze(RESEARCH_SCOUT_PERSONA_CONTRACT.map((scout) => {
66
+ const displayName = scout.display_name || scout.label || scout.id;
67
+ return Object.freeze({
68
+ ...scout,
69
+ display_name: displayName,
70
+ label: displayName,
71
+ agent_name: displayName,
72
+ codex_agent_name: displayName,
73
+ required_outputs: scout.required_outputs
74
+ });
75
+ }));
25
76
 
26
77
  export const RESEARCH_SOURCE_LAYERS = Object.freeze([
27
78
  {
@@ -86,12 +137,21 @@ export const RESEARCH_SOURCE_LAYER_IDS = Object.freeze(RESEARCH_SOURCE_LAYERS.ma
86
137
 
87
138
  export function createResearchPlan(prompt, opts = {}) {
88
139
  const depth = opts.depth || 'frontier';
140
+ const createdAt = nowIso();
141
+ const paperArtifact = researchPaperArtifactName(prompt, createdAt, opts);
89
142
  return {
90
143
  schema_version: 1,
91
144
  prompt,
92
145
  depth,
93
- created_at: nowIso(),
146
+ created_at: createdAt,
94
147
  methodology: 'genius-scout-council-frontier-discovery-loop',
148
+ paper_artifact: paperArtifact,
149
+ artifacts: {
150
+ research_paper: paperArtifact,
151
+ legacy_research_paper: RESEARCH_PAPER_ARTIFACT,
152
+ genius_opinion_summary: RESEARCH_GENIUS_SUMMARY_ARTIFACT,
153
+ research_source_skill: RESEARCH_SOURCE_SKILL_ARTIFACT
154
+ },
95
155
  objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
96
156
  execution_policy: {
97
157
  normal_run: 'real_long_running_research_until_unanimous_scout_consensus',
@@ -165,6 +225,10 @@ export function createResearchPlan(prompt, opts = {}) {
165
225
  allowed_write_scope: 'route-local mission artifacts only',
166
226
  rule: 'Normal Research must not modify repository source, package, docs, config, or generated harness files. It may write only artifacts under its own .sneakoscope/missions/<mission-id>/ directory.'
167
227
  },
228
+ artifact_policy: {
229
+ research_paper: paperArtifact,
230
+ rule: 'Write the final manuscript to the dated topic-specific research_paper artifact from this plan, not the legacy generic filename.'
231
+ },
168
232
  rules: [
169
233
  'Do not modify code or project source files during Research. Research writes only route-local mission artifacts; implementation belongs to $Team or another execution route.',
170
234
  'Do not claim novelty without a novelty ledger entry.',
@@ -175,7 +239,7 @@ export function createResearchPlan(prompt, opts = {}) {
175
239
  'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
176
240
  `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
177
241
  'Actively seek disconfirming evidence before synthesis.',
178
- 'Turn the surviving research result into research-paper.md with paper-style sections and references.',
242
+ `Turn the surviving research result into ${paperArtifact} with paper-style sections and references.`,
179
243
  `End every run with ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}, summarizing each genius-lens scout's final opinion, strongest evidence, disagreement, and changed mind.`,
180
244
  'Keep unsupported source-free claims as hypotheses only.',
181
245
  'Prefer the smallest testable mechanism or implementation probe, but do not stop source gathering early for speed when the research question needs a longer pass.',
@@ -194,7 +258,7 @@ export function createResearchPlan(prompt, opts = {}) {
194
258
  ],
195
259
  required_artifacts: [
196
260
  'research-report.md',
197
- RESEARCH_PAPER_ARTIFACT,
261
+ paperArtifact,
198
262
  RESEARCH_GENIUS_SUMMARY_ARTIFACT,
199
263
  RESEARCH_SOURCE_SKILL_ARTIFACT,
200
264
  'source-ledger.json',
@@ -214,6 +278,7 @@ export function researchPlanMarkdown(plan) {
214
278
  lines.push(`Prompt: ${plan.prompt}`);
215
279
  lines.push(`Depth: ${plan.depth}`);
216
280
  lines.push(`Methodology: ${plan.methodology}`);
281
+ lines.push(`Research paper: ${researchPaperArtifactForPlan(plan)}`);
217
282
  if (plan.execution_policy) {
218
283
  lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
219
284
  if (plan.execution_policy.default_max_cycles) lines.push(`Consensus loop: repeat until unanimous scout consensus; default safety cap ${plan.execution_policy.default_max_cycles} cycles`);
@@ -227,7 +292,7 @@ export function researchPlanMarkdown(plan) {
227
292
  if (plan.research_council?.scouts?.length) {
228
293
  lines.push('## Genius Scout Council');
229
294
  lines.push(`Policy: ${plan.research_council.policy}`);
230
- for (const scout of plan.research_council.scouts) lines.push(`- ${scout.display_name || scout.label || scout.id}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
295
+ for (const scout of plan.research_council.scouts) lines.push(`- ${researchScoutAgentName(scout)}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
231
296
  lines.push('');
232
297
  }
233
298
  if (plan.web_research_policy) {
@@ -381,6 +446,7 @@ export function defaultScoutLedger(plan = null) {
381
446
  created_at: nowIso(),
382
447
  scouts: scouts.map((scout) => ({
383
448
  id: scout.id,
449
+ agent_name: researchScoutAgentName(scout),
384
450
  display_name: scout.display_name || scout.label || scout.id,
385
451
  historical_inspiration: scout.historical_inspiration || null,
386
452
  persona: scout.persona || scout.role,
@@ -417,10 +483,13 @@ export function defaultDebateLedger(plan = null) {
417
483
  created_at: nowIso(),
418
484
  mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
419
485
  required_participants: scouts.map((scout) => scout.id),
486
+ participant_display_names: scouts.map((scout) => researchScoutAgentName(scout)),
420
487
  consensus_iterations: 0,
421
488
  unanimous_consensus: false,
422
489
  scout_agreements: scouts.map((scout) => ({
423
490
  scout_id: scout.id,
491
+ agent_name: researchScoutAgentName(scout),
492
+ display_name: scout.display_name || scout.label || scout.id,
424
493
  agrees: false,
425
494
  final_position: '',
426
495
  source_ids: []
@@ -506,6 +575,7 @@ export function defaultResearchGate() {
506
575
  return {
507
576
  passed: false,
508
577
  report_present: false,
578
+ research_paper_artifact: null,
509
579
  paper_present: false,
510
580
  paper_sections: 0,
511
581
  genius_opinion_summary_present: false,
@@ -548,8 +618,10 @@ export async function evaluateResearchGate(dir) {
548
618
  const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
549
619
  const plan = await readJson(path.join(dir, 'research-plan.json'), null);
550
620
  const reportPresent = await exists(path.join(dir, 'research-report.md'));
551
- const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
552
- const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
621
+ const paperArtifact = await findResearchPaperArtifact(dir, plan);
622
+ const paperPresent = paperArtifact.exists;
623
+ const paperText = paperPresent ? await readText(paperArtifact.path, '') : '';
624
+ const paperSections = paperPresent ? countResearchPaperSections(paperText) : 0;
553
625
  const geniusSummaryPresent = await exists(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT));
554
626
  const geniusSummaryCount = geniusSummaryPresent ? countGeniusOpinionSummaries(await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '')) : 0;
555
627
  const sourceSkillPresent = await exists(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT));
@@ -624,6 +696,8 @@ export async function evaluateResearchGate(dir) {
624
696
  passed: gate.passed === true && reasons.length === 0,
625
697
  reasons,
626
698
  metrics: {
699
+ research_paper_artifact: paperArtifact.name,
700
+ paper_present: paperPresent || gate.paper_present === true,
627
701
  web_search_passes: webSearchPasses,
628
702
  paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
629
703
  genius_opinion_summary_present: geniusSummaryPresent || gate.genius_opinion_summary_present === true,
@@ -651,13 +725,18 @@ export async function evaluateResearchGate(dir) {
651
725
  citation_coverage: citationCoverage,
652
726
  web_search_blockers: searchBlockers.length
653
727
  },
654
- gate
728
+ gate: {
729
+ ...gate,
730
+ research_paper_artifact: paperArtifact.name,
731
+ paper_present: paperPresent || gate.paper_present === true
732
+ }
655
733
  };
656
734
  await writeJsonAtomic(path.join(dir, 'research-gate.evaluated.json'), result);
657
735
  return result;
658
736
  }
659
737
 
660
738
  export async function writeMockResearchResult(dir, plan) {
739
+ const paperArtifact = researchPaperArtifactForPlan(plan);
661
740
  const mockLayerSources = RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
662
741
  id: `mock-source-${index + 1}`,
663
742
  layer: layer.id,
@@ -751,6 +830,7 @@ export async function writeMockResearchResult(dir, plan) {
751
830
  ...defaultScoutLedger(plan),
752
831
  scouts: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
753
832
  id: scout.id,
833
+ agent_name: researchScoutAgentName(scout),
754
834
  display_name: scout.display_name || scout.label,
755
835
  historical_inspiration: scout.historical_inspiration || null,
756
836
  persona: scout.persona || scout.role,
@@ -790,10 +870,13 @@ export async function writeMockResearchResult(dir, plan) {
790
870
  created_at: nowIso(),
791
871
  mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
792
872
  required_participants: RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id),
873
+ participant_display_names: RESEARCH_SCOUT_COUNCIL.map((scout) => researchScoutAgentName(scout)),
793
874
  consensus_iterations: 2,
794
875
  unanimous_consensus: true,
795
876
  scout_agreements: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
796
877
  scout_id: scout.id,
878
+ agent_name: researchScoutAgentName(scout),
879
+ display_name: scout.display_name || scout.label,
797
880
  agrees: true,
798
881
  final_position: 'Agrees to keep the falsifiable, source-cited research mechanism as the surviving claim.',
799
882
  source_ids: ['mock-source-1', 'mock-counter-1']
@@ -868,11 +951,12 @@ export async function writeMockResearchResult(dir, plan) {
868
951
  await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
869
952
  await writeTextAtomic(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), `${geniusSummary}\n`);
870
953
  await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force layered, falsifiable novelty rather than summarize known material from one corpus [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock coverage for academic literature, official data, standards, news, public discourse, developer knowledge, and counterevidence layers, but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, cross-layer triangulated, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, cross-layer check, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, source-layer coverage, triangulation checks, and testability.\n`);
871
- await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
954
+ await writeTextAtomic(path.join(dir, paperArtifact), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
872
955
  await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
873
956
  ...defaultResearchGate(),
874
957
  passed: true,
875
958
  report_present: true,
959
+ research_paper_artifact: paperArtifact,
876
960
  paper_present: true,
877
961
  paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
878
962
  genius_opinion_summary_present: true,
@@ -902,12 +986,14 @@ export async function writeMockResearchResult(dir, plan) {
902
986
  falsification_cases: 1,
903
987
  testable_predictions: 1,
904
988
  citation_coverage: true,
905
- evidence: ['mock research report', 'mock research paper', 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
989
+ evidence: ['mock research report', `mock research paper: ${paperArtifact}`, 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
906
990
  notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
907
991
  });
908
992
  return evaluateResearchGate(dir);
909
993
  }
910
994
 
911
995
/**
 * Build the single prompt string that drives one cycle of SKS Research Mode.
 *
 * @param {object} args
 * @param {string} args.id - Mission id; interpolated into the artifact directory path `.sneakoscope/missions/${id}/`.
 * @param {{prompt: string}} args.mission - Mission record; only `mission.prompt` is read here (the research topic).
 * @param {object} args.plan - Research plan; serialized verbatim into the prompt via JSON.stringify and used to resolve the paper artifact name and scout council.
 * @param {number|string} args.cycle - Current cycle counter, interpolated as-is.
 * @param {string} [args.previous] - Previous cycle output; only the last 2500 characters are appended as context.
 * @returns {string} The fully assembled Research Mode prompt.
 */
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
  // Resolve the per-plan paper filename so policy text, required-files list, and
  // gate criteria all reference the same artifact name.
  const paperArtifact = researchPaperArtifactForPlan(plan);
  // Prefer the plan's own scout council; fall back to the module-level default.
  // NOTE(review): assumes researchScoutAgentName yields the canonical agent_name
  // for each scout entry — confirm against its definition elsewhere in this file.
  const scoutAgentNames = (plan?.research_council?.scouts || RESEARCH_SCOUT_COUNCIL).map((scout) => researchScoutAgentName(scout)).join(', ');
  // The template below is the runtime prompt contract; its text (including the
  // literal newlines mid-sentence) must not be edited cosmetically.
  return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-CODE-MUTATION POLICY: Do not edit repository source, package metadata, docs, config, generated skills, or harness files. Write only route-local artifacts under .sneakoscope/missions/${id}/. If a needed implementation change is discovered, record it as a recommendation or blocker for a later execution route.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT PERSONA POLICY: Every Research scout row must include agent_name, display_name, persona, persona_boundary, reasoning_effort: "xhigh", service_tier when available, falsifiers, cheap_probes, and challenge_or_response. Use these agent_name values exactly: ${scoutAgentNames}. Persona names are cognitive lenses, not impersonations.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" 
and one non-obvious, source-linked idea before debate.\nCONSENSUS LOOP POLICY: This is not a fixed three-cycle run. Repeat source-gathering, scout Eureka ideas, debate, falsification, and synthesis pressure until every scout records final agreement with the surviving mechanism. If unanimous agreement is not reached, keep research-gate.json unpassed and continue until the explicit max-cycle safety cap pauses the run.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record exchanges, consensus_iterations, unanimous_consensus, and per-scout agreements before synthesis.\nPAPER POLICY: After the report and ledgers, write ${paperArtifact} as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. 
If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with agent_name/display_name/persona/persona_boundary, effort=xhigh, reasoning_effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response for every scout lens.\n4. Debate to agreement: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout, consensus_iterations >= 1, unanimous_consensus=true only when all scouts agree, and scout_agreements for every scout.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, unanimous debate agreement, cross-layer triangulation, and falsification are recorded.\n7. Paper: write ${paperArtifact} as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. 
Cite source-ledger ids for factual claims.\n- ${paperArtifact}: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with agent_name, display_name, persona, persona_boundary, effort, reasoning_effort, service_tier, eureka, query_set, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, unresolved conflicts, consensus_iterations, unanimous_consensus, and scout_agreements for every scout.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, ${paperArtifact} exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have agent_name/display_name/persona/persona_boundary, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, consensus_iterations >= 1, unanimous_consensus=true with every scout agreement recorded, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
}