sneakoscope 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -275,7 +275,7 @@ For headless remotely controllable Codex App/server sessions on Codex CLI 0.130.
275
275
  sks codex-app remote-control -- --help
276
276
  ```
277
277
 
278
- `sks codex-app check` reports whether the installed Codex CLI is new enough, whether the required app flags are visible, whether Fast/speed-selector config is unlocked, and whether installed OpenAI default plugins such as Browser, Chrome, Computer Use, Documents, Presentations, Spreadsheets, and LaTeX are enabled. Codex CLI 0.130.0+ app-server/remote-control threads can pick up config changes live; older CLI/TUI sessions should still be restarted after `.codex/config.toml` or MCP/plugin changes.
278
+ `sks codex-app check` reports whether the installed Codex CLI is new enough, whether the required app flags are visible, whether Fast/speed-selector config is unlocked, and whether installed OpenAI default plugins such as Browser, Chrome, Computer Use, Documents, Presentations, Spreadsheets, and LaTeX are enabled. codex-lb can remain configured as a custom provider, but SKS keeps it off the top-level Codex App provider setting so native model, speed, and built-in feature UI stay visible. Codex CLI 0.130.0+ app-server/remote-control threads can pick up config changes live; older CLI/TUI sessions should still be restarted after `.codex/config.toml` or MCP/plugin changes.
279
279
 
280
280
  Then open Codex App and use prompt commands directly in the chat. Examples:
281
281
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "sneakoscope",
3
3
  "displayName": "ㅅㅋㅅ",
4
- "version": "0.8.2",
4
+ "version": "0.8.4",
5
5
  "description": "Sneakoscope Codex: database-safe Codex CLI/App harness with Team, Goal, AutoResearch, TriWiki, and Honest Mode.",
6
6
  "type": "module",
7
7
  "homepage": "https://github.com/mandarange/Sneakoscope-Codex#readme",
@@ -169,10 +169,10 @@ async function capturePostinstallCodexLbConfigSnapshot(home = process.env.HOME |
169
169
  async function restorePostinstallCodexLbConfigSnapshot(snapshot) {
170
170
  if (!snapshot?.base_url) return { status: 'skipped', reason: 'no_snapshot' };
171
171
  const current = await readText(snapshot.config_path, '');
172
- if (hasTopLevelCodexLbSelected(current) && codexLbProviderBaseUrl(current)) {
172
+ const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, snapshot.base_url));
173
+ if (next === ensureTrailingNewline(current) && codexLbProviderBaseUrl(current)) {
173
174
  return { status: 'present', config_path: snapshot.config_path };
174
175
  }
175
- const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, snapshot.base_url));
176
176
  await writeTextAtomic(snapshot.config_path, next);
177
177
  return { status: 'restored', config_path: snapshot.config_path };
178
178
  }
@@ -212,10 +212,10 @@ export async function codexLbStatus(opts = {}) {
212
212
  const envText = envExists ? await readText(envPath, '') : '';
213
213
  const envKeyConfigured = Boolean(parseCodexLbEnvKey(envText));
214
214
  const providerConfigured = /\[model_providers\.codex-lb\]/.test(config);
215
- const selected = /model_provider\s*=\s*"codex-lb"/.test(config);
215
+ const selected = hasTopLevelCodexLbSelected(config);
216
216
  const baseUrl = codexLbProviderBaseUrl(config) || parseCodexLbEnvBaseUrl(envText) || null;
217
217
  return {
218
- ok: selected && providerConfigured && envKeyConfigured && Boolean(baseUrl),
218
+ ok: providerConfigured && envKeyConfigured && Boolean(baseUrl),
219
219
  config_path: configPath,
220
220
  env_path: envPath,
221
221
  provider_configured: providerConfigured,
@@ -360,10 +360,10 @@ function codexLbProviderBaseUrl(text = '') {
360
360
  export async function repairCodexLbAuth(opts = {}) {
361
361
  let status = await codexLbStatus(opts);
362
362
  let configRepaired = false;
363
- if (!status.ok && status.env_key_configured && status.base_url) {
363
+ const currentConfig = await readText(status.config_path, '');
364
+ if (status.env_key_configured && status.base_url && (!status.ok || status.selected || hasTopLevelCodexModeLock(currentConfig))) {
364
365
  await ensureDir(path.dirname(status.config_path));
365
- const current = await readText(status.config_path, '');
366
- const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(current, status.base_url));
366
+ const next = normalizeCodexFastModeUiConfig(upsertCodexLbConfig(currentConfig, status.base_url));
367
367
  await writeTextAtomic(status.config_path, next);
368
368
  configRepaired = true;
369
369
  status = await codexLbStatus(opts);
@@ -460,7 +460,7 @@ async function syncCodexApiKeyLogin(apiKey, opts = {}) {
460
460
  }
461
461
 
462
462
  function upsertCodexLbConfig(text = '', baseUrl) {
463
- let next = upsertTopLevelTomlString(text, 'model_provider', 'codex-lb');
463
+ let next = removeTopLevelTomlKeyIfValue(text, 'model_provider', 'codex-lb');
464
464
  const block = [
465
465
  '[model_providers.codex-lb]',
466
466
  'name = "OpenAI"',
@@ -547,6 +547,14 @@ function removeLegacyTopLevelCodexModeLocks(text = '') {
547
547
  }).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
548
548
  }
549
549
 
550
+ function removeTopLevelTomlKeyIfValue(text = '', key = '', value = '') {
551
+ const lines = String(text || '').split('\n');
552
+ const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
553
+ const end = firstTable === -1 ? lines.length : firstTable;
554
+ const keyPattern = new RegExp(`^\\s*${escapeRegExp(key)}\\s*=\\s*"${escapeRegExp(value)}"\\s*(?:#.*)?$`);
555
+ return lines.filter((line, index) => index >= end || !keyPattern.test(line)).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
556
+ }
557
+
550
558
  function removeTomlTableKey(text, table, key) {
551
559
  const lines = String(text || '').trimEnd().split('\n');
552
560
  if (lines.length === 1 && lines[0] === '') return '';
@@ -1095,18 +1103,18 @@ export async function selftestCodexLb(tmp) {
1095
1103
  const codexLbConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1096
1104
  const codexLbEnv = await safeReadText(path.join(codexLbHome, '.codex', 'sks-codex-lb.env'));
1097
1105
  const codexLbAuth = await safeReadText(path.join(codexLbHome, '.codex', 'auth.json'));
1098
- if (!codexLbSetupJson.ok || codexLbSetupJson.base_url !== 'https://lb.example.test/backend-api/codex' || !codexLbConfig.includes('model_provider = "codex-lb"') || !codexLbConfig.includes('[model_providers.codex-lb]') || !codexLbEnv.includes("CODEX_LB_BASE_URL='https://lb.example.test/backend-api/codex'") || !codexLbEnv.includes("CODEX_LB_API_KEY='sk-test'") || !/(\"auth_mode\"\s*:\s*\"apikey\")/.test(codexLbAuth)) throw new Error('selftest: codex-lb setup');
1106
+ if (!codexLbSetupJson.ok || codexLbSetupJson.base_url !== 'https://lb.example.test/backend-api/codex' || hasTopLevelCodexLbSelected(codexLbConfig) || !codexLbConfig.includes('[model_providers.codex-lb]') || !codexLbEnv.includes("CODEX_LB_BASE_URL='https://lb.example.test/backend-api/codex'") || !codexLbEnv.includes("CODEX_LB_API_KEY='sk-test'") || !/(\"auth_mode\"\s*:\s*\"apikey\")/.test(codexLbAuth)) throw new Error('selftest: codex-lb setup');
1099
1107
  if (!hasCodexUnstableFeatureWarningSuppression(codexLbConfig)) throw new Error('selftest: codex-lb setup did not suppress Codex unstable feature warning');
1100
1108
  await initProject(codexLbHome, { installScope: 'global', force: true, repair: true });
1101
1109
  const codexLbRepairSetupConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1102
- if (!codexLbRepairSetupConfig.includes('model_provider = "codex-lb"') || !codexLbRepairSetupConfig.includes('[model_providers.codex-lb]') || !codexLbRepairSetupConfig.includes('https://lb.example.test/backend-api/codex') || codexLbRepairSetupConfig.includes('sk-test')) throw new Error('selftest: init codex-lb');
1110
+ if (hasTopLevelCodexLbSelected(codexLbRepairSetupConfig) || !codexLbRepairSetupConfig.includes('[model_providers.codex-lb]') || !codexLbRepairSetupConfig.includes('https://lb.example.test/backend-api/codex') || codexLbRepairSetupConfig.includes('sk-test')) throw new Error('selftest: init codex-lb');
1103
1111
  if (!hasCodexUnstableFeatureWarningSuppression(codexLbRepairSetupConfig)) throw new Error('selftest: init codex-lb did not suppress Codex unstable feature warning');
1104
1112
  await writeTextAtomic(path.join(codexLbHome, '.codex', 'config.toml'), `${codexLbConfig}\n[mcp_servers.supabase]\nurl = "https://mcp.supabase.com/mcp?project_ref=ref&read_only=true&features=database,docs"\n`);
1105
1113
  const ptmp = path.join(tmp, 'codex-lb-project-config'), prevHome = process.env.HOME;
1106
1114
  try { process.env.HOME = codexLbHome; await initProject(ptmp, { installScope: 'global' }); }
1107
1115
  finally { if (prevHome === undefined) delete process.env.HOME; else process.env.HOME = prevHome; }
1108
1116
  const pcfg = await safeReadText(path.join(ptmp, '.codex', 'config.toml'));
1109
- if (!pcfg.includes('model_provider = "codex-lb"') || !pcfg.includes('[model_providers.codex-lb]') || !pcfg.includes('[mcp_servers.supabase]') || !pcfg.includes('read_only=true')) throw new Error('selftest: project codex-lb');
1117
+ if (hasTopLevelCodexLbSelected(pcfg) || !pcfg.includes('[model_providers.codex-lb]') || !pcfg.includes('[mcp_servers.supabase]') || !pcfg.includes('read_only=true')) throw new Error('selftest: project codex-lb');
1110
1118
  if (!hasCodexUnstableFeatureWarningSuppression(pcfg)) throw new Error('selftest: project codex-lb config did not suppress Codex unstable feature warning');
1111
1119
  await writeTextAtomic(path.join(codexLbHome, '.codex', 'auth.json'), '{"auth_mode":"browser"}\n');
1112
1120
  const codexLbRepair = await runProcess(process.execPath, [path.join(packageRoot(), 'bin', 'sks.mjs'), 'auth', 'repair', '--json'], { cwd: tmp, env: codexLbEnvForSelftest, timeoutMs: 15000, maxOutputBytes: 64 * 1024 });
@@ -1167,7 +1175,7 @@ export async function selftestCodexLb(tmp) {
1167
1175
  const codexLbPostBootstrapConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1168
1176
  const codexLbLoginCallsAfterBootstrap = (await safeReadText(path.join(codexLbHome, '.codex', 'login-calls.log'))).trim().split(/\r?\n/).filter(Boolean).length;
1169
1177
  if (!codexLbPostBootstrapAuth.includes('"auth_mode":"apikey"') || !codexLbPostBootstrapAuth.includes('sk-test') || codexLbLoginCallsAfterBootstrap <= codexLbLoginCallsBeforeBootstrap) throw new Error('selftest: postinstall drift auth');
1170
- if (!codexLbPostBootstrapConfig.includes('model_provider = "codex-lb"') || !codexLbPostBootstrapConfig.includes('[model_providers.codex-lb]') || !codexLbPostBootstrapConfig.includes('https://lb.example.test/backend-api/codex') || codexLbPostBootstrapConfig.includes('sk-test')) throw new Error('selftest: postinstall drift config');
1178
+ if (hasTopLevelCodexLbSelected(codexLbPostBootstrapConfig) || !codexLbPostBootstrapConfig.includes('[model_providers.codex-lb]') || !codexLbPostBootstrapConfig.includes('https://lb.example.test/backend-api/codex') || codexLbPostBootstrapConfig.includes('sk-test')) throw new Error('selftest: postinstall drift config');
1171
1179
  const doctorProject = tmpdir();
1172
1180
  await ensureDir(path.join(doctorProject, '.git'));
1173
1181
  await writeTextAtomic(path.join(doctorProject, 'package.json'), '{"name":"codex-lb-doctor-project","version":"0.0.0"}\n');
@@ -1184,7 +1192,7 @@ export async function selftestCodexLb(tmp) {
1184
1192
  const codexLbDoctorJson = JSON.parse(codexLbDoctorRepair.stdout);
1185
1193
  const codexLbDoctorAuth = await safeReadText(path.join(codexLbHome, '.codex', 'auth.json'));
1186
1194
  const codexLbDoctorConfig = await safeReadText(path.join(codexLbHome, '.codex', 'config.toml'));
1187
- if (!codexLbDoctorJson.repair?.codex_lb?.ok || !codexLbDoctorJson.repair.codex_lb.config_repaired || !codexLbDoctorJson.codex_lb?.ok || !codexLbDoctorAuth.includes('"auth_mode":"apikey"') || !codexLbDoctorAuth.includes('sk-test') || !codexLbDoctorConfig.includes('model_provider = "codex-lb"') || !codexLbDoctorConfig.includes('https://lb.example.test/backend-api/codex') || !hasCodexUnstableFeatureWarningSuppression(codexLbDoctorConfig)) throw new Error('selftest: doctor codex-lb');
1195
+ if (!codexLbDoctorJson.repair?.codex_lb?.ok || !codexLbDoctorJson.repair.codex_lb.config_repaired || !codexLbDoctorJson.codex_lb?.ok || !codexLbDoctorAuth.includes('"auth_mode":"apikey"') || !codexLbDoctorAuth.includes('sk-test') || hasTopLevelCodexLbSelected(codexLbDoctorConfig) || !codexLbDoctorConfig.includes('https://lb.example.test/backend-api/codex') || !hasCodexUnstableFeatureWarningSuppression(codexLbDoctorConfig)) throw new Error('selftest: doctor codex-lb');
1188
1196
  const codexLbContext7Bin = path.join(tmp, 'codex-lb-context7-bin');
1189
1197
  await ensureDir(codexLbContext7Bin);
1190
1198
  await writeTextAtomic(path.join(codexLbContext7Bin, 'codex'), '#!/bin/sh\nif [ "$1" = "--version" ]; then echo "codex-cli 99.0.0"; exit 0; fi\nif [ "$CODEX_LB_API_KEY" ]; then echo "context7 leaked CODEX_LB_API_KEY" >&2; exit 77; fi\nif [ "$1" = "mcp" ] && [ "$2" = "list" ]; then echo ""; exit 0; fi\nif [ "$1" = "mcp" ] && [ "$2" = "add" ]; then echo "context7 added"; exit 0; fi\necho "unexpected codex $*" >&2\nexit 2\n');
@@ -1326,7 +1334,7 @@ function hasTopLevelCodexModeLock(text = '') {
1326
1334
  const lines = String(text || '').split('\n');
1327
1335
  const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
1328
1336
  const top = (firstTable === -1 ? lines : lines.slice(0, firstTable)).join('\n');
1329
- return /(^|\n)\s*model\s*=\s*"codex-lb"\s*(\n|$)/.test(top) || /(^|\n)\s*model_provider\s*=\s*"openai"\s*(\n|$)/.test(top) || /(^|\n)\s*model_reasoning_effort\s*=/.test(top);
1337
+ return /(^|\n)\s*model_provider\s*=\s*"codex-lb"\s*(\n|$)/.test(top) || /(^|\n)\s*model_reasoning_effort\s*=/.test(top);
1330
1338
  }
1331
1339
 
1332
1340
  function hasDeprecatedCodexHooksFeatureFlag(text = '') {
package/src/cli/main.mjs CHANGED
@@ -22,7 +22,7 @@ import { bumpProjectVersion, disableVersionGitHook, runVersionPreCommit, version
22
22
  import { rustInfo } from '../core/rust-accelerator.mjs';
23
23
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
24
24
  import { defaultEvaluationScenario, runEvaluationBenchmark } from '../core/evaluation.mjs';
25
- import { buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
25
+ import { buildResearchPrompt, evaluateResearchGate, isDatedResearchPaperArtifact, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
26
26
  import { evaluateRecallPulseFixtures, readMissionStatusLedger, writeRecallPulseArtifacts } from '../core/recallpulse.mjs';
27
27
  import {
28
28
  PPT_AUDIENCE_STRATEGY_ARTIFACT,
@@ -155,11 +155,19 @@ function codexLbImmediateLaunchOpts(args = [], lb = {}, opts = {}) {
155
155
  return { ...opts, session, codexArgs: [...(opts.codexArgs || []), '-c', 'model_provider="openai"'], codexLbBypassed: true };
156
156
  }
157
157
  if (!lb?.ok) return opts;
158
- if (explicitSession) return opts;
158
+ const nextOpts = withCodexLbProviderArgs(opts);
159
+ if (explicitSession) return nextOpts;
159
160
  const session = sanitizeTmuxSessionName(`sks-codex-lb-${Date.now().toString(36)}-${defaultTmuxSessionName(root)}`);
160
161
  console.log(`codex-lb active for this launch: ${lb.env_path || lb.base_url || 'configured'}`);
161
162
  console.log(`Using fresh tmux session: ${session}`);
162
- return { ...opts, session, codexLbFreshSession: true };
163
+ return { ...nextOpts, session, codexLbFreshSession: true };
164
+ }
165
+
166
+ function withCodexLbProviderArgs(opts = {}) {
167
+ const codexArgs = [...(opts.codexArgs || [])];
168
+ const hasProviderOverride = codexArgs.some((arg) => /model_provider\s*=/.test(String(arg || '')));
169
+ if (!hasProviderOverride) codexArgs.push('-c', 'model_provider="codex-lb"');
170
+ return { ...opts, codexArgs };
163
171
  }
164
172
 
165
173
  function help(args = []) {
@@ -3946,17 +3954,21 @@ async function selftest() {
3946
3954
  const researchPlan = await writeResearchPlan(researchDir, researchMission.prompt, {});
3947
3955
  if (researchPlan.methodology !== 'genius-scout-council-frontier-discovery-loop' || researchPlan.web_research_policy?.mode !== 'layered_source_retrieval_and_triangulation') throw new Error('selftest: research plan contract');
3948
3956
  if (researchPlan.execution_policy?.default_max_cycles !== 12 || researchPlan.mutation_policy?.implementation_allowed !== false || !String(researchPlan.research_council?.debate_policy?.rule || '').includes('every scout records final agreement')) throw new Error('selftest: research consensus/no-code contract');
3949
- if (!researchPlan.research_council?.scouts?.every((scout) => scout.display_name && scout.persona && scout.persona_boundary && scout.reasoning_effort === 'xhigh')) throw new Error('selftest: research scout persona contract missing from plan');
3957
+ if (!researchPlan.research_council?.scouts?.every((scout) => scout.agent_name && scout.display_name && scout.persona && scout.persona_boundary && scout.reasoning_effort === 'xhigh') || !researchPlan.research_council.scouts.some((scout) => scout.agent_name === 'Einstein Scout')) throw new Error('selftest: research scout persona contract missing from plan');
3958
+ const researchPaperArtifact = researchPlan.artifacts?.research_paper;
3959
+ if (!isDatedResearchPaperArtifact(researchPaperArtifact) || researchPaperArtifact === 'research-paper.md') throw new Error('selftest: research paper artifact filename is not dated and titled');
3950
3960
  const researchPrompt = buildResearchPrompt({ id: researchMission.id, mission: researchMission, plan: researchPlan, cycle: 1, previous: '' });
3951
- if (!researchPrompt.includes('NO-CODE-MUTATION POLICY') || !researchPrompt.includes('not a fixed three-cycle run') || !researchPrompt.includes('unanimous_consensus=true')) throw new Error('selftest: research prompt missing no-code unanimous consensus policy');
3961
+ if (!researchPrompt.includes('NO-CODE-MUTATION POLICY') || !researchPrompt.includes('not a fixed three-cycle run') || !researchPrompt.includes('unanimous_consensus=true') || !researchPrompt.includes('agent_name') || !researchPrompt.includes(researchPaperArtifact)) throw new Error('selftest: research prompt missing no-code unanimous consensus policy');
3952
3962
  const rArts = researchPlan.required_artifacts || [];
3953
3963
  for (const a of [rss, 'source-ledger.json', 'scout-ledger.json', 'debate-ledger.json', 'falsification-ledger.json']) if (!rArts.includes(a) || !(await exists(path.join(researchDir, a)))) throw new Error('selftest: research artifact');
3954
- if (!rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
3964
+ if (!rArts.includes(researchPaperArtifact) || rArts.includes('research-paper.md') || !rArts.includes(gos)) throw new Error('selftest: research paper');
3955
3965
  const initialResearchGate = await evaluateResearchGate(researchDir);
3956
3966
  if (initialResearchGate.passed || ['web_search_pass_missing', 'eureka_missing', 'debate_exchanges_missing', 'research_paper_missing', 'consensus_iteration_missing', 'unanimous_consensus_missing'].some((r) => !initialResearchGate.reasons.includes(r))) throw new Error('selftest: research gate');
3957
3967
  const researchGate = await writeMockResearchResult(researchDir, researchPlan);
3958
3968
  if (!researchGate.passed) throw new Error('selftest: mock research gate did not pass');
3969
+ if (!(await exists(path.join(researchDir, researchPaperArtifact))) || await exists(path.join(researchDir, 'research-paper.md'))) throw new Error('selftest: mock research paper filename did not use dated title artifact');
3959
3970
  const rm = researchGate.metrics || {};
3971
+ if (rm.research_paper_artifact !== researchPaperArtifact) throw new Error('selftest: research gate did not report dated paper artifact');
3960
3972
  if (rm.scout_persona_contract_ok !== true || (rm.scout_persona_issues || []).length) throw new Error('selftest: research scout persona contract did not pass');
3961
3973
  if (['independent_scouts', 'xhigh_scouts', 'eureka_moments', 'debate_participants', 'genius_opinion_summaries'].some((m) => rm[m] < 5) || ['counterevidence_sources', 'falsification_cases', 'triangulation_checks'].some((m) => rm[m] < 1) || rm.paper_sections < 8 || rm.citation_coverage !== true || rm.source_layers_covered < 7 || rm.consensus_iterations < 1 || rm.unanimous_consensus !== true || rm.consensus_agreed_scouts < 5) throw new Error('selftest: research metrics');
3962
3974
  await writeJsonAtomic(path.join(dir, 'done-gate.json'), { passed: true, unsupported_critical_claims: 0, database_safety_violation: false, database_safety_reviewed: true, visual_drift: 'low', wiki_drift: 'low', tests_required: false });
@@ -9,7 +9,7 @@ import { buildQuestionSchema, writeQuestions } from '../core/questions.mjs';
9
9
  import { sealContract } from '../core/decision-contract.mjs';
10
10
  import { buildQaLoopQuestionSchema, buildQaLoopPrompt, evaluateQaGate, qaStatus, writeMockQaResult, writeQaLoopArtifacts } from '../core/qa-loop.mjs';
11
11
  import { containsUserQuestion, noQuestionContinuationReason } from '../core/no-question-guard.mjs';
12
- import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_PAPER_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
12
+ import { RESEARCH_GENIUS_SUMMARY_ARTIFACT, RESEARCH_SOURCE_SKILL_ARTIFACT, countGeniusOpinionSummaries, countResearchPaperSections, buildResearchPrompt, evaluateResearchGate, findResearchPaperArtifact, researchPaperArtifactForPlan, writeMockResearchResult, writeResearchPlan } from '../core/research.mjs';
13
13
  import { storageReport, enforceRetention, pruneWikiArtifacts } from '../core/retention.mjs';
14
14
  import { evaluateDoneGate } from '../core/hproof.mjs';
15
15
  import { renderCartridge, validateCartridge, driftCartridge, snapshotCartridge } from '../core/gx-renderer.mjs';
@@ -76,11 +76,19 @@ function codexLbImmediateLaunchOpts(args = [], lb = {}, opts = {}) {
76
76
  return { ...opts, session, codexArgs: [...(opts.codexArgs || []), '-c', 'model_provider="openai"'], codexLbBypassed: true };
77
77
  }
78
78
  if (!lb?.ok) return opts;
79
- if (explicitSession) return opts;
79
+ const nextOpts = withCodexLbProviderArgs(opts);
80
+ if (explicitSession) return nextOpts;
80
81
  const session = sanitizeTmuxSessionName(`sks-codex-lb-${Date.now().toString(36)}-${defaultTmuxSessionName(root)}`);
81
82
  console.log(`codex-lb active for this launch: ${lb.env_path || lb.base_url || 'configured'}`);
82
83
  console.log(`Using fresh tmux session: ${session}`);
83
- return { ...opts, session, codexLbFreshSession: true };
84
+ return { ...nextOpts, session, codexLbFreshSession: true };
85
+ }
86
+
87
+ function withCodexLbProviderArgs(opts = {}) {
88
+ const codexArgs = [...(opts.codexArgs || [])];
89
+ const hasProviderOverride = codexArgs.some((arg) => /model_provider\s*=/.test(String(arg || '')));
90
+ if (!hasProviderOverride) codexArgs.push('-c', 'model_provider="codex-lb"');
91
+ return { ...opts, codexArgs };
84
92
  }
85
93
 
86
94
  export async function madHighCommand(args = [], deps = {}) {
@@ -491,7 +499,7 @@ async function researchPrepare(args) {
491
499
  console.log(`Methodology: ${plan.methodology}`);
492
500
  console.log(`Plan: ${path.relative(root, path.join(dir, 'research-plan.md'))}`);
493
501
  console.log(`Pipeline: ${path.relative(root, path.join(dir, PIPELINE_PLAN_ARTIFACT))}`);
494
- console.log(`Paper: ${RESEARCH_PAPER_ARTIFACT}`);
502
+ console.log(`Paper: ${researchPaperArtifactForPlan(plan)}`);
495
503
  console.log(`Genius summary: ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}`);
496
504
  console.log(`Source skill: ${RESEARCH_SOURCE_SKILL_ARTIFACT}`);
497
505
  console.log('Ledgers: source-ledger.json, scout-ledger.json, debate-ledger.json, novelty-ledger.json, falsification-ledger.json');
@@ -612,7 +620,9 @@ async function researchStatus(args) {
612
620
  const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
613
621
  const sourceSkillText = await readText(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT), '');
614
622
  const geniusSummaryText = await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '');
615
- const paperText = await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '');
623
+ const plan = await readJson(path.join(dir, 'research-plan.json'), null);
624
+ const paperArtifact = await findResearchPaperArtifact(dir, plan);
625
+ const paperText = paperArtifact.exists ? await readText(paperArtifact.path, '') : '';
616
626
  const scoutRows = Array.isArray(scoutLedger?.scouts) ? scoutLedger.scouts : [];
617
627
  const sourceLayerRows = Array.isArray(sourceLedger?.source_layers) ? sourceLedger.source_layers : [];
618
628
  const sourceLayersCovered = sourceLayerRows.filter((layer) => layer.status === 'covered' && ((Array.isArray(layer.source_ids) && layer.source_ids.length) || (Array.isArray(layer.counterevidence_ids) && layer.counterevidence_ids.length))).length;
@@ -635,6 +645,7 @@ async function researchStatus(args) {
635
645
  unanimous_consensus: gate?.metrics?.unanimous_consensus ?? gate?.unanimous_consensus ?? debateLedger?.unanimous_consensus ?? false,
636
646
  research_source_skill_present: Boolean(sourceSkillText.trim()),
637
647
  genius_opinion_summary_present: Boolean(geniusSummaryText.trim()),
648
+ research_paper_artifact: paperArtifact.name,
638
649
  paper_present: Boolean(paperText.trim()),
639
650
  paper_sections: countResearchPaperSections(paperText),
640
651
  falsification_cases: falsificationLedger?.cases?.length ?? null
@@ -401,6 +401,7 @@ async function codexFastModeConfigStatus(opts = {}) {
401
401
  if (!config.text) continue;
402
402
  const topLevel = topLevelToml(config.text);
403
403
  if (/(^|\n)\s*model_reasoning_effort\s*=/.test(topLevel)) blockers.push(`${config.scope}:top_level_model_reasoning_effort`);
404
+ if (/(^|\n)\s*model_provider\s*=\s*"codex-lb"\s*(?:#.*)?(?=\n|$)/.test(topLevel)) blockers.push(`${config.scope}:top_level_codex_lb_provider`);
404
405
  if (/(^|\n)\s*fast_default_opt_out\s*=\s*true\s*(?:#.*)?(?=\n|$)/.test(tomlTable(config.text, 'notice'))) blockers.push(`${config.scope}:fast_default_opt_out`);
405
406
  }
406
407
  const merged = configs.map((config) => config.text).join('\n');
package/src/core/fsx.mjs CHANGED
@@ -5,7 +5,7 @@ import os from 'node:os';
5
5
  import crypto from 'node:crypto';
6
6
  import { spawn } from 'node:child_process';
7
7
 
8
- export const PACKAGE_VERSION = '0.8.2';
8
+ export const PACKAGE_VERSION = '0.8.4';
9
9
  export const DEFAULT_PROCESS_TAIL_BYTES = 256 * 1024;
10
10
  export const DEFAULT_PROCESS_TIMEOUT_MS = 30 * 60 * 1000;
11
11
 
package/src/core/init.mjs CHANGED
@@ -48,7 +48,7 @@ export function hasTopLevelCodexModeLock(text = '') {
48
48
  const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
49
49
  const top = (firstTable === -1 ? lines : lines.slice(0, firstTable)).join('\n');
50
50
  const model = top.match(/^model\s*=\s*"([^"]+)"/m)?.[1];
51
- return (Boolean(model) && model !== 'gpt-5.5') || /^model_reasoning_effort\s*=/m.test(top);
51
+ return (Boolean(model) && model !== 'gpt-5.5') || /^model_reasoning_effort\s*=/m.test(top) || /^model_provider\s*=\s*"codex-lb"/m.test(top);
52
52
  }
53
53
 
54
54
  export function hasDeprecatedCodexHooksFeatureFlag(text = '') {
@@ -502,6 +502,7 @@ function installPolicy(scope, commandPrefix) {
502
502
 
503
503
  function mergeManagedCodexConfigToml(existingContent = '') {
504
504
  let next = removeLegacyTopLevelCodexModeLocks(String(existingContent || '').trimEnd());
505
+ next = removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb');
505
506
  next = removeTomlTableKey(next, 'notice', 'fast_default_opt_out');
506
507
  next = removeTomlTableKey(next, 'features', 'codex_hooks');
507
508
  next = upsertTopLevelTomlString(next, 'model', 'gpt-5.5');
@@ -546,13 +547,15 @@ async function mergeGlobalCodexConfigIfAvailable(configText = '', configPath = '
546
547
  const globalConfig = await readText(globalConfigPath, '');
547
548
  let next = mergeGlobalMcpServers(configText, globalConfig);
548
549
  next = mergeGlobalCodexAppRuntimeTables(next, globalConfig);
549
- if (selectedRe.test(next) && /\[model_providers\.codex-lb\]/.test(next)) return `${String(next || '').trim()}\n`;
550
+ if (selectedRe.test(next) && /\[model_providers\.codex-lb\]/.test(next)) {
551
+ return `${removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb').trim()}\n`;
552
+ }
550
553
  const envPath = path.join(home, '.codex', 'sks-codex-lb.env');
551
554
  if (!(await exists(envPath))) return next;
552
555
  const envText = await readText(envPath, '');
553
556
  const baseUrl = globalConfig.match(/(^|\n)\[model_providers\.codex-lb\][\s\S]*?\n\s*base_url\s*=\s*"([^"]+)"/)?.[2] || parseCodexLbEnvBaseUrl(envText);
554
557
  if (!parseCodexLbEnvKey(envText) || !baseUrl || (!selectedRe.test(globalConfig) && !parseCodexLbEnvBaseUrl(envText))) return next;
555
- next = upsertTopLevelTomlString(next, 'model_provider', 'codex-lb');
558
+ next = removeTopLevelTomlKeyIfValue(next, 'model_provider', 'codex-lb');
556
559
  next = upsertTomlTable(next, 'model_providers.codex-lb', `[model_providers.codex-lb]\nname = "OpenAI"\nbase_url = "${baseUrl}"\nwire_api = "responses"\nenv_key = "CODEX_LB_API_KEY"\nsupports_websockets = true\nrequires_openai_auth = true`);
557
560
  return `${next.trim()}\n`;
558
561
  }
@@ -612,6 +615,14 @@ function removeLegacyTopLevelCodexModeLocks(text = '') {
612
615
  }).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
613
616
  }
614
617
 
618
+ function removeTopLevelTomlKeyIfValue(text = '', key = '', value = '') {
619
+ const lines = String(text || '').split('\n');
620
+ const firstTable = lines.findIndex((x) => /^\s*\[.+\]\s*$/.test(x));
621
+ const end = firstTable === -1 ? lines.length : firstTable;
622
+ const keyPattern = new RegExp(`^\\s*${escapeRegExp(key)}\\s*=\\s*"${escapeRegExp(value)}"\\s*(?:#.*)?$`);
623
+ return lines.filter((line, index) => index >= end || !keyPattern.test(line)).join('\n').replace(/^\n+/, '').replace(/\n{3,}/g, '\n\n');
624
+ }
625
+
615
626
  function upsertTopLevelTomlString(text, key, value) {
616
627
  const line = `${key} = "${value}"`;
617
628
  const lines = String(text || '').split('\n');
@@ -17,11 +17,62 @@ export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
17
17
  ['references', 'sources']
18
18
  ]);
19
19
 
20
- export const RESEARCH_SCOUT_COUNCIL = Object.freeze(RESEARCH_SCOUT_PERSONA_CONTRACT.map((scout) => Object.freeze({
21
- ...scout,
22
- label: scout.display_name,
23
- required_outputs: scout.required_outputs
24
- })));
20
+ function cleanResearchArtifactDate(value = '') {
21
+ const match = String(value || '').match(/\d{4}-\d{2}-\d{2}/);
22
+ return match ? match[0] : nowIso().slice(0, 10);
23
+ }
24
+
25
+ function researchTitleSlug(prompt = '') {
26
+ const cleaned = String(prompt || '')
27
+ .normalize('NFKC')
28
+ .replace(/[`"'<>]/g, ' ')
29
+ .replace(/[^\p{L}\p{N}]+/gu, '-')
30
+ .replace(/^-+|-+$/g, '')
31
+ .toLowerCase();
32
+ const slug = cleaned.split('-').filter(Boolean).slice(0, 10).join('-').slice(0, 90).replace(/-+$/g, '');
33
+ return slug || 'research';
34
+ }
35
+
36
+ export function researchPaperArtifactName(prompt = '', createdAt = nowIso(), opts = {}) {
37
+ const titleSource = opts.title || opts.paperTitle || prompt;
38
+ return `${cleanResearchArtifactDate(createdAt)}-${researchTitleSlug(titleSource)}-research-paper.md`;
39
+ }
40
+
41
+ export function isDatedResearchPaperArtifact(name = '') {
42
+ return /^\d{4}-\d{2}-\d{2}-[^\s/\\]+-research-paper\.md$/u.test(String(name || ''));
43
+ }
44
+
45
+ export function researchPaperArtifactForPlan(plan = null) {
46
+ const artifact = plan?.artifacts?.research_paper || plan?.paper_artifact;
47
+ return artifact ? path.basename(String(artifact)) : RESEARCH_PAPER_ARTIFACT;
48
+ }
49
+
50
+ export async function findResearchPaperArtifact(dir, plan = null, opts = {}) {
51
+ const preferred = researchPaperArtifactForPlan(plan);
52
+ const allowLegacyFallback = opts.allowLegacyFallback === true || preferred === RESEARCH_PAPER_ARTIFACT;
53
+ const names = [...new Set([preferred, allowLegacyFallback ? RESEARCH_PAPER_ARTIFACT : null].filter(Boolean))];
54
+ for (const name of names) {
55
+ const file = path.join(dir, name);
56
+ if (await exists(file)) return { name, path: file, exists: true, preferred: name === preferred, legacy: name === RESEARCH_PAPER_ARTIFACT };
57
+ }
58
+ return { name: preferred, path: path.join(dir, preferred), exists: false, preferred: true, legacy: false };
59
+ }
60
+
61
+ export function researchScoutAgentName(scout = {}) {
62
+ return String(scout.agent_name || scout.display_name || scout.label || scout.id || 'Research Scout').trim();
63
+ }
64
+
65
+ export const RESEARCH_SCOUT_COUNCIL = Object.freeze(RESEARCH_SCOUT_PERSONA_CONTRACT.map((scout) => {
66
+ const displayName = scout.display_name || scout.label || scout.id;
67
+ return Object.freeze({
68
+ ...scout,
69
+ display_name: displayName,
70
+ label: displayName,
71
+ agent_name: displayName,
72
+ codex_agent_name: displayName,
73
+ required_outputs: scout.required_outputs
74
+ });
75
+ }));
25
76
 
26
77
  export const RESEARCH_SOURCE_LAYERS = Object.freeze([
27
78
  {
@@ -86,12 +137,21 @@ export const RESEARCH_SOURCE_LAYER_IDS = Object.freeze(RESEARCH_SOURCE_LAYERS.ma
86
137
 
87
138
  export function createResearchPlan(prompt, opts = {}) {
88
139
  const depth = opts.depth || 'frontier';
140
+ const createdAt = nowIso();
141
+ const paperArtifact = researchPaperArtifactName(prompt, createdAt, opts);
89
142
  return {
90
143
  schema_version: 1,
91
144
  prompt,
92
145
  depth,
93
- created_at: nowIso(),
146
+ created_at: createdAt,
94
147
  methodology: 'genius-scout-council-frontier-discovery-loop',
148
+ paper_artifact: paperArtifact,
149
+ artifacts: {
150
+ research_paper: paperArtifact,
151
+ legacy_research_paper: RESEARCH_PAPER_ARTIFACT,
152
+ genius_opinion_summary: RESEARCH_GENIUS_SUMMARY_ARTIFACT,
153
+ research_source_skill: RESEARCH_SOURCE_SKILL_ARTIFACT
154
+ },
95
155
  objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
96
156
  execution_policy: {
97
157
  normal_run: 'real_long_running_research_until_unanimous_scout_consensus',
@@ -165,6 +225,10 @@ export function createResearchPlan(prompt, opts = {}) {
165
225
  allowed_write_scope: 'route-local mission artifacts only',
166
226
  rule: 'Normal Research must not modify repository source, package, docs, config, or generated harness files. It may write only artifacts under its own .sneakoscope/missions/<mission-id>/ directory.'
167
227
  },
228
+ artifact_policy: {
229
+ research_paper: paperArtifact,
230
+ rule: 'Write the final manuscript to the dated topic-specific research_paper artifact from this plan, not the legacy generic filename.'
231
+ },
168
232
  rules: [
169
233
  'Do not modify code or project source files during Research. Research writes only route-local mission artifacts; implementation belongs to $Team or another execution route.',
170
234
  'Do not claim novelty without a novelty ledger entry.',
@@ -175,7 +239,7 @@ export function createResearchPlan(prompt, opts = {}) {
175
239
  'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
176
240
  `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
177
241
  'Actively seek disconfirming evidence before synthesis.',
178
- 'Turn the surviving research result into research-paper.md with paper-style sections and references.',
242
+ `Turn the surviving research result into ${paperArtifact} with paper-style sections and references.`,
179
243
  `End every run with ${RESEARCH_GENIUS_SUMMARY_ARTIFACT}, summarizing each genius-lens scout's final opinion, strongest evidence, disagreement, and changed mind.`,
180
244
  'Keep unsupported source-free claims as hypotheses only.',
181
245
  'Prefer the smallest testable mechanism or implementation probe, but do not stop source gathering early for speed when the research question needs a longer pass.',
@@ -194,7 +258,7 @@ export function createResearchPlan(prompt, opts = {}) {
194
258
  ],
195
259
  required_artifacts: [
196
260
  'research-report.md',
197
- RESEARCH_PAPER_ARTIFACT,
261
+ paperArtifact,
198
262
  RESEARCH_GENIUS_SUMMARY_ARTIFACT,
199
263
  RESEARCH_SOURCE_SKILL_ARTIFACT,
200
264
  'source-ledger.json',
@@ -214,6 +278,7 @@ export function researchPlanMarkdown(plan) {
214
278
  lines.push(`Prompt: ${plan.prompt}`);
215
279
  lines.push(`Depth: ${plan.depth}`);
216
280
  lines.push(`Methodology: ${plan.methodology}`);
281
+ lines.push(`Research paper: ${researchPaperArtifactForPlan(plan)}`);
217
282
  if (plan.execution_policy) {
218
283
  lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
219
284
  if (plan.execution_policy.default_max_cycles) lines.push(`Consensus loop: repeat until unanimous scout consensus; default safety cap ${plan.execution_policy.default_max_cycles} cycles`);
@@ -227,7 +292,7 @@ export function researchPlanMarkdown(plan) {
227
292
  if (plan.research_council?.scouts?.length) {
228
293
  lines.push('## Genius Scout Council');
229
294
  lines.push(`Policy: ${plan.research_council.policy}`);
230
- for (const scout of plan.research_council.scouts) lines.push(`- ${scout.display_name || scout.label || scout.id}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
295
+ for (const scout of plan.research_council.scouts) lines.push(`- ${researchScoutAgentName(scout)}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
231
296
  lines.push('');
232
297
  }
233
298
  if (plan.web_research_policy) {
@@ -381,6 +446,7 @@ export function defaultScoutLedger(plan = null) {
381
446
  created_at: nowIso(),
382
447
  scouts: scouts.map((scout) => ({
383
448
  id: scout.id,
449
+ agent_name: researchScoutAgentName(scout),
384
450
  display_name: scout.display_name || scout.label || scout.id,
385
451
  historical_inspiration: scout.historical_inspiration || null,
386
452
  persona: scout.persona || scout.role,
@@ -417,10 +483,13 @@ export function defaultDebateLedger(plan = null) {
417
483
  created_at: nowIso(),
418
484
  mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
419
485
  required_participants: scouts.map((scout) => scout.id),
486
+ participant_display_names: scouts.map((scout) => researchScoutAgentName(scout)),
420
487
  consensus_iterations: 0,
421
488
  unanimous_consensus: false,
422
489
  scout_agreements: scouts.map((scout) => ({
423
490
  scout_id: scout.id,
491
+ agent_name: researchScoutAgentName(scout),
492
+ display_name: scout.display_name || scout.label || scout.id,
424
493
  agrees: false,
425
494
  final_position: '',
426
495
  source_ids: []
@@ -506,6 +575,7 @@ export function defaultResearchGate() {
506
575
  return {
507
576
  passed: false,
508
577
  report_present: false,
578
+ research_paper_artifact: null,
509
579
  paper_present: false,
510
580
  paper_sections: 0,
511
581
  genius_opinion_summary_present: false,
@@ -548,8 +618,10 @@ export async function evaluateResearchGate(dir) {
548
618
  const gate = await readJson(path.join(dir, 'research-gate.json'), defaultResearchGate());
549
619
  const plan = await readJson(path.join(dir, 'research-plan.json'), null);
550
620
  const reportPresent = await exists(path.join(dir, 'research-report.md'));
551
- const paperPresent = await exists(path.join(dir, RESEARCH_PAPER_ARTIFACT));
552
- const paperSections = paperPresent ? countResearchPaperSections(await readText(path.join(dir, RESEARCH_PAPER_ARTIFACT), '')) : 0;
621
+ const paperArtifact = await findResearchPaperArtifact(dir, plan);
622
+ const paperPresent = paperArtifact.exists;
623
+ const paperText = paperPresent ? await readText(paperArtifact.path, '') : '';
624
+ const paperSections = paperPresent ? countResearchPaperSections(paperText) : 0;
553
625
  const geniusSummaryPresent = await exists(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT));
554
626
  const geniusSummaryCount = geniusSummaryPresent ? countGeniusOpinionSummaries(await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '')) : 0;
555
627
  const sourceSkillPresent = await exists(path.join(dir, RESEARCH_SOURCE_SKILL_ARTIFACT));
@@ -624,6 +696,8 @@ export async function evaluateResearchGate(dir) {
624
696
  passed: gate.passed === true && reasons.length === 0,
625
697
  reasons,
626
698
  metrics: {
699
+ research_paper_artifact: paperArtifact.name,
700
+ paper_present: paperPresent || gate.paper_present === true,
627
701
  web_search_passes: webSearchPasses,
628
702
  paper_sections: Math.max(Number(gate.paper_sections || 0), paperSections),
629
703
  genius_opinion_summary_present: geniusSummaryPresent || gate.genius_opinion_summary_present === true,
@@ -651,13 +725,18 @@ export async function evaluateResearchGate(dir) {
651
725
  citation_coverage: citationCoverage,
652
726
  web_search_blockers: searchBlockers.length
653
727
  },
654
- gate
728
+ gate: {
729
+ ...gate,
730
+ research_paper_artifact: paperArtifact.name,
731
+ paper_present: paperPresent || gate.paper_present === true
732
+ }
655
733
  };
656
734
  await writeJsonAtomic(path.join(dir, 'research-gate.evaluated.json'), result);
657
735
  return result;
658
736
  }
659
737
 
660
738
  export async function writeMockResearchResult(dir, plan) {
739
+ const paperArtifact = researchPaperArtifactForPlan(plan);
661
740
  const mockLayerSources = RESEARCH_SOURCE_LAYERS.map((layer, index) => ({
662
741
  id: `mock-source-${index + 1}`,
663
742
  layer: layer.id,
@@ -751,6 +830,7 @@ export async function writeMockResearchResult(dir, plan) {
751
830
  ...defaultScoutLedger(plan),
752
831
  scouts: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
753
832
  id: scout.id,
833
+ agent_name: researchScoutAgentName(scout),
754
834
  display_name: scout.display_name || scout.label,
755
835
  historical_inspiration: scout.historical_inspiration || null,
756
836
  persona: scout.persona || scout.role,
@@ -790,10 +870,13 @@ export async function writeMockResearchResult(dir, plan) {
790
870
  created_at: nowIso(),
791
871
  mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
792
872
  required_participants: RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id),
873
+ participant_display_names: RESEARCH_SCOUT_COUNCIL.map((scout) => researchScoutAgentName(scout)),
793
874
  consensus_iterations: 2,
794
875
  unanimous_consensus: true,
795
876
  scout_agreements: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
796
877
  scout_id: scout.id,
878
+ agent_name: researchScoutAgentName(scout),
879
+ display_name: scout.display_name || scout.label,
797
880
  agrees: true,
798
881
  final_position: 'Agrees to keep the falsifiable, source-cited research mechanism as the surviving claim.',
799
882
  source_ids: ['mock-source-1', 'mock-counter-1']
@@ -868,11 +951,12 @@ export async function writeMockResearchResult(dir, plan) {
868
951
  await writeJsonAtomic(path.join(dir, 'novelty-ledger.json'), ledger);
869
952
  await writeTextAtomic(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), `${geniusSummary}\n`);
870
953
  await writeTextAtomic(path.join(dir, 'research-report.md'), `# SKS Research Report\n\nPrompt: ${plan.prompt}\n\n## Scout Council Synthesis\n\nThe mock council keeps one cited methodological insight: a research mode should force layered, falsifiable novelty rather than summarize known material from one corpus [mock-source-1].\n\n## Source Coverage\n\nThis is a selftest fixture. It records mock coverage for academic literature, official data, standards, news, public discourse, developer knowledge, and counterevidence layers, but does not perform live web browsing in --mock mode.\n\n## Candidate Insight\n\nA useful research run must produce source-cited, cross-layer triangulated, falsifiable novelty with scout findings and a cheap probe.\n\n## Falsification\n\nThe claim is weak if no new testable prediction, counterevidence source, cross-layer check, or experiment is produced [mock-counter-1].\n\n## Next Test\n\nCompare this mode against a summary-only run and score candidate insights, falsification passes, citation coverage, source-layer coverage, triangulation checks, and testability.\n`);
871
- await writeTextAtomic(path.join(dir, RESEARCH_PAPER_ARTIFACT), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
954
+ await writeTextAtomic(path.join(dir, paperArtifact), `# Research Paper: ${plan.prompt}\n\n## Abstract\nA source-cited research run should produce cross-layer, falsifiable novelty rather than only summarize known material.\n\n## Introduction\nThe mock topic is evaluated as a research workflow outcome with layered source coverage [mock-source-1].\n\n## Methodology\nFive xhigh scouts produce Eureka ideas, debate, triangulate source layers, and falsify the strongest claim.\n\n## Findings\nThe surviving finding is that useful research needs cited novelty, source-layer coverage, cross-layer triangulation, and a cheap decisive probe.\n\n## Discussion\nThe debate favors gate-backed evidence over narrative confidence, and treats public discourse as signal rather than truth.\n\n## Limitations and Falsification\nThe claim fails without sources, counterevidence, triangulation checks, or testable predictions [mock-counter-1].\n\n## Conclusion and Next Experiment\nCompare this loop against a summary-only baseline and score testable insights.\n\n## References\n- [mock-source-1] Mock academic literature coverage.\n- [mock-source-2] Mock official government and leading-institution knowledge coverage.\n- [mock-source-3] Mock standards and primary documents coverage.\n- [mock-source-4] Mock current news and global reporting coverage.\n- [mock-source-5] Mock public discourse coverage.\n- [mock-source-6] Mock developer and practitioner knowledge coverage.\n- [mock-source-7] Mock counterevidence and fact-checking coverage.\n- [mock-counter-1] Mock overclaim counterexample.\n`);
872
955
  await writeJsonAtomic(path.join(dir, 'research-gate.json'), {
873
956
  ...defaultResearchGate(),
874
957
  passed: true,
875
958
  report_present: true,
959
+ research_paper_artifact: paperArtifact,
876
960
  paper_present: true,
877
961
  paper_sections: RESEARCH_PAPER_SECTION_GROUPS.length,
878
962
  genius_opinion_summary_present: true,
@@ -902,12 +986,14 @@ export async function writeMockResearchResult(dir, plan) {
902
986
  falsification_cases: 1,
903
987
  testable_predictions: 1,
904
988
  citation_coverage: true,
905
- evidence: ['mock research report', 'mock research paper', 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
989
+ evidence: ['mock research report', `mock research paper: ${paperArtifact}`, 'mock genius opinion summary', 'mock research source skill', 'mock layered source ledger', 'mock scout ledger', 'mock debate ledger', 'mock novelty ledger', 'mock falsification ledger'],
906
990
  notes: ['mock mode records the new contract but does not call a model or perform live web browsing']
907
991
  });
908
992
  return evaluateResearchGate(dir);
909
993
  }
910
994
 
911
995
/**
 * Build the single prompt string that drives one cycle of SKS Research Mode.
 *
 * @param {object} args
 * @param {string} args.id - Mission id; interpolated into the artifact directory path `.sneakoscope/missions/${id}/`.
 * @param {{prompt: string}} args.mission - Mission record; only `mission.prompt` is read here (the research topic).
 * @param {object} args.plan - Research plan; serialized verbatim into the prompt via JSON.stringify and used to resolve the paper artifact name and scout council.
 * @param {number|string} args.cycle - Current cycle counter, interpolated as-is.
 * @param {string} [args.previous] - Previous cycle output; only the last 2500 characters are appended as context.
 * @returns {string} The fully assembled Research Mode prompt.
 */
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
  // Resolve the per-plan paper filename so policy text, required-files list, and
  // gate criteria all reference the same artifact name.
  const paperArtifact = researchPaperArtifactForPlan(plan);
  // Prefer the plan's own scout council; fall back to the module-level default.
  // NOTE(review): assumes researchScoutAgentName yields the canonical agent_name
  // for each scout entry — confirm against its definition elsewhere in this file.
  const scoutAgentNames = (plan?.research_council?.scouts || RESEARCH_SCOUT_COUNCIL).map((scout) => researchScoutAgentName(scout)).join(', ');
  // The template below is the runtime prompt contract; its text (including the
  // literal newlines mid-sentence) must not be edited cosmetically.
  return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-CODE-MUTATION POLICY: Do not edit repository source, package metadata, docs, config, generated skills, or harness files. Write only route-local artifacts under .sneakoscope/missions/${id}/. If a needed implementation change is discovered, record it as a recommendation or blocker for a later execution route.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT PERSONA POLICY: Every Research scout row must include agent_name, display_name, persona, persona_boundary, reasoning_effort: "xhigh", service_tier when available, falsifiers, cheap_probes, and challenge_or_response. Use these agent_name values exactly: ${scoutAgentNames}. Persona names are cognitive lenses, not impersonations.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" 
and one non-obvious, source-linked idea before debate.\nCONSENSUS LOOP POLICY: This is not a fixed three-cycle run. Repeat source-gathering, scout Eureka ideas, debate, falsification, and synthesis pressure until every scout records final agreement with the surviving mechanism. If unanimous agreement is not reached, keep research-gate.json unpassed and continue until the explicit max-cycle safety cap pauses the run.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record exchanges, consensus_iterations, unanimous_consensus, and per-scout agreements before synthesis.\nPAPER POLICY: After the report and ledgers, write ${paperArtifact} as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. 
If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with agent_name/display_name/persona/persona_boundary, effort=xhigh, reasoning_effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response for every scout lens.\n4. Debate to agreement: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout, consensus_iterations >= 1, unanimous_consensus=true only when all scouts agree, and scout_agreements for every scout.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, unanimous debate agreement, cross-layer triangulation, and falsification are recorded.\n7. Paper: write ${paperArtifact} as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. 
Cite source-ledger ids for factual claims.\n- ${paperArtifact}: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with agent_name, display_name, persona, persona_boundary, effort, reasoning_effort, service_tier, eureka, query_set, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, unresolved conflicts, consensus_iterations, unanimous_consensus, and scout_agreements for every scout.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, ${paperArtifact} exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have agent_name/display_name/persona/persona_boundary, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, consensus_iterations >= 1, unanimous_consensus=true with every scout agreement recorded, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
}