sneakoscope 0.7.78 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import path from 'node:path';
2
2
  import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
3
3
  import { OUTCOME_RUBRIC } from './proof-field.mjs';
4
+ import { RESEARCH_SCOUT_PERSONA_CONTRACT, validateResearchScoutPersonas } from './recallpulse.mjs';
4
5
 
5
6
  export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';
6
7
  export const RESEARCH_SOURCE_SKILL_ARTIFACT = 'research-source-skill.md';
@@ -16,43 +17,11 @@ export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
16
17
  ['references', 'sources']
17
18
  ]);
18
19
 
19
- export const RESEARCH_SCOUT_COUNCIL = Object.freeze([
20
- {
21
- id: 'einstein',
22
- label: 'Einstein lens',
23
- role: 'first_principles_reframer',
24
- mandate: 'Reframe the problem around invariants, constraints, symmetry, and thought experiments.',
25
- required_outputs: ['eureka_moment', 'assumptions_to_remove', 'invariant_or_simplifying_frame', 'decisive_thought_experiment']
26
- },
27
- {
28
- id: 'feynman',
29
- label: 'Feynman lens',
30
- role: 'explanation_experimentalist',
31
- mandate: 'Reduce the idea to a teachable mechanism, toy example, and cheap empirical probe.',
32
- required_outputs: ['eureka_moment', 'plain_language_mechanism', 'toy_model', 'cheap_probe']
33
- },
34
- {
35
- id: 'turing',
36
- label: 'Turing lens',
37
- role: 'formalization_and_adversarial_cases',
38
- mandate: 'Formalize inputs, outputs, algorithms, computability limits, and adversarial countercases.',
39
- required_outputs: ['eureka_moment', 'formal_definition', 'algorithmic_shape', 'edge_or_adversarial_case']
40
- },
41
- {
42
- id: 'von_neumann',
43
- label: 'von Neumann lens',
44
- role: 'systems_strategy_scout',
45
- mandate: 'Map system dynamics, strategic incentives, scaling behavior, and worst-case interactions.',
46
- required_outputs: ['eureka_moment', 'system_model', 'strategic_or_scaling_risk', 'robustness_condition']
47
- },
48
- {
49
- id: 'skeptic',
50
- label: 'Skeptic lens',
51
- role: 'counterevidence_scout',
52
- mandate: 'Find disconfirming sources, replication risks, base-rate failures, and claims that should be weakened.',
53
- required_outputs: ['eureka_moment', 'counterevidence', 'base_rate_or_failure_mode', 'claim_to_downgrade']
54
- }
55
- ]);
20
/**
 * Scout council derived from the shared persona contract.
 *
 * Every contract entry is shallow-copied, given a `label` that mirrors its
 * `display_name`, and frozen; the resulting array is frozen as well.
 * `required_outputs` is carried over from the same contract entry.
 */
export const RESEARCH_SCOUT_COUNCIL = Object.freeze(
  RESEARCH_SCOUT_PERSONA_CONTRACT.map((persona) => {
    const councilEntry = {
      ...persona,
      label: persona.display_name,
      required_outputs: persona.required_outputs
    };
    return Object.freeze(councilEntry);
  })
);
56
25
 
57
26
  export const RESEARCH_SOURCE_LAYERS = Object.freeze([
58
27
  {
@@ -125,8 +94,10 @@ export function createResearchPlan(prompt, opts = {}) {
125
94
  methodology: 'genius-scout-council-frontier-discovery-loop',
126
95
  objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
127
96
  execution_policy: {
128
- normal_run: 'real_long_running_research',
97
+ normal_run: 'real_long_running_research_until_unanimous_scout_consensus',
129
98
  default_cycle_timeout_minutes: 120,
99
+ default_max_cycles: 12,
100
+ safety_cap: 'Research repeats scout/debate/falsification cycles until unanimous scout consensus or an explicit max-cycle safety cap pauses the run.',
130
101
  mock_policy: '--mock is for selftests and dry harness checks only; normal Research must block rather than silently substitute mock output.'
131
102
  },
132
103
  outcome_rubric: OUTCOME_RUBRIC,
@@ -143,8 +114,8 @@ export function createResearchPlan(prompt, opts = {}) {
143
114
  rule: 'Every scout must record one literal Eureka! moment with a non-obvious idea before debate.'
144
115
  },
145
116
  debate_policy: {
146
- mode: 'vigorous_evidence_bound_debate',
147
- rule: 'Every scout must challenge at least one other scout or respond to a challenge before synthesis.'
117
+ mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
118
+ rule: 'Every scout must challenge at least one other scout or respond to a challenge before synthesis. The loop repeats until every scout records final agreement on the surviving mechanism or the safety cap pauses the run with an unpassed gate.'
148
119
  },
149
120
  scouts: RESEARCH_SCOUT_COUNCIL,
150
121
  protocol: [
@@ -189,12 +160,18 @@ export function createResearchPlan(prompt, opts = {}) {
189
160
  triangulation_checks: 1
190
161
  }
191
162
  },
163
+ mutation_policy: {
164
+ implementation_allowed: false,
165
+ allowed_write_scope: 'route-local mission artifacts only',
166
+ rule: 'Normal Research must not modify repository source, package, docs, config, or generated harness files. It may write only artifacts under its own .sneakoscope/missions/<mission-id>/ directory.'
167
+ },
192
168
  rules: [
169
+ 'Do not modify code or project source files during Research. Research writes only route-local mission artifacts; implementation belongs to $Team or another execution route.',
193
170
  'Do not claim novelty without a novelty ledger entry.',
194
171
  'Separate facts, inferences, hypotheses, and speculations.',
195
172
  'Run the genius-lens scout council independently before synthesis.',
196
173
  'Every Research scout must run at reasoning_effort=xhigh, record one literal "Eureka!" idea, and participate in the debate.',
197
- 'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
174
+ 'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json. Continue cycles until unanimous_consensus=true with every scout agreeing.',
198
175
  'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
199
176
  `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
200
177
  'Actively seek disconfirming evidence before synthesis.',
@@ -209,7 +186,7 @@ export function createResearchPlan(prompt, opts = {}) {
209
186
  { id: 'R1_SOURCE_SKILL', goal: `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with layer-specific search routes, quality fields, and blockers before source gathering.` },
210
187
  { id: 'R2_SOURCE_SEARCH', goal: 'Run layered web/source retrieval across papers, official data, standards, news, public discourse, developer knowledge, and counterevidence.' },
211
188
  { id: 'R3_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
212
- { id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
189
+ { id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding; repeat until unanimous scout consensus is recorded.' },
213
190
  { id: 'R5_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
214
191
  { id: 'R6_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
215
192
  { id: 'R7_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' },
@@ -239,8 +216,10 @@ export function researchPlanMarkdown(plan) {
239
216
  lines.push(`Methodology: ${plan.methodology}`);
240
217
  if (plan.execution_policy) {
241
218
  lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
219
+ if (plan.execution_policy.default_max_cycles) lines.push(`Consensus loop: repeat until unanimous scout consensus; default safety cap ${plan.execution_policy.default_max_cycles} cycles`);
242
220
  lines.push(`Mock policy: ${plan.execution_policy.mock_policy}`);
243
221
  }
222
+ if (plan.mutation_policy) lines.push(`Mutation policy: ${plan.mutation_policy.rule}`);
244
223
  lines.push('');
245
224
  lines.push('## Rules');
246
225
  for (const rule of plan.rules) lines.push(`- ${rule}`);
@@ -248,7 +227,7 @@ export function researchPlanMarkdown(plan) {
248
227
  if (plan.research_council?.scouts?.length) {
249
228
  lines.push('## Genius Scout Council');
250
229
  lines.push(`Policy: ${plan.research_council.policy}`);
251
- for (const scout of plan.research_council.scouts) lines.push(`- ${scout.id}: ${scout.role} - ${scout.mandate}`);
230
+ for (const scout of plan.research_council.scouts) lines.push(`- ${scout.display_name || scout.label || scout.id}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
252
231
  lines.push('');
253
232
  }
254
233
  if (plan.web_research_policy) {
@@ -297,10 +276,12 @@ export function researchSourceSkillMarkdown(plan) {
297
276
  lines.push('- Each source entry should record title, locator/URL, publisher or author when known, published_at when known, accessed_at, layer, reliability, credibility, stance, supports or undermines, and notes.');
298
277
  lines.push('- Public discourse sources such as X/Twitter or Reddit are signals and edge cases, not truth. They must be triangulated with formal, official, practitioner, or counterevidence layers.');
299
278
  lines.push('- If a layer cannot be searched with the available runtime or credentials, record the blocker and keep research-gate.json unpassed.');
279
+ lines.push('- Do not modify repository source code or generated harness files during Research; write only route-local mission artifacts.');
300
280
  lines.push('');
301
281
  lines.push('## Debate Use');
302
282
  lines.push('- Every scout must cite source-ledger ids in findings and Eureka ideas.');
303
283
  lines.push('- The skeptic lens must challenge the strongest claim using counterevidence or source-quality downgrades.');
284
+ lines.push('- Continue scout/debate/falsification cycles until every scout agrees to the surviving mechanism. Record `unanimous_consensus=true`, `consensus_iterations`, and per-scout agreement in debate-ledger.json.');
304
285
  lines.push('- Synthesis keeps only claims that survive cross-layer triangulation and falsification.');
305
286
  lines.push('');
306
287
  return `${lines.join('\n')}\n`;
@@ -315,7 +296,8 @@ export function countGeniusOpinionSummaries(text = '') {
315
296
  const lower = String(text || '').toLowerCase();
316
297
  return RESEARCH_SCOUT_COUNCIL.filter((scout) => {
317
298
  const label = String(scout.label || '').toLowerCase();
318
- return lower.includes(String(scout.id || '').toLowerCase()) || (label && lower.includes(label));
299
+ const display = String(scout.display_name || '').toLowerCase();
300
+ return lower.includes(String(scout.id || '').toLowerCase()) || (label && lower.includes(label)) || (display && lower.includes(display));
319
301
  }).length;
320
302
  }
321
303
 
@@ -399,9 +381,15 @@ export function defaultScoutLedger(plan = null) {
399
381
  created_at: nowIso(),
400
382
  scouts: scouts.map((scout) => ({
401
383
  id: scout.id,
384
+ display_name: scout.display_name || scout.label || scout.id,
385
+ historical_inspiration: scout.historical_inspiration || null,
386
+ persona: scout.persona || scout.role,
387
+ persona_boundary: scout.persona_boundary || 'persona-inspired cognitive lens only; do not impersonate the historical person',
402
388
  role: scout.role,
403
389
  mandate: scout.mandate,
404
390
  effort: 'xhigh',
391
+ reasoning_effort: 'xhigh',
392
+ service_tier: scout.service_tier || 'fast',
405
393
  eureka: {
406
394
  exclamation: 'Eureka!',
407
395
  idea: '',
@@ -411,7 +399,8 @@ export function defaultScoutLedger(plan = null) {
411
399
  query_set: [],
412
400
  findings: [],
413
401
  falsifiers: [],
414
- cheap_probes: []
402
+ cheap_probes: [],
403
+ challenge_or_response: ''
415
404
  })),
416
405
  synthesis: {
417
406
  surviving_claims: [],
@@ -426,8 +415,16 @@ export function defaultDebateLedger(plan = null) {
426
415
  return {
427
416
  schema_version: 1,
428
417
  created_at: nowIso(),
429
- mode: 'vigorous_evidence_bound_debate',
418
+ mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
430
419
  required_participants: scouts.map((scout) => scout.id),
420
+ consensus_iterations: 0,
421
+ unanimous_consensus: false,
422
+ scout_agreements: scouts.map((scout) => ({
423
+ scout_id: scout.id,
424
+ agrees: false,
425
+ final_position: '',
426
+ source_ids: []
427
+ })),
431
428
  exchanges: [],
432
429
  synthesis_pressure: {
433
430
  strongest_disagreement: '',
@@ -474,6 +471,37 @@ function sourceLayerCoverageStats(sourceLedger = null, requiredLayerIds = RESEAR
474
471
  return { covered: [...covered], missing, required: [...requiredLayerIds] };
475
472
  }
476
473
 
474
/**
 * Summarize scout consensus recorded in the debate ledger and gate.
 *
 * Agreement rows are collected from `debateLedger.scout_agreements`,
 * `debateLedger.consensus.scout_agreements`, and `debateLedger.final_positions`.
 * A scout counts as agreed when any row carrying its id (`scout_id`, `id`, or
 * `scout`) sets `agrees`, `agreement`, or `final_agreement` to exactly `true`.
 * Rows for ids outside RESEARCH_SCOUT_COUNCIL are ignored.
 *
 * @param {object|null} [debateLedger=null] Parsed debate ledger, or null when absent.
 * @param {object|null} [gate={}] Parsed research gate; null is treated like an empty gate.
 * @returns {{unanimous: boolean, iterations: number, agreed_count: number, required_count: number, missing: string[]}}
 *   `unanimous` is true only when an explicit unanimous flag is set AND every
 *   council scout has an agreeing row. `iterations` is the largest iteration
 *   count found, and is always a finite number.
 */
function consensusStats(debateLedger = null, gate = {}) {
  const required = RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id);
  const requiredIds = new Set(required);
  const asRows = (value) => (Array.isArray(value) ? value : []);
  // Non-numeric or non-finite counts are treated as 0. Math.max propagates NaN,
  // and a NaN result would make a caller's `iterations < 1` comparison false.
  const asCount = (value) => {
    const count = Number(value || 0);
    return Number.isFinite(count) ? count : 0;
  };

  const rows = [
    ...asRows(debateLedger?.scout_agreements),
    ...asRows(debateLedger?.consensus?.scout_agreements),
    ...asRows(debateLedger?.final_positions)
  ];
  const agreed = new Set();
  for (const row of rows) {
    const id = row?.scout_id || row?.id || row?.scout;
    // Null rows yield an undefined id and are skipped here, before any
    // non-optional property access on `row`.
    if (!requiredIds.has(id)) continue;
    if (row.agrees === true || row.agreement === true || row.final_agreement === true) agreed.add(id);
  }

  // `gate?.` guards an explicit null gate; the parameter default only covers undefined.
  const explicitUnanimous = debateLedger?.unanimous_consensus === true
    || debateLedger?.consensus?.unanimous_consensus === true
    || debateLedger?.consensus?.unanimous === true
    || gate?.unanimous_consensus === true;
  const iterations = Math.max(
    asCount(gate?.consensus_iterations),
    asCount(debateLedger?.consensus_iterations),
    asCount(debateLedger?.consensus?.iterations)
  );
  // The flag alone is not enough: every required scout must also have agreed.
  const unanimous = explicitUnanimous && required.every((id) => agreed.has(id));
  return {
    unanimous,
    iterations,
    agreed_count: agreed.size,
    required_count: required.length,
    missing: required.filter((id) => !agreed.has(id))
  };
}
504
+
477
505
  export function defaultResearchGate() {
478
506
  return {
479
507
  passed: false,
@@ -500,6 +528,8 @@ export function defaultResearchGate() {
500
528
  scout_findings: 0,
501
529
  debate_participants: 0,
502
530
  debate_exchanges: 0,
531
+ consensus_iterations: 0,
532
+ unanimous_consensus: false,
503
533
  counterevidence_sources: 0,
504
534
  candidate_insights: 0,
505
535
  falsification_passes: 0,
@@ -532,6 +562,8 @@ export async function evaluateResearchGate(dir) {
532
562
  const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
533
563
  const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
534
564
  const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
565
+ const geniusSummaryText = geniusSummaryPresent ? await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '') : '';
566
+ const personaValidation = validateResearchScoutPersonas(scoutLedger || {}, geniusSummaryText);
535
567
  const sourceEntries = Array.isArray(sourceLedger?.sources) ? sourceLedger.sources.length : 0;
536
568
  const counterEvidenceEntries = Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0;
537
569
  const webSearchPasses = Math.max(Number(gate.web_search_passes || 0), Number(sourceLedger?.web_search_passes || 0));
@@ -546,6 +578,7 @@ export async function evaluateResearchGate(dir) {
546
578
  const debateRows = Array.isArray(debateLedger?.exchanges) ? debateLedger.exchanges : [];
547
579
  const debateParticipants = new Set(debateRows.flatMap((exchange) => [exchange?.from, exchange?.to, ...(Array.isArray(exchange?.participants) ? exchange.participants : [])].filter(Boolean))).size;
548
580
  const debateExchanges = debateRows.length;
581
+ const consensus = consensusStats(debateLedger, gate);
549
582
  const falsificationCases = Array.isArray(falsificationLedger?.cases) ? falsificationLedger.cases.length : 0;
550
583
  const searchBlockers = [
551
584
  ...(Array.isArray(gate.web_search_blockers) ? gate.web_search_blockers : []),
@@ -571,9 +604,12 @@ export async function evaluateResearchGate(dir) {
571
604
  if (Math.max(Number(gate.independent_scouts || 0), independentScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('independent_scouts_missing');
572
605
  if (Math.max(Number(gate.xhigh_scouts || 0), xhighScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('scout_effort_not_xhigh');
573
606
  if (Math.max(Number(gate.eureka_moments || 0), eurekaMoments) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('eureka_missing');
607
+ if (!personaValidation.ok) reasons.push(...personaValidation.issues.map((issue) => `scout_persona:${issue}`));
574
608
  if (Math.max(Number(gate.scout_findings || 0), scoutFindings) < 4) reasons.push('scout_findings_missing');
575
609
  if (Math.max(Number(gate.debate_participants || 0), debateParticipants) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('debate_participants_missing');
576
610
  if (Math.max(Number(gate.debate_exchanges || 0), debateExchanges) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('debate_exchanges_missing');
611
+ if (Math.max(Number(gate.consensus_iterations || 0), consensus.iterations) < 1) reasons.push('consensus_iteration_missing');
612
+ if (!consensus.unanimous) reasons.push('unanimous_consensus_missing');
577
613
  if (Math.max(Number(gate.counterevidence_sources || 0), counterEvidenceEntries) < 1) reasons.push('counterevidence_source_missing');
578
614
  if ((gate.candidate_insights || 0) < 1) reasons.push('candidate_insight_missing');
579
615
  if ((gate.falsification_passes || 0) < 1) reasons.push('falsification_missing');
@@ -601,9 +637,15 @@ export async function evaluateResearchGate(dir) {
601
637
  independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
602
638
  xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
603
639
  eureka_moments: Math.max(Number(gate.eureka_moments || 0), eurekaMoments),
640
+ scout_persona_contract_ok: personaValidation.ok,
641
+ scout_persona_issues: personaValidation.issues,
604
642
  scout_findings: Math.max(Number(gate.scout_findings || 0), scoutFindings),
605
643
  debate_participants: Math.max(Number(gate.debate_participants || 0), debateParticipants),
606
644
  debate_exchanges: Math.max(Number(gate.debate_exchanges || 0), debateExchanges),
645
+ consensus_iterations: Math.max(Number(gate.consensus_iterations || 0), consensus.iterations),
646
+ unanimous_consensus: consensus.unanimous,
647
+ consensus_agreed_scouts: consensus.agreed_count,
648
+ consensus_missing_scouts: consensus.missing,
607
649
  counterevidence_sources: Math.max(Number(gate.counterevidence_sources || 0), counterEvidenceEntries),
608
650
  falsification_cases: Math.max(Number(gate.falsification_cases || 0), falsificationCases),
609
651
  citation_coverage: citationCoverage,
@@ -709,12 +751,18 @@ export async function writeMockResearchResult(dir, plan) {
709
751
  ...defaultScoutLedger(plan),
710
752
  scouts: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
711
753
  id: scout.id,
754
+ display_name: scout.display_name || scout.label,
755
+ historical_inspiration: scout.historical_inspiration || null,
756
+ persona: scout.persona || scout.role,
757
+ persona_boundary: scout.persona_boundary,
712
758
  role: scout.role,
713
759
  mandate: scout.mandate,
714
760
  effort: 'xhigh',
761
+ reasoning_effort: 'xhigh',
762
+ service_tier: scout.service_tier || 'fast',
715
763
  eureka: {
716
764
  exclamation: 'Eureka!',
717
- idea: `${scout.label} spots a non-obvious, testable angle for ${plan.prompt}.`,
765
+ idea: `${scout.display_name || scout.label} spots a non-obvious, testable angle for ${plan.prompt}.`,
718
766
  why_it_matters: 'It forces the run to produce one falsifiable idea before synthesis.',
719
767
  source_ids: ['mock-source-1']
720
768
  },
@@ -722,13 +770,14 @@ export async function writeMockResearchResult(dir, plan) {
722
770
  findings: [
723
771
  {
724
772
  id: `mock-${scout.id}-finding-1`,
725
- claim: `${scout.label} supports a source-cited, falsifiable research gate for ${plan.prompt}.`,
773
+ claim: `${scout.display_name || scout.label} supports a source-cited, falsifiable research gate for ${plan.prompt}.`,
726
774
  source_ids: ['mock-source-1'],
727
775
  status: 'mock_supported'
728
776
  }
729
777
  ],
730
778
  falsifiers: ['A run without cited sources, counterevidence, or cheap probes should fail the research gate.'],
731
- cheap_probes: ['Compare discovery-loop output against a summary-only baseline and count testable insights.']
779
+ cheap_probes: ['Compare discovery-loop output against a summary-only baseline and count testable insights.'],
780
+ challenge_or_response: 'Participated in the mock evidence-bound debate.'
732
781
  })),
733
782
  synthesis: {
734
783
  surviving_claims: ['mock-insight-1'],
@@ -739,8 +788,16 @@ export async function writeMockResearchResult(dir, plan) {
739
788
  const debateLedger = {
740
789
  schema_version: 1,
741
790
  created_at: nowIso(),
742
- mode: 'vigorous_evidence_bound_debate',
791
+ mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
743
792
  required_participants: RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id),
793
+ consensus_iterations: 2,
794
+ unanimous_consensus: true,
795
+ scout_agreements: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
796
+ scout_id: scout.id,
797
+ agrees: true,
798
+ final_position: 'Agrees to keep the falsifiable, source-cited research mechanism as the surviving claim.',
799
+ source_ids: ['mock-source-1', 'mock-counter-1']
800
+ })),
744
801
  exchanges: [
745
802
  { id: 'mock-debate-1', from: 'einstein', to: 'feynman', stance: 'challenge', claim: 'A toy probe is not enough unless it preserves the invariant.', source_ids: ['mock-source-1'] },
746
803
  { id: 'mock-debate-2', from: 'feynman', to: 'turing', stance: 'challenge', claim: 'A formal gate must still be explainable as a cheap experiment.', source_ids: ['mock-source-1'] },
@@ -793,8 +850,8 @@ export async function writeMockResearchResult(dir, plan) {
793
850
  '',
794
851
  '## Scout Opinions',
795
852
  ...RESEARCH_SCOUT_COUNCIL.flatMap((scout) => [
796
- `### ${scout.label} (${scout.id})`,
797
- `Final opinion: ${scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
853
+ `### ${scout.display_name || scout.label} (${scout.id})`,
854
+ `Final opinion: ${scout.display_name || scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
798
855
  'Strongest evidence: mock-source-1 plus the layered source ledger.',
799
856
  'Main disagreement: whether formal structure or cheap empirical probes should dominate the first pass.',
800
857
  'Changed mind: accepted that citation coverage, counterevidence, and triangulation are gates before synthesis.',
@@ -831,13 +888,15 @@ export async function writeMockResearchResult(dir, plan) {
831
888
  source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
832
889
  source_layers_covered: RESEARCH_SOURCE_LAYER_IDS.length,
833
890
  triangulation_checks: sourceLedger.triangulation.cross_layer_checks.length,
834
- independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
835
- xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
836
- eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
837
- scout_findings: RESEARCH_SCOUT_COUNCIL.length,
838
- debate_participants: RESEARCH_SCOUT_COUNCIL.length,
839
- debate_exchanges: debateLedger.exchanges.length,
840
- counterevidence_sources: 1,
891
+ independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
892
+ xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
893
+ eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
894
+ scout_findings: RESEARCH_SCOUT_COUNCIL.length,
895
+ debate_participants: RESEARCH_SCOUT_COUNCIL.length,
896
+ debate_exchanges: debateLedger.exchanges.length,
897
+ consensus_iterations: debateLedger.consensus_iterations,
898
+ unanimous_consensus: true,
899
+ counterevidence_sources: 1,
841
900
  candidate_insights: 1,
842
901
  falsification_passes: 1,
843
902
  falsification_cases: 1,
@@ -850,5 +909,5 @@ export async function writeMockResearchResult(dir, plan) {
850
909
  }
851
910
 
852
911
  export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
853
- return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record the exchanges before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. 
Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, and cheap_probes for every scout lens.\n4. Debate: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout before synthesis.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, debate, cross-layer triangulation, and falsification are recorded.\n7. 
Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with effort, eureka, query_set, findings, source_ids, falsifiers, and cheap_probes.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, and unresolved conflicts.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
912
+ return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-CODE-MUTATION POLICY: Do not edit repository source, package metadata, docs, config, generated skills, or harness files. Write only route-local artifacts under .sneakoscope/missions/${id}/. If a needed implementation change is discovered, record it as a recommendation or blocker for a later execution route.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT PERSONA POLICY: Every Research scout row must include display_name, persona, persona_boundary, reasoning_effort: "xhigh", service_tier when available, falsifiers, cheap_probes, and challenge_or_response. Persona names are Einstein Scout, Feynman Scout, Turing Scout, von Neumann Scout, and Skeptic Scout; they are cognitive lenses, not impersonations.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" 
and one non-obvious, source-linked idea before debate.\nCONSENSUS LOOP POLICY: This is not a fixed three-cycle run. Repeat source-gathering, scout Eureka ideas, debate, falsification, and synthesis pressure until every scout records final agreement with the surviving mechanism. If unanimous agreement is not reached, keep research-gate.json unpassed and continue until the explicit max-cycle safety cap pauses the run.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record exchanges, consensus_iterations, unanimous_consensus, and per-scout agreements before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. 
If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with display_name/persona/persona_boundary, effort=xhigh, reasoning_effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response for every scout lens.\n4. Debate to agreement: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout, consensus_iterations >= 1, unanimous_consensus=true only when all scouts agree, and scout_agreements for every scout.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, unanimous debate agreement, cross-layer triangulation, and falsification are recorded.\n7. Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. 
Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with display_name, persona, persona_boundary, effort, reasoning_effort, service_tier, eureka, query_set, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, unresolved conflicts, consensus_iterations, unanimous_consensus, and scout_agreements for every scout.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have display_name/persona/persona_boundary, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, consensus_iterations >= 1, unanimous_consensus=true with every scout agreement recorded, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
854
913
  }
@@ -399,7 +399,7 @@ export const ROUTES = [
399
399
  command: '$Research',
400
400
  mode: 'RESEARCH',
401
401
  route: 'research mission',
402
- description: 'Frontier discovery with xhigh genius-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
402
+ description: 'Frontier discovery with named xhigh persona-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
403
403
  requiredSkills: ['research', 'research-discovery', 'pipeline-runner', REFLECTION_SKILL_NAME, 'honest-mode'],
404
404
  lifecycle: ['research_plan', 'source_skill', 'layered_source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'genius_opinion_summary', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
405
405
  context7Policy: 'if_external_docs',
@@ -521,7 +521,7 @@ export const COMMAND_CATALOG = [
521
521
  { name: 'quickstart', usage: 'sks quickstart', description: 'Show the shortest safe setup and verification flow.' },
522
522
  { name: 'bootstrap', usage: 'sks bootstrap [--install-scope global|project] [--local-only] [--json]', description: 'Initialize the current project, install SKS Codex App files/skills, check Context7/Codex App/tmux, and print ready true/false.' },
523
523
  { name: 'root', usage: 'sks root [--json]', description: 'Show whether SKS is using a project root or the per-user global SKS runtime root.' },
524
- { name: 'deps', usage: 'sks deps check|install [tmux|codex|context7|all] [--yes]', description: 'Check or guided-install Node/npm PATH, Codex CLI/App, Context7, Browser Use, Computer Use, tmux, and Homebrew on macOS.' },
524
+ { name: 'deps', usage: 'sks deps check|install [tmux|codex|context7|all] [--yes]', description: 'Check or guided-install Node/npm PATH, Codex CLI/App, Context7, Browser tooling, Computer Use, tmux, and Homebrew on macOS.' },
525
525
  { name: 'codex-app', usage: 'sks codex-app [check|open|remote-control]', description: 'Check Codex App install and first-party MCP/plugin readiness, then show app setup files, examples, and Codex CLI 0.130.0+ remote-control availability.' },
526
526
  { name: 'codex-lb', usage: 'sks codex-lb status|health|repair|setup --host <domain> --api-key <key>', description: 'Configure, health-check, or repair codex-lb Codex CLI auth by writing ~/.codex/config.toml, syncing auth.json, and loading the CODEX_LB_API_KEY env file.' },
527
527
  { name: 'auth', usage: 'sks auth status|health|repair|setup --host <domain> --api-key <key>', description: 'Shortcut for codex-lb auth status, health, repair, and setup commands.' },
@@ -535,6 +535,7 @@ export const COMMAND_CATALOG = [
535
535
  { name: 'ppt', usage: 'sks ppt build|status <mission-id|latest> [--json]', description: 'Build or inspect $PPT HTML/PDF artifacts from a sealed presentation decision contract.' },
536
536
  { name: 'image-ux-review', usage: 'sks image-ux-review status <mission-id|latest> [--json]', description: 'Inspect $Image-UX-Review gpt-image-2/imagegen annotated UI/UX review artifacts.' },
537
537
  { name: 'context7', usage: 'sks context7 check|setup|tools|resolve|docs|evidence ...', description: 'Check, configure, and call the local Context7 MCP requirement.' },
538
+ { name: 'recallpulse', usage: 'sks recallpulse run|status|eval|governance|checklist <mission-id|latest>', description: 'Run report-only RecallPulse active recall, durable status, proof capsule, evidence envelope, and governance checks.' },
538
539
  { name: 'pipeline', usage: 'sks pipeline status|resume|plan|answer ...', description: 'Inspect the active skill-first route, materialized execution plan, ambiguity gates, and completion gates.' },
539
540
  { name: 'guard', usage: 'sks guard check [--json]', description: 'Check SKS harness self-protection lock, fingerprints, and source-repo exception state.' },
540
541
  { name: 'conflicts', usage: 'sks conflicts check|prompt [--json]', description: 'Detect other Codex harnesses such as OMX/DCodex and print the GPT-5.5 high cleanup prompt.' },