sneakoscope 0.7.78 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -3
- package/package.json +1 -1
- package/src/cli/install-helpers.mjs +38 -9
- package/src/cli/main.mjs +65 -23
- package/src/cli/maintenance-commands.mjs +98 -6
- package/src/cli/recallpulse-command.mjs +157 -0
- package/src/core/codex-app.mjs +181 -11
- package/src/core/fsx.mjs +1 -1
- package/src/core/hooks-runtime.mjs +95 -1
- package/src/core/init.mjs +43 -8
- package/src/core/pipeline.mjs +3 -3
- package/src/core/recallpulse.mjs +1215 -0
- package/src/core/research.mjs +119 -60
- package/src/core/routes.mjs +3 -2
package/src/core/research.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { appendJsonlBounded, nowIso, readJson, readText, writeJsonAtomic, writeTextAtomic, exists } from './fsx.mjs';
|
|
3
3
|
import { OUTCOME_RUBRIC } from './proof-field.mjs';
|
|
4
|
+
import { RESEARCH_SCOUT_PERSONA_CONTRACT, validateResearchScoutPersonas } from './recallpulse.mjs';
|
|
4
5
|
|
|
5
6
|
export const RESEARCH_PAPER_ARTIFACT = 'research-paper.md';
|
|
6
7
|
export const RESEARCH_SOURCE_SKILL_ARTIFACT = 'research-source-skill.md';
|
|
@@ -16,43 +17,11 @@ export const RESEARCH_PAPER_SECTION_GROUPS = Object.freeze([
|
|
|
16
17
|
['references', 'sources']
|
|
17
18
|
]);
|
|
18
19
|
|
|
19
|
-
export const RESEARCH_SCOUT_COUNCIL = Object.freeze(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
mandate: 'Reframe the problem around invariants, constraints, symmetry, and thought experiments.',
|
|
25
|
-
required_outputs: ['eureka_moment', 'assumptions_to_remove', 'invariant_or_simplifying_frame', 'decisive_thought_experiment']
|
|
26
|
-
},
|
|
27
|
-
{
|
|
28
|
-
id: 'feynman',
|
|
29
|
-
label: 'Feynman lens',
|
|
30
|
-
role: 'explanation_experimentalist',
|
|
31
|
-
mandate: 'Reduce the idea to a teachable mechanism, toy example, and cheap empirical probe.',
|
|
32
|
-
required_outputs: ['eureka_moment', 'plain_language_mechanism', 'toy_model', 'cheap_probe']
|
|
33
|
-
},
|
|
34
|
-
{
|
|
35
|
-
id: 'turing',
|
|
36
|
-
label: 'Turing lens',
|
|
37
|
-
role: 'formalization_and_adversarial_cases',
|
|
38
|
-
mandate: 'Formalize inputs, outputs, algorithms, computability limits, and adversarial countercases.',
|
|
39
|
-
required_outputs: ['eureka_moment', 'formal_definition', 'algorithmic_shape', 'edge_or_adversarial_case']
|
|
40
|
-
},
|
|
41
|
-
{
|
|
42
|
-
id: 'von_neumann',
|
|
43
|
-
label: 'von Neumann lens',
|
|
44
|
-
role: 'systems_strategy_scout',
|
|
45
|
-
mandate: 'Map system dynamics, strategic incentives, scaling behavior, and worst-case interactions.',
|
|
46
|
-
required_outputs: ['eureka_moment', 'system_model', 'strategic_or_scaling_risk', 'robustness_condition']
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
id: 'skeptic',
|
|
50
|
-
label: 'Skeptic lens',
|
|
51
|
-
role: 'counterevidence_scout',
|
|
52
|
-
mandate: 'Find disconfirming sources, replication risks, base-rate failures, and claims that should be weakened.',
|
|
53
|
-
required_outputs: ['eureka_moment', 'counterevidence', 'base_rate_or_failure_mode', 'claim_to_downgrade']
|
|
54
|
-
}
|
|
55
|
-
]);
|
|
20
|
+
// Frozen scout council derived from the shared persona contract exported by
// recallpulse.mjs. Each row keeps every contract field and adds a `label`
// alias for callers that still read `scout.label`.
export const RESEARCH_SCOUT_COUNCIL = Object.freeze(RESEARCH_SCOUT_PERSONA_CONTRACT.map((scout) => Object.freeze({
  ...scout,
  // Same fallback chain the markdown and ledger writers in this file use, so a
  // contract row without display_name never produces `label: undefined`.
  label: scout.display_name || scout.label || scout.id,
  // Kept explicit so the key is always present on the row, even when the
  // contract row omits it (the value is then undefined, as before).
  required_outputs: scout.required_outputs
})));
|
|
56
25
|
|
|
57
26
|
export const RESEARCH_SOURCE_LAYERS = Object.freeze([
|
|
58
27
|
{
|
|
@@ -125,8 +94,10 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
125
94
|
methodology: 'genius-scout-council-frontier-discovery-loop',
|
|
126
95
|
objective: 'Find the shortest useful mechanism that can be falsified or applied, grounded in maximum available source retrieval rather than broad summary.',
|
|
127
96
|
execution_policy: {
|
|
128
|
-
normal_run: '
|
|
97
|
+
normal_run: 'real_long_running_research_until_unanimous_scout_consensus',
|
|
129
98
|
default_cycle_timeout_minutes: 120,
|
|
99
|
+
default_max_cycles: 12,
|
|
100
|
+
safety_cap: 'Research repeats scout/debate/falsification cycles until unanimous scout consensus or an explicit max-cycle safety cap pauses the run.',
|
|
130
101
|
mock_policy: '--mock is for selftests and dry harness checks only; normal Research must block rather than silently substitute mock output.'
|
|
131
102
|
},
|
|
132
103
|
outcome_rubric: OUTCOME_RUBRIC,
|
|
@@ -143,8 +114,8 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
143
114
|
rule: 'Every scout must record one literal Eureka! moment with a non-obvious idea before debate.'
|
|
144
115
|
},
|
|
145
116
|
debate_policy: {
|
|
146
|
-
mode: '
|
|
147
|
-
rule: 'Every scout must challenge at least one other scout or respond to a challenge before synthesis.'
|
|
117
|
+
mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
|
|
118
|
+
rule: 'Every scout must challenge at least one other scout or respond to a challenge before synthesis. The loop repeats until every scout records final agreement on the surviving mechanism or the safety cap pauses the run with an unpassed gate.'
|
|
148
119
|
},
|
|
149
120
|
scouts: RESEARCH_SCOUT_COUNCIL,
|
|
150
121
|
protocol: [
|
|
@@ -189,12 +160,18 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
189
160
|
triangulation_checks: 1
|
|
190
161
|
}
|
|
191
162
|
},
|
|
163
|
+
mutation_policy: {
|
|
164
|
+
implementation_allowed: false,
|
|
165
|
+
allowed_write_scope: 'route-local mission artifacts only',
|
|
166
|
+
rule: 'Normal Research must not modify repository source, package, docs, config, or generated harness files. It may write only artifacts under its own .sneakoscope/missions/<mission-id>/ directory.'
|
|
167
|
+
},
|
|
192
168
|
rules: [
|
|
169
|
+
'Do not modify code or project source files during Research. Research writes only route-local mission artifacts; implementation belongs to $Team or another execution route.',
|
|
193
170
|
'Do not claim novelty without a novelty ledger entry.',
|
|
194
171
|
'Separate facts, inferences, hypotheses, and speculations.',
|
|
195
172
|
'Run the genius-lens scout council independently before synthesis.',
|
|
196
173
|
'Every Research scout must run at reasoning_effort=xhigh, record one literal "Eureka!" idea, and participate in the debate.',
|
|
197
|
-
'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json.',
|
|
174
|
+
'The scout council must debate vigorously but stay evidence-bound; record challenges and responses in debate-ledger.json. Continue cycles until unanimous_consensus=true with every scout agreeing.',
|
|
198
175
|
'Maximize safe web/source search as layered source retrieval and record queries, source layers, citations, quality notes, triangulation checks, and blockers in source-ledger.json.',
|
|
199
176
|
`Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis; do not edit generated .agents/skills during the research run.`,
|
|
200
177
|
'Actively seek disconfirming evidence before synthesis.',
|
|
@@ -209,7 +186,7 @@ export function createResearchPlan(prompt, opts = {}) {
|
|
|
209
186
|
{ id: 'R1_SOURCE_SKILL', goal: `Create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with layer-specific search routes, quality fields, and blockers before source gathering.` },
|
|
210
187
|
{ id: 'R2_SOURCE_SEARCH', goal: 'Run layered web/source retrieval across papers, official data, standards, news, public discourse, developer knowledge, and counterevidence.' },
|
|
211
188
|
{ id: 'R3_EUREKA', goal: 'Have each xhigh genius-lens scout shout Eureka! and record one non-obvious idea with source ids.' },
|
|
212
|
-
{ id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding.' },
|
|
189
|
+
{ id: 'R4_DEBATE', goal: 'Run a vigorous evidence-bound council debate with every scout challenging or responding; repeat until unanimous scout consensus is recorded.' },
|
|
213
190
|
{ id: 'R5_FALSIFY', goal: 'Attack each mechanism with counterexamples, missing evidence, source conflicts, and failure modes.' },
|
|
214
191
|
{ id: 'R6_APPLY', goal: 'Keep the smallest surviving mechanism, define a cheap probe, and write all ledgers.' },
|
|
215
192
|
{ id: 'R7_PAPER', goal: 'Convert the final research result into a concise paper manuscript with abstract, method, findings, limitations, and references.' },
|
|
@@ -239,8 +216,10 @@ export function researchPlanMarkdown(plan) {
|
|
|
239
216
|
lines.push(`Methodology: ${plan.methodology}`);
|
|
240
217
|
if (plan.execution_policy) {
|
|
241
218
|
lines.push(`Execution: ${plan.execution_policy.normal_run}; default cycle timeout ${plan.execution_policy.default_cycle_timeout_minutes} minutes`);
|
|
219
|
+
if (plan.execution_policy.default_max_cycles) lines.push(`Consensus loop: repeat until unanimous scout consensus; default safety cap ${plan.execution_policy.default_max_cycles} cycles`);
|
|
242
220
|
lines.push(`Mock policy: ${plan.execution_policy.mock_policy}`);
|
|
243
221
|
}
|
|
222
|
+
if (plan.mutation_policy) lines.push(`Mutation policy: ${plan.mutation_policy.rule}`);
|
|
244
223
|
lines.push('');
|
|
245
224
|
lines.push('## Rules');
|
|
246
225
|
for (const rule of plan.rules) lines.push(`- ${rule}`);
|
|
@@ -248,7 +227,7 @@ export function researchPlanMarkdown(plan) {
|
|
|
248
227
|
if (plan.research_council?.scouts?.length) {
|
|
249
228
|
lines.push('## Genius Scout Council');
|
|
250
229
|
lines.push(`Policy: ${plan.research_council.policy}`);
|
|
251
|
-
for (const scout of plan.research_council.scouts) lines.push(`- ${scout.id}: ${scout.role} - ${scout.mandate}`);
|
|
230
|
+
for (const scout of plan.research_council.scouts) lines.push(`- ${scout.display_name || scout.label || scout.id}: ${scout.persona || scout.role} - ${scout.mandate} (${scout.persona_boundary || 'persona-inspired lens only'})`);
|
|
252
231
|
lines.push('');
|
|
253
232
|
}
|
|
254
233
|
if (plan.web_research_policy) {
|
|
@@ -297,10 +276,12 @@ export function researchSourceSkillMarkdown(plan) {
|
|
|
297
276
|
lines.push('- Each source entry should record title, locator/URL, publisher or author when known, published_at when known, accessed_at, layer, reliability, credibility, stance, supports or undermines, and notes.');
|
|
298
277
|
lines.push('- Public discourse sources such as X/Twitter or Reddit are signals and edge cases, not truth. They must be triangulated with formal, official, practitioner, or counterevidence layers.');
|
|
299
278
|
lines.push('- If a layer cannot be searched with the available runtime or credentials, record the blocker and keep research-gate.json unpassed.');
|
|
279
|
+
lines.push('- Do not modify repository source code or generated harness files during Research; write only route-local mission artifacts.');
|
|
300
280
|
lines.push('');
|
|
301
281
|
lines.push('## Debate Use');
|
|
302
282
|
lines.push('- Every scout must cite source-ledger ids in findings and Eureka ideas.');
|
|
303
283
|
lines.push('- The skeptic lens must challenge the strongest claim using counterevidence or source-quality downgrades.');
|
|
284
|
+
lines.push('- Continue scout/debate/falsification cycles until every scout agrees to the surviving mechanism. Record `unanimous_consensus=true`, `consensus_iterations`, and per-scout agreement in debate-ledger.json.');
|
|
304
285
|
lines.push('- Synthesis keeps only claims that survive cross-layer triangulation and falsification.');
|
|
305
286
|
lines.push('');
|
|
306
287
|
return `${lines.join('\n')}\n`;
|
|
@@ -315,7 +296,8 @@ export function countGeniusOpinionSummaries(text = '') {
|
|
|
315
296
|
const lower = String(text || '').toLowerCase();
|
|
316
297
|
return RESEARCH_SCOUT_COUNCIL.filter((scout) => {
|
|
317
298
|
const label = String(scout.label || '').toLowerCase();
|
|
318
|
-
|
|
299
|
+
const display = String(scout.display_name || '').toLowerCase();
|
|
300
|
+
return lower.includes(String(scout.id || '').toLowerCase()) || (label && lower.includes(label)) || (display && lower.includes(display));
|
|
319
301
|
}).length;
|
|
320
302
|
}
|
|
321
303
|
|
|
@@ -399,9 +381,15 @@ export function defaultScoutLedger(plan = null) {
|
|
|
399
381
|
created_at: nowIso(),
|
|
400
382
|
scouts: scouts.map((scout) => ({
|
|
401
383
|
id: scout.id,
|
|
384
|
+
display_name: scout.display_name || scout.label || scout.id,
|
|
385
|
+
historical_inspiration: scout.historical_inspiration || null,
|
|
386
|
+
persona: scout.persona || scout.role,
|
|
387
|
+
persona_boundary: scout.persona_boundary || 'persona-inspired cognitive lens only; do not impersonate the historical person',
|
|
402
388
|
role: scout.role,
|
|
403
389
|
mandate: scout.mandate,
|
|
404
390
|
effort: 'xhigh',
|
|
391
|
+
reasoning_effort: 'xhigh',
|
|
392
|
+
service_tier: scout.service_tier || 'fast',
|
|
405
393
|
eureka: {
|
|
406
394
|
exclamation: 'Eureka!',
|
|
407
395
|
idea: '',
|
|
@@ -411,7 +399,8 @@ export function defaultScoutLedger(plan = null) {
|
|
|
411
399
|
query_set: [],
|
|
412
400
|
findings: [],
|
|
413
401
|
falsifiers: [],
|
|
414
|
-
cheap_probes: []
|
|
402
|
+
cheap_probes: [],
|
|
403
|
+
challenge_or_response: ''
|
|
415
404
|
})),
|
|
416
405
|
synthesis: {
|
|
417
406
|
surviving_claims: [],
|
|
@@ -426,8 +415,16 @@ export function defaultDebateLedger(plan = null) {
|
|
|
426
415
|
return {
|
|
427
416
|
schema_version: 1,
|
|
428
417
|
created_at: nowIso(),
|
|
429
|
-
mode: '
|
|
418
|
+
mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
|
|
430
419
|
required_participants: scouts.map((scout) => scout.id),
|
|
420
|
+
consensus_iterations: 0,
|
|
421
|
+
unanimous_consensus: false,
|
|
422
|
+
scout_agreements: scouts.map((scout) => ({
|
|
423
|
+
scout_id: scout.id,
|
|
424
|
+
agrees: false,
|
|
425
|
+
final_position: '',
|
|
426
|
+
source_ids: []
|
|
427
|
+
})),
|
|
431
428
|
exchanges: [],
|
|
432
429
|
synthesis_pressure: {
|
|
433
430
|
strongest_disagreement: '',
|
|
@@ -474,6 +471,37 @@ function sourceLayerCoverageStats(sourceLedger = null, requiredLayerIds = RESEAR
|
|
|
474
471
|
return { covered: [...covered], missing, required: [...requiredLayerIds] };
|
|
475
472
|
}
|
|
476
473
|
|
|
474
|
+
/**
 * Summarize scout consensus recorded in the debate ledger and the research gate.
 *
 * Agreement rows are collected from `scout_agreements`,
 * `consensus.scout_agreements`, and `final_positions` on the ledger. A scout
 * counts as agreed when any row carrying its id has `agrees`, `agreement`, or
 * `final_agreement` strictly equal to `true`. Consensus is unanimous only when
 * an explicit unanimous flag is set (on the ledger, its `consensus` object, or
 * the gate) AND every required scout has an agreeing row.
 *
 * @param {object|null} debateLedger - Parsed debate-ledger.json content, or null when absent.
 * @param {object|null} gate - Parsed research-gate.json content; null/undefined is treated as empty.
 * @returns {{unanimous: boolean, iterations: number, agreed_count: number, required_count: number, missing: string[]}}
 */
function consensusStats(debateLedger = null, gate = {}) {
  const required = RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id);
  const requiredIds = new Set(required);
  const asArray = (value) => (Array.isArray(value) ? value : []);
  // Number('many') is NaN, and NaN poisons Math.max; a NaN iteration count would
  // then slip past any `iterations < 1` check. Treat non-finite values as 0.
  const asCount = (value) => {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  const rows = [
    ...asArray(debateLedger?.scout_agreements),
    ...asArray(debateLedger?.consensus?.scout_agreements),
    ...asArray(debateLedger?.final_positions)
  ];

  const agreed = new Set();
  for (const row of rows) {
    const id = row?.scout_id || row?.id || row?.scout;
    // Rows for unknown scouts (or malformed rows without an id) are ignored.
    if (!requiredIds.has(id)) continue;
    if (row.agrees === true || row.agreement === true || row.final_agreement === true) agreed.add(id);
  }

  const explicitUnanimous = debateLedger?.unanimous_consensus === true
    || debateLedger?.consensus?.unanimous_consensus === true
    || debateLedger?.consensus?.unanimous === true
    // Optional chaining: a default parameter does not cover an explicit null gate.
    || gate?.unanimous_consensus === true;

  const iterations = Math.max(
    asCount(gate?.consensus_iterations),
    asCount(debateLedger?.consensus_iterations),
    asCount(debateLedger?.consensus?.iterations)
  );

  // The flag alone is never enough; every required scout must have agreed.
  const unanimous = explicitUnanimous && required.every((id) => agreed.has(id));

  return {
    unanimous,
    iterations,
    agreed_count: agreed.size,
    required_count: required.length,
    missing: required.filter((id) => !agreed.has(id))
  };
}
|
|
504
|
+
|
|
477
505
|
export function defaultResearchGate() {
|
|
478
506
|
return {
|
|
479
507
|
passed: false,
|
|
@@ -500,6 +528,8 @@ export function defaultResearchGate() {
|
|
|
500
528
|
scout_findings: 0,
|
|
501
529
|
debate_participants: 0,
|
|
502
530
|
debate_exchanges: 0,
|
|
531
|
+
consensus_iterations: 0,
|
|
532
|
+
unanimous_consensus: false,
|
|
503
533
|
counterevidence_sources: 0,
|
|
504
534
|
candidate_insights: 0,
|
|
505
535
|
falsification_passes: 0,
|
|
@@ -532,6 +562,8 @@ export async function evaluateResearchGate(dir) {
|
|
|
532
562
|
const scoutLedger = await readJson(path.join(dir, 'scout-ledger.json'), null);
|
|
533
563
|
const debateLedger = await readJson(path.join(dir, 'debate-ledger.json'), null);
|
|
534
564
|
const falsificationLedger = await readJson(path.join(dir, 'falsification-ledger.json'), null);
|
|
565
|
+
const geniusSummaryText = geniusSummaryPresent ? await readText(path.join(dir, RESEARCH_GENIUS_SUMMARY_ARTIFACT), '') : '';
|
|
566
|
+
const personaValidation = validateResearchScoutPersonas(scoutLedger || {}, geniusSummaryText);
|
|
535
567
|
const sourceEntries = Array.isArray(sourceLedger?.sources) ? sourceLedger.sources.length : 0;
|
|
536
568
|
const counterEvidenceEntries = Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0;
|
|
537
569
|
const webSearchPasses = Math.max(Number(gate.web_search_passes || 0), Number(sourceLedger?.web_search_passes || 0));
|
|
@@ -546,6 +578,7 @@ export async function evaluateResearchGate(dir) {
|
|
|
546
578
|
const debateRows = Array.isArray(debateLedger?.exchanges) ? debateLedger.exchanges : [];
|
|
547
579
|
const debateParticipants = new Set(debateRows.flatMap((exchange) => [exchange?.from, exchange?.to, ...(Array.isArray(exchange?.participants) ? exchange.participants : [])].filter(Boolean))).size;
|
|
548
580
|
const debateExchanges = debateRows.length;
|
|
581
|
+
const consensus = consensusStats(debateLedger, gate);
|
|
549
582
|
const falsificationCases = Array.isArray(falsificationLedger?.cases) ? falsificationLedger.cases.length : 0;
|
|
550
583
|
const searchBlockers = [
|
|
551
584
|
...(Array.isArray(gate.web_search_blockers) ? gate.web_search_blockers : []),
|
|
@@ -571,9 +604,12 @@ export async function evaluateResearchGate(dir) {
|
|
|
571
604
|
if (Math.max(Number(gate.independent_scouts || 0), independentScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('independent_scouts_missing');
|
|
572
605
|
if (Math.max(Number(gate.xhigh_scouts || 0), xhighScouts) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('scout_effort_not_xhigh');
|
|
573
606
|
if (Math.max(Number(gate.eureka_moments || 0), eurekaMoments) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('eureka_missing');
|
|
607
|
+
if (!personaValidation.ok) reasons.push(...personaValidation.issues.map((issue) => `scout_persona:${issue}`));
|
|
574
608
|
if (Math.max(Number(gate.scout_findings || 0), scoutFindings) < 4) reasons.push('scout_findings_missing');
|
|
575
609
|
if (Math.max(Number(gate.debate_participants || 0), debateParticipants) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('debate_participants_missing');
|
|
576
610
|
if (Math.max(Number(gate.debate_exchanges || 0), debateExchanges) < RESEARCH_SCOUT_COUNCIL.length) reasons.push('debate_exchanges_missing');
|
|
611
|
+
if (Math.max(Number(gate.consensus_iterations || 0), consensus.iterations) < 1) reasons.push('consensus_iteration_missing');
|
|
612
|
+
if (!consensus.unanimous) reasons.push('unanimous_consensus_missing');
|
|
577
613
|
if (Math.max(Number(gate.counterevidence_sources || 0), counterEvidenceEntries) < 1) reasons.push('counterevidence_source_missing');
|
|
578
614
|
if ((gate.candidate_insights || 0) < 1) reasons.push('candidate_insight_missing');
|
|
579
615
|
if ((gate.falsification_passes || 0) < 1) reasons.push('falsification_missing');
|
|
@@ -601,9 +637,15 @@ export async function evaluateResearchGate(dir) {
|
|
|
601
637
|
independent_scouts: Math.max(Number(gate.independent_scouts || 0), independentScouts),
|
|
602
638
|
xhigh_scouts: Math.max(Number(gate.xhigh_scouts || 0), xhighScouts),
|
|
603
639
|
eureka_moments: Math.max(Number(gate.eureka_moments || 0), eurekaMoments),
|
|
640
|
+
scout_persona_contract_ok: personaValidation.ok,
|
|
641
|
+
scout_persona_issues: personaValidation.issues,
|
|
604
642
|
scout_findings: Math.max(Number(gate.scout_findings || 0), scoutFindings),
|
|
605
643
|
debate_participants: Math.max(Number(gate.debate_participants || 0), debateParticipants),
|
|
606
644
|
debate_exchanges: Math.max(Number(gate.debate_exchanges || 0), debateExchanges),
|
|
645
|
+
consensus_iterations: Math.max(Number(gate.consensus_iterations || 0), consensus.iterations),
|
|
646
|
+
unanimous_consensus: consensus.unanimous,
|
|
647
|
+
consensus_agreed_scouts: consensus.agreed_count,
|
|
648
|
+
consensus_missing_scouts: consensus.missing,
|
|
607
649
|
counterevidence_sources: Math.max(Number(gate.counterevidence_sources || 0), counterEvidenceEntries),
|
|
608
650
|
falsification_cases: Math.max(Number(gate.falsification_cases || 0), falsificationCases),
|
|
609
651
|
citation_coverage: citationCoverage,
|
|
@@ -709,12 +751,18 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
709
751
|
...defaultScoutLedger(plan),
|
|
710
752
|
scouts: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
|
|
711
753
|
id: scout.id,
|
|
754
|
+
display_name: scout.display_name || scout.label,
|
|
755
|
+
historical_inspiration: scout.historical_inspiration || null,
|
|
756
|
+
persona: scout.persona || scout.role,
|
|
757
|
+
persona_boundary: scout.persona_boundary,
|
|
712
758
|
role: scout.role,
|
|
713
759
|
mandate: scout.mandate,
|
|
714
760
|
effort: 'xhigh',
|
|
761
|
+
reasoning_effort: 'xhigh',
|
|
762
|
+
service_tier: scout.service_tier || 'fast',
|
|
715
763
|
eureka: {
|
|
716
764
|
exclamation: 'Eureka!',
|
|
717
|
-
idea: `${scout.label} spots a non-obvious, testable angle for ${plan.prompt}.`,
|
|
765
|
+
idea: `${scout.display_name || scout.label} spots a non-obvious, testable angle for ${plan.prompt}.`,
|
|
718
766
|
why_it_matters: 'It forces the run to produce one falsifiable idea before synthesis.',
|
|
719
767
|
source_ids: ['mock-source-1']
|
|
720
768
|
},
|
|
@@ -722,13 +770,14 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
722
770
|
findings: [
|
|
723
771
|
{
|
|
724
772
|
id: `mock-${scout.id}-finding-1`,
|
|
725
|
-
claim: `${scout.label} supports a source-cited, falsifiable research gate for ${plan.prompt}.`,
|
|
773
|
+
claim: `${scout.display_name || scout.label} supports a source-cited, falsifiable research gate for ${plan.prompt}.`,
|
|
726
774
|
source_ids: ['mock-source-1'],
|
|
727
775
|
status: 'mock_supported'
|
|
728
776
|
}
|
|
729
777
|
],
|
|
730
778
|
falsifiers: ['A run without cited sources, counterevidence, or cheap probes should fail the research gate.'],
|
|
731
|
-
cheap_probes: ['Compare discovery-loop output against a summary-only baseline and count testable insights.']
|
|
779
|
+
cheap_probes: ['Compare discovery-loop output against a summary-only baseline and count testable insights.'],
|
|
780
|
+
challenge_or_response: 'Participated in the mock evidence-bound debate.'
|
|
732
781
|
})),
|
|
733
782
|
synthesis: {
|
|
734
783
|
surviving_claims: ['mock-insight-1'],
|
|
@@ -739,8 +788,16 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
739
788
|
const debateLedger = {
|
|
740
789
|
schema_version: 1,
|
|
741
790
|
created_at: nowIso(),
|
|
742
|
-
mode: '
|
|
791
|
+
mode: 'vigorous_evidence_bound_debate_until_unanimous_consensus',
|
|
743
792
|
required_participants: RESEARCH_SCOUT_COUNCIL.map((scout) => scout.id),
|
|
793
|
+
consensus_iterations: 2,
|
|
794
|
+
unanimous_consensus: true,
|
|
795
|
+
scout_agreements: RESEARCH_SCOUT_COUNCIL.map((scout) => ({
|
|
796
|
+
scout_id: scout.id,
|
|
797
|
+
agrees: true,
|
|
798
|
+
final_position: 'Agrees to keep the falsifiable, source-cited research mechanism as the surviving claim.',
|
|
799
|
+
source_ids: ['mock-source-1', 'mock-counter-1']
|
|
800
|
+
})),
|
|
744
801
|
exchanges: [
|
|
745
802
|
{ id: 'mock-debate-1', from: 'einstein', to: 'feynman', stance: 'challenge', claim: 'A toy probe is not enough unless it preserves the invariant.', source_ids: ['mock-source-1'] },
|
|
746
803
|
{ id: 'mock-debate-2', from: 'feynman', to: 'turing', stance: 'challenge', claim: 'A formal gate must still be explainable as a cheap experiment.', source_ids: ['mock-source-1'] },
|
|
@@ -793,8 +850,8 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
793
850
|
'',
|
|
794
851
|
'## Scout Opinions',
|
|
795
852
|
...RESEARCH_SCOUT_COUNCIL.flatMap((scout) => [
|
|
796
|
-
`### ${scout.label} (${scout.id})`,
|
|
797
|
-
`Final opinion: ${scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
|
|
853
|
+
`### ${scout.display_name || scout.label} (${scout.id})`,
|
|
854
|
+
`Final opinion: ${scout.display_name || scout.label} wants the run to preserve ${scout.mandate.toLowerCase()} while producing a cited, falsifiable insight.`,
|
|
798
855
|
'Strongest evidence: mock-source-1 plus the layered source ledger.',
|
|
799
856
|
'Main disagreement: whether formal structure or cheap empirical probes should dominate the first pass.',
|
|
800
857
|
'Changed mind: accepted that citation coverage, counterevidence, and triangulation are gates before synthesis.',
|
|
@@ -831,13 +888,15 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
831
888
|
source_layers_required: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
832
889
|
source_layers_covered: RESEARCH_SOURCE_LAYER_IDS.length,
|
|
833
890
|
triangulation_checks: sourceLedger.triangulation.cross_layer_checks.length,
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
891
|
+
independent_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
892
|
+
xhigh_scouts: RESEARCH_SCOUT_COUNCIL.length,
|
|
893
|
+
eureka_moments: RESEARCH_SCOUT_COUNCIL.length,
|
|
894
|
+
scout_findings: RESEARCH_SCOUT_COUNCIL.length,
|
|
895
|
+
debate_participants: RESEARCH_SCOUT_COUNCIL.length,
|
|
896
|
+
debate_exchanges: debateLedger.exchanges.length,
|
|
897
|
+
consensus_iterations: debateLedger.consensus_iterations,
|
|
898
|
+
unanimous_consensus: true,
|
|
899
|
+
counterevidence_sources: 1,
|
|
841
900
|
candidate_insights: 1,
|
|
842
901
|
falsification_passes: 1,
|
|
843
902
|
falsification_cases: 1,
|
|
@@ -850,5 +909,5 @@ export async function writeMockResearchResult(dir, plan) {
|
|
|
850
909
|
}
|
|
851
910
|
|
|
852
911
|
export function buildResearchPrompt({ id, mission, plan, cycle, previous }) {
|
|
853
|
-
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" and one non-obvious, source-linked idea before debate.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record
|
|
912
|
+
return `You are running SKS Research Mode.\nMISSION: ${id}\nTOPIC: ${mission.prompt}\nCYCLE: ${cycle}\nMODE: Genius Scout Council + frontier discovery loop. Use maximum reasoning depth available under the current Codex profile.\nLONG-RUN REAL-RESEARCH POLICY: Normal Research is allowed to take one or two hours when the question requires it. Do real source gathering and evidence comparison; do not shortcut into mock, fixture, or summary-only output. If live source access is unavailable, write the blocker and keep the gate unpassed.\nNO-CODE-MUTATION POLICY: Do not edit repository source, package metadata, docs, config, generated skills, or harness files. Write only route-local artifacts under .sneakoscope/missions/${id}/. If a needed implementation change is discovered, record it as a recommendation or blocker for a later execution route.\nNO-QUESTION LOCK: Do not ask the user. Resolve scope from research-plan.json and current project evidence.\nSAFETY: Destructive database operations and unsafe external actions are forbidden. Prefer read-only inspection, local files, and cited public sources.\nPERSONA POLICY: Use Einstein/Feynman/Turing/von Neumann-inspired scout lenses only as cognitive roles. Do not impersonate, roleplay private identity, or speak as the historical people.\nSCOUT PERSONA POLICY: Every Research scout row must include display_name, persona, persona_boundary, reasoning_effort: "xhigh", service_tier when available, falsifiers, cheap_probes, and challenge_or_response. Persona names are Einstein Scout, Feynman Scout, Turing Scout, von Neumann Scout, and Skeptic Scout; they are cognitive lenses, not impersonations.\nSCOUT EFFORT POLICY: Every Research scout agent must use reasoning_effort=xhigh. Record effort: "xhigh" for every scout in scout-ledger.json. Any lower-effort scout output must keep research-gate.json unpassed.\nEUREKA POLICY: Every scout must literally write "Eureka!" 
and one non-obvious, source-linked idea before debate.\nCONSENSUS LOOP POLICY: This is not a fixed three-cycle run. Repeat source-gathering, scout Eureka ideas, debate, falsification, and synthesis pressure until every scout records final agreement with the surviving mechanism. If unanimous agreement is not reached, keep research-gate.json unpassed and continue until the explicit max-cycle safety cap pauses the run.\nDEBATE POLICY: The scouts must debate vigorously but stay evidence-bound. Every scout must challenge or respond at least once, and debate-ledger.json must record exchanges, consensus_iterations, unanimous_consensus, and per-scout agreements before synthesis.\nPAPER POLICY: After the report and ledgers, write research-paper.md as a concise manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References.\nSOURCE SKILL POLICY: Create or update ${RESEARCH_SOURCE_SKILL_ARTIFACT} as a route-local source collection skill before synthesis. It must name the selected source layers, query routes, quality fields, blockers, and cross-layer triangulation checks. Do not edit generated .agents/skills during the research run.\nWEB/SOURCE POLICY: Run layered source retrieval across every safely available layer before synthesis: latest public papers, official government or leading-institution data, standards or primary docs, current news including BBC/CNN/GDELT-style sources when relevant, public discourse including X/Twitter and Reddit when available, developer/practitioner sources such as Stack Overflow/Stack Exchange/GitHub, and counterevidence or fact-checking sources. Treat public discourse as signal, not truth. 
If a layer cannot be searched, record the blocker in source-ledger.json and do not pass the gate.\nRESEARCH PLAN:\n${JSON.stringify(plan, null, 2)}\n\nOBJECTIVE: Produce genuinely useful candidate discoveries: non-obvious hypotheses, mechanisms, predictions, or experiments. Do not merely summarize. Mark uncertainty clearly.\n\nREQUIRED PROCESS:\n1. Source skill first: create ${RESEARCH_SOURCE_SKILL_ARTIFACT} with source layers, query templates, quality fields, blockers, and triangulation rules.\n2. Layered source search: create source-ledger.json with source_layers, queries, source ids, source quality notes, counterevidence sources, triangulation.cross_layer_checks, citation coverage, and blockers.\n3. Independent xhigh scouts: create scout-ledger.json with display_name/persona/persona_boundary, effort=xhigh, reasoning_effort=xhigh, a literal Eureka! idea, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response for every scout lens.\n4. Debate to agreement: create debate-ledger.json with evidence-bound challenge/response exchanges involving every scout, consensus_iterations >= 1, unanimous_consensus=true only when all scouts agree, and scout_agreements for every scout.\n5. Falsification: create falsification-ledger.json with attacks, missing evidence, source conflicts, and decisive next tests.\n6. Synthesis: write research-report.md and novelty-ledger.json only after cited scout findings, Eureka ideas, unanimous debate agreement, cross-layer triangulation, and falsification are recorded.\n7. Paper: write research-paper.md as a paper-style manuscript with source-ledger references and limitations.\n\nREQUIRED OUTPUT FILES in .sneakoscope/missions/${id}/:\n- research-report.md: concise report with framing, source coverage, scout synthesis, debate synthesis, hypotheses, falsification, predictions, and next experiments. 
Cite source-ledger ids for factual claims.\n- research-paper.md: paper manuscript with Abstract, Introduction, Methodology, Findings/Results, Discussion, Limitations/Falsification, Conclusion/Next Experiment, and References using source-ledger ids.\n- ${RESEARCH_SOURCE_SKILL_ARTIFACT}: route-local source collection skill; it is evidence for the Skill Creator step and must not mutate generated .agents/skills.\n- source-ledger.json: layered web/source queries, source ids, source priority, source quality notes, counterevidence sources, citation coverage, triangulation checks, and blockers.\n- scout-ledger.json: one entry per scout lens with display_name, persona, persona_boundary, effort, reasoning_effort, service_tier, eureka, query_set, findings, source_ids, falsifiers, cheap_probes, and challenge_or_response.\n- debate-ledger.json: evidence-bound challenge/response exchanges, participants, changed minds, unresolved conflicts, consensus_iterations, unanimous_consensus, and scout_agreements for every scout.\n- novelty-ledger.json: entries with claim, novelty, confidence, falsifiability, evidence source ids, falsifiers, next_experiment.\n- falsification-ledger.json: attacks/counterexamples/source conflicts, result, and next_decisive_tests.\n- research-gate.json: set passed only when all ledgers exist, ${RESEARCH_SOURCE_SKILL_ARTIFACT} exists, research-paper.md exists with required paper sections, layered web/source retrieval covered every required source layer, at least one cross-layer triangulation check exists, all scouts have display_name/persona/persona_boundary, all scouts have effort=xhigh, all scouts have literal Eureka! 
ideas, every scout participated in debate, consensus_iterations >= 1, unanimous_consensus=true with every scout agreement recorded, at least one counterevidence source exists, citation coverage is complete, at least one insight survived falsification, at least one testable prediction exists, and unsupported breakthrough claims are zero.\n\nPrevious cycle tail:\n${String(previous || '').slice(-2500)}\n`;
|
|
854
913
|
}
|
package/src/core/routes.mjs
CHANGED
|
@@ -399,7 +399,7 @@ export const ROUTES = [
|
|
|
399
399
|
command: '$Research',
|
|
400
400
|
mode: 'RESEARCH',
|
|
401
401
|
route: 'research mission',
|
|
402
|
-
description: 'Frontier discovery with xhigh
|
|
402
|
+
description: 'Frontier discovery with named xhigh persona-lens scouts, Eureka ideas, vigorous evidence-bound debate, layered public source retrieval, falsification, a paper manuscript, a final genius-opinion summary, and testable predictions.',
|
|
403
403
|
requiredSkills: ['research', 'research-discovery', 'pipeline-runner', REFLECTION_SKILL_NAME, 'honest-mode'],
|
|
404
404
|
lifecycle: ['research_plan', 'source_skill', 'layered_source_ledger', 'xhigh_scout_council', 'eureka_moments', 'debate_ledger', 'report', 'paper', 'genius_opinion_summary', 'novelty_ledger', 'falsification_ledger', 'research_gate', 'post_route_reflection', 'honest_mode'],
|
|
405
405
|
context7Policy: 'if_external_docs',
|
|
@@ -521,7 +521,7 @@ export const COMMAND_CATALOG = [
|
|
|
521
521
|
{ name: 'quickstart', usage: 'sks quickstart', description: 'Show the shortest safe setup and verification flow.' },
|
|
522
522
|
{ name: 'bootstrap', usage: 'sks bootstrap [--install-scope global|project] [--local-only] [--json]', description: 'Initialize the current project, install SKS Codex App files/skills, check Context7/Codex App/tmux, and print ready true/false.' },
|
|
523
523
|
{ name: 'root', usage: 'sks root [--json]', description: 'Show whether SKS is using a project root or the per-user global SKS runtime root.' },
|
|
524
|
-
{ name: 'deps', usage: 'sks deps check|install [tmux|codex|context7|all] [--yes]', description: 'Check or guided-install Node/npm PATH, Codex CLI/App, Context7, Browser
|
|
524
|
+
{ name: 'deps', usage: 'sks deps check|install [tmux|codex|context7|all] [--yes]', description: 'Check or guided-install Node/npm PATH, Codex CLI/App, Context7, Browser tooling, Computer Use, tmux, and Homebrew on macOS.' },
|
|
525
525
|
{ name: 'codex-app', usage: 'sks codex-app [check|open|remote-control]', description: 'Check Codex App install and first-party MCP/plugin readiness, then show app setup files, examples, and Codex CLI 0.130.0+ remote-control availability.' },
|
|
526
526
|
{ name: 'codex-lb', usage: 'sks codex-lb status|health|repair|setup --host <domain> --api-key <key>', description: 'Configure, health-check, or repair codex-lb Codex CLI auth by writing ~/.codex/config.toml, syncing auth.json, and loading the CODEX_LB_API_KEY env file.' },
|
|
527
527
|
{ name: 'auth', usage: 'sks auth status|health|repair|setup --host <domain> --api-key <key>', description: 'Shortcut for codex-lb auth status, health, repair, and setup commands.' },
|
|
@@ -535,6 +535,7 @@ export const COMMAND_CATALOG = [
|
|
|
535
535
|
{ name: 'ppt', usage: 'sks ppt build|status <mission-id|latest> [--json]', description: 'Build or inspect $PPT HTML/PDF artifacts from a sealed presentation decision contract.' },
|
|
536
536
|
{ name: 'image-ux-review', usage: 'sks image-ux-review status <mission-id|latest> [--json]', description: 'Inspect $Image-UX-Review gpt-image-2/imagegen annotated UI/UX review artifacts.' },
|
|
537
537
|
{ name: 'context7', usage: 'sks context7 check|setup|tools|resolve|docs|evidence ...', description: 'Check, configure, and call the local Context7 MCP requirement.' },
|
|
538
|
+
{ name: 'recallpulse', usage: 'sks recallpulse run|status|eval|governance|checklist <mission-id|latest>', description: 'Run report-only RecallPulse active recall, durable status, proof capsule, evidence envelope, and governance checks.' },
|
|
538
539
|
{ name: 'pipeline', usage: 'sks pipeline status|resume|plan|answer ...', description: 'Inspect the active skill-first route, materialized execution plan, ambiguity gates, and completion gates.' },
|
|
539
540
|
{ name: 'guard', usage: 'sks guard check [--json]', description: 'Check SKS harness self-protection lock, fingerprints, and source-repo exception state.' },
|
|
540
541
|
{ name: 'conflicts', usage: 'sks conflicts check|prompt [--json]', description: 'Detect other Codex harnesses such as OMX/DCodex and print the GPT-5.5 high cleanup prompt.' },
|