helixevo 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ const TOC = [
18
18
  { id: 'judges', label: 'Multi-Judge System', icon: '⚖' },
19
19
  { id: 'networkhealth', label: 'Network Health', icon: '♺' },
20
20
  { id: 'autogen', label: 'Auto-Generalization', icon: '↑' },
21
- { id: 'metrics', label: 'Closed-Loop Metrics', icon: '📊' },
21
+ { id: 'metrics', label: 'Proof & Metrics', icon: '📊' },
22
22
  { id: 'frontier', label: 'Pareto Frontier', icon: '▲' },
23
23
  { id: 'regression', label: 'Regression Testing', icon: '✓' },
24
24
  { id: 'research', label: 'Proactive Research', icon: '◎' },
@@ -286,13 +286,13 @@ export default function GuidePage() {
286
286
  <div className="grid-3" style={{ marginTop: 24, marginBottom: 24 }}>
287
287
  <div className="card" style={{ padding: '18px 18px 16px' }}>
288
288
  <div style={{ fontSize: 10, fontWeight: 700, color: 'var(--text-muted)', textTransform: 'uppercase', letterSpacing: 0.7, marginBottom: 8 }}>Start operating</div>
289
- <div style={{ fontSize: 15, fontWeight: 700, color: 'var(--text)', marginBottom: 6 }}>Project setup → Watch / Capture → Co-Evolution → Topology</div>
290
- <div style={{ fontSize: 12.5, color: 'var(--text-dim)', lineHeight: 1.6 }}>This is the shortest path to seeing pressure, governed response, and structural control in the live product.</div>
289
+ <div style={{ fontSize: 15, fontWeight: 700, color: 'var(--text)', marginBottom: 6 }}>Project setup → Watch / Capture → Co-Evolution → Topology → Proof</div>
290
+ <div style={{ fontSize: 12.5, color: 'var(--text-dim)', lineHeight: 1.6 }}>This is the shortest path to seeing pressure, governed response, structural control, and the new bounded prove stage in the live product.</div>
291
291
  </div>
292
292
  <div className="card" style={{ padding: '18px 18px 16px' }}>
293
293
  <div style={{ fontSize: 10, fontWeight: 700, color: 'var(--text-muted)', textTransform: 'uppercase', letterSpacing: 0.7, marginBottom: 8 }}>Understand the brain</div>
294
294
  <div style={{ fontSize: 15, fontWeight: 700, color: 'var(--text)', marginBottom: 6 }}>Read the stack, then trace one signal through the loop</div>
295
- <div style={{ fontSize: 12.5, color: 'var(--text-dim)', lineHeight: 1.6 }}>The current system is best understood as layered cognition: semantic kernel → observation → pressure → response → transfer → governance → topology.</div>
295
+ <div style={{ fontSize: 12.5, color: 'var(--text-dim)', lineHeight: 1.6 }}>The current system is best understood as layered cognition: semantic kernel → observation → pressure → response → transfer → governance → topology → proof.</div>
296
296
  </div>
297
297
  <div className="card" style={{ padding: '18px 18px 16px' }}>
298
298
  <div style={{ fontSize: 10, fontWeight: 700, color: 'var(--text-muted)', textTransform: 'uppercase', letterSpacing: 0.7, marginBottom: 8 }}>Fast jumps</div>
@@ -319,11 +319,11 @@ export default function GuidePage() {
319
319
  <p className="guide-text">
320
320
  HelixEvo still captures failures, proposes skill mutations, evaluates them with judges, and deploys improvements carefully.
321
321
  What changed over the recent milestone arc is that these mutation mechanics now live inside a larger architecture that senses pressure,
322
- routes intervention under governance, records transfer evidence, reviews topology, and can execute a safe reviewed subset of structural change with rollback.
322
+ routes intervention under governance, records transfer evidence, reviews topology, executes a safe reviewed subset of structural change with rollback, and now exposes bounded proof review over what appears to have worked.
323
323
  </p>
324
324
  <p className="guide-text">
325
325
  That means the current product should not be explained as only “capture → evolve → validate.” The more truthful frame is:
326
- <strong> semantic kernel → observation → pressure → response → transfer → governance → topology review → topology execution → operator surfaces.</strong>
326
+ <strong> semantic kernel → observation → pressure → response → transfer → governance → topology review → topology execution → proof → operator surfaces.</strong>
327
327
  </p>
328
328
  <div className="guide-directions">
329
329
  <div className="guide-direction">
@@ -359,6 +359,7 @@ export default function GuidePage() {
359
359
  <Callout type="info">
360
360
  <strong>Prerequisites:</strong> Node.js 18+, <a href="https://bun.sh">Bun</a> (for building),
361
361
  and <a href="https://docs.anthropic.com/en/docs/claude-code">Claude CLI</a> with a Claude Max plan.
362
+ Claude Code remains the default provider. Codex and Ollama are optional providers for shared prompt-in / text-out paths once enabled in <code>~/.helix/config.json</code>.
362
363
  Prefer <code>claude auth login</code> managed credentials over exporting a hardcoded <code>CLAUDE_CODE_OAUTH_TOKEN</code>.
363
364
  </Callout>
364
365
 
@@ -403,7 +404,7 @@ helixevo evolve --verbose
403
404
  helixevo graph --optimize
404
405
  helixevo topology --status`}</Code>
405
406
  <p className="guide-text-sm">
406
- <code>evolve</code> mutates skills directly, while <code>graph --optimize</code> and <code>topology</code> expose the higher-level structural review and execution loop.
407
+ <code>evolve</code> mutates skills directly, while <code>graph --optimize</code> and <code>topology</code> expose the higher-level structural review and execution loop. Optimize now refreshes the review queue first and then reports whether conflict enrichment completed fully or only partially.
407
408
  </p>
408
409
  </Step>
409
410
 
@@ -488,9 +489,14 @@ helixevo topology --status`}</Code>
488
489
  },
489
490
  {
490
491
  cmd: 'helixevo metrics',
491
- desc: 'Measure whether evolution actually reduces corrections over time. This is the primary proof command.',
492
+ desc: 'Measure correction-rate and evolution-impact trends over time. This remains the quantitative metrics surface inside the broader prove stage.',
492
493
  flags: ['--verbose'],
493
494
  },
495
+ {
496
+ cmd: 'helixevo proof',
497
+ desc: 'Review bounded outcome attribution across interventions, transfer, topology execution, semantic adoption, and evolution impact; then verify, defer, or contest proof records explicitly.',
498
+ flags: ['--status', '--review <recordId>', '--decision <verify|defer|contest>', '--rationale <text>', '--verbose'],
499
+ },
494
500
  {
495
501
  cmd: 'helixevo dashboard',
496
502
  desc: 'Open the premium operator dashboard. It prefers localhost:3847, reuses a known managed dashboard, falls forward if needed, and can auto-update before launch.',
@@ -522,7 +528,7 @@ helixevo topology --status`}</Code>
522
528
  <Callout type="tip">
523
529
  A good mental grouping is: <strong>observe</strong> with <code>project-setup</code>, <code>watch</code>, and <code>capture</code>;
524
530
  <strong>respond</strong> with <code>research</code>, <code>specialize</code>, <code>evolve</code>, and <code>generalize</code>;
525
- <strong>restructure</strong> with <code>graph --optimize</code> plus <code>topology</code>; and <strong>prove</strong> with <code>metrics</code>, <code>health</code>, and <code>report</code>.
531
+ <strong>restructure</strong> with <code>graph --optimize</code> plus <code>topology</code>; and <strong>prove</strong> with <code>proof</code> first, supported by <code>metrics</code>, <code>health</code>, and <code>report</code>.
526
532
  </Callout>
527
533
  </Section>
528
534
 
@@ -729,11 +735,12 @@ helixevo ontology --deprecate <conceptId>`}</Code>
729
735
  <Section id="surfaces" title="Dashboard Surface Map" subtitle="Each tab is a different control or observability surface for the same brain.">
730
736
  <div className="grid-2" style={{ gap: 12 }}>
731
737
  {[
732
- ['Overview', 'var(--blue)', 'Top-level cockpit for frontier state, brain foundation, pressure totals, topology review counts, and prepared/applied structural state.'],
738
+ ['Overview', 'var(--blue)', 'Top-level cockpit for frontier state, brain foundation, pressure totals, topology review counts, prepared/applied structural state, and proof review visibility.'],
733
739
  ['Co-Evolution', 'var(--purple)', 'The response cockpit. Use it to inspect routed pressure, governance mode, promotion queue, transfer evidence, and where approved ontology concepts are influencing live route rationale.'],
734
740
  ['Skill Network', 'var(--green)', 'Graph-level understanding: relationships, co-evolution signals, inspector context, and structural handoff links.'],
735
741
  ['Ontology', 'var(--blue)', 'Semantic control surface for kernel visibility, frontier review, approved extensions, semantic adoption coverage, consumer summaries, and ontology change events.'],
736
742
  ['Topology', 'var(--yellow)', 'Governed plasticity surface for review decisions, accepted-ready queue, prepared plans, apply, rollback, and execution history.'],
743
+ ['Proof', 'var(--text-secondary)', 'Outcome-attribution cockpit for bounded review across interventions, transfer, topology execution, semantic adoption, and evolution impact.'],
737
744
  ['Projects', 'var(--blue)', 'Project intake and project-aware pressure surface. Best for capability gaps, activation traces, and promotion feeders.'],
738
745
  ['Research', 'var(--purple)', 'Discovery-oriented view grounded in current pressure and routed recommendations rather than disconnected idea generation.'],
739
746
  ['Evolution', 'var(--green)', 'Proposal-centric evidence view: judge scores, artifact provenance, and iteration history.'],
@@ -746,8 +753,8 @@ helixevo ontology --deprecate <conceptId>`}</Code>
746
753
  ))}
747
754
  </div>
748
755
  <Callout type="tip">
749
- If you are debugging current state, the best sequence is usually: <strong>Overview → Co-Evolution → Ontology → Topology → Skill Network → Projects / Research</strong>.
750
- That path mirrors the stack from summary → routed demand → semantic interpretation → structural review/execution → graph context → project or discovery detail.
756
+ If you are debugging current state, the best sequence is usually: <strong>Overview → Co-Evolution → Ontology → Topology → Proof → Skill Network → Projects / Research</strong>.
757
+ That path mirrors the stack from summary → routed demand → semantic interpretation → structural review/execution → bounded outcome review → graph context → project or discovery detail.
751
758
  </Callout>
752
759
  </Section>
753
760
 
@@ -851,25 +858,32 @@ Project B: "Use FlashList not FlatList" (React Native perf)
851
858
  </Section>
852
859
 
853
860
  {/* ─── Closed-Loop Metrics ─── */}
854
- <Section id="metrics" title="Closed-Loop Metrics" subtitle="Proving that HelixEvo actually makes the agent better with data, not just LLM scores.">
861
+ <Section id="metrics" title="Proof & Closed-Loop Metrics" subtitle="The prove stage is now first-class: metrics remain useful, but proof now unifies bounded outcome review across the newer brain loop.">
855
862
  <p className="guide-text">
856
- The <code>helixevo metrics</code> command answers the most important question: <strong>&ldquo;Is HelixEvo actually
857
- reducing corrections?&rdquo;</strong> It tracks correction rates per skill over time and measures the real
858
- impact of each evolution.
863
+ The <code>helixevo proof</code> command is now the primary operator surface for the <strong>prove</strong> stage. It reviews bounded outcome attribution across interventions,
864
+ transfer, topology execution, semantic adoption, and legacy evolution impact without pretending to know more than the evidence supports.
859
865
  </p>
860
- <Code title="Terminal">{`helixevo metrics --verbose`}</Code>
866
+ <Code title="Terminal">{`helixevo proof --status --verbose
867
+ helixevo metrics --verbose`}</Code>
868
+
869
+ <h3 className="guide-h3">What Proof Adds</h3>
870
+ <ul className="guide-list">
871
+ <li><strong>Unified proof targets:</strong> interventions, realized transfers, topology execution, semantic-adoption effectiveness, and existing evolution impact</li>
872
+ <li><strong>Bounded outcome states:</strong> effective, mixed, regressed, measuring, and insufficient-evidence</li>
873
+ <li><strong>Operator review:</strong> verify, defer, or contest proof records explicitly instead of trusting derived heuristics blindly</li>
874
+ <li><strong>Dedicated dashboard route:</strong> the Prove stage now lands on <code>/proof</code> instead of only the Guide metrics section</li>
875
+ </ul>
861
876
 
862
- <h3 className="guide-h3">What It Tracks</h3>
877
+ <h3 className="guide-h3">What Metrics Still Tracks</h3>
863
878
  <ul className="guide-list">
864
879
  <li><strong>Per-skill correction rates:</strong> 7-day rolling windows showing how often each skill leads to corrections</li>
865
880
  <li><strong>Trend detection:</strong> Each skill is marked as improving (↓), stable (→), or degrading (↑)</li>
866
881
  <li><strong>Evolution impact:</strong> Before/after comparison for each evolution — failures/day in the 7 days before vs. after</li>
867
- <li><strong>Verdict:</strong> &ldquo;X/Y evolutions reduced corrections&rdquo; the bottom line</li>
882
+ <li><strong>Quantitative baseline:</strong> legacy correction reduction remains an important proof input even though it is no longer the whole prove layer</li>
868
883
  </ul>
869
884
 
870
885
  <Callout type="warning">
871
- Metrics need time to accumulate. The system needs at least 7 days of data after an evolution to produce
872
- a reliable before/after comparison. Results shown as &ldquo;Measuring&rdquo; during the first 3 days.
886
+ Proof remains bounded. Recent changes should stay <strong>measuring</strong>, weak evidence should stay <strong>insufficient-evidence</strong>, and semantic-adoption proof should be treated as correlational evidence rather than strong direct causality. Treat <strong>regressed</strong> as explicit negative evidence that should trigger route/structure review before retry, and treat <strong>verified</strong> as stronger operator trust in the review state rather than magical proof of causality.
873
887
  </Callout>
874
888
  </Section>
875
889
 
@@ -1077,10 +1091,19 @@ generation: 3
1077
1091
 
1078
1092
  {/* ─── Configuration ─── */}
1079
1093
  <Section id="config" title="Configuration" subtitle="All configurable parameters and their defaults.">
1094
+ <h3 className="guide-h3">Provider control</h3>
1095
+ <div className="guide-params">
1096
+ <Param name="model" type="string" desc="Backward-compatible top-level model alias. It stays in sync with the current default provider model." def='"sonnet"' />
1097
+ <Param name="judgeModel" type="string" desc="Backward-compatible top-level judge model alias. It stays in sync with the current default provider judge model." def='"sonnet"' />
1098
+ <Param name="llm.defaultProvider" type="string" desc="Default provider for shared prompt-in / text-out operations." def='"claude-code"' />
1099
+ <Param name="llm.fallbackPolicy" type="string" desc="Fallback policy for shared provider-eligible operations. Keep disabled unless you want explicit on-failure fallback." def='"disabled"' />
1100
+ <Param name="llm.providers.claudeCode.enabled" type="boolean" desc="Keep Claude Code enabled as the default provider unless you intentionally move the default elsewhere." def="true" />
1101
+ <Param name="llm.providers.codex.enabled" type="boolean" desc="Enable GPT Codex for shared chat / JSON / judge-style paths." def="false" />
1102
+ <Param name="llm.providers.ollama.enabled" type="boolean" desc="Enable Ollama for shared local-model chat / JSON / judge-style paths." def="false" />
1103
+ </div>
1104
+
1080
1105
  <h3 className="guide-h3">Evolution</h3>
1081
1106
  <div className="guide-params">
1082
- <Param name="model" type="string" desc="LLM model for proposals and clustering." def='"sonnet"' />
1083
- <Param name="judgeModel" type="string" desc="LLM model for judge evaluations." def='"sonnet"' />
1084
1107
  <Param name="evolution.schedule" type="cron" desc="When to run automatic evolution." def='"0 2 * * *"' />
1085
1108
  <Param name="evolution.minFailuresForEvolution" type="number" desc="Minimum unresolved failures before evolve runs." def="5" />
1086
1109
  <Param name="evolution.maxFailuresPerRun" type="number" desc="Max failures to process per run." def="20" />
@@ -1102,6 +1125,32 @@ generation: 3
1102
1125
  <Code title="~/.helix/config.json">{`{
1103
1126
  "model": "sonnet",
1104
1127
  "judgeModel": "sonnet",
1128
+ "llm": {
1129
+ "defaultProvider": "claude-code",
1130
+ "fallbackPolicy": "disabled",
1131
+ "fallbackOrder": [],
1132
+ "providers": {
1133
+ "claudeCode": {
1134
+ "enabled": true,
1135
+ "command": "claude",
1136
+ "model": "sonnet",
1137
+ "judgeModel": "sonnet"
1138
+ },
1139
+ "codex": {
1140
+ "enabled": false,
1141
+ "command": "codex",
1142
+ "model": "gpt-5-codex",
1143
+ "judgeModel": "gpt-5-codex"
1144
+ },
1145
+ "ollama": {
1146
+ "enabled": false,
1147
+ "command": "ollama",
1148
+ "host": "http://127.0.0.1:11434",
1149
+ "model": "qwen3-coder:latest",
1150
+ "judgeModel": "qwen3-coder:latest"
1151
+ }
1152
+ }
1153
+ },
1105
1154
  "evolution": {
1106
1155
  "schedule": "0 2 * * *",
1107
1156
  "minFailuresForEvolution": 5,
@@ -1131,8 +1180,10 @@ generation: 3
1131
1180
  ├── pressure-interventions.jsonl # Routed intervention ledger across response lanes
1132
1181
  ├── transfer-events.jsonl # Promotion / transfer evidence across motifs and projects
1133
1182
  ├── governance-state.json # Operator-selected governance steering state
1183
+ ├── llm-runtime-state.json # Default provider, per-provider health, last execution, and fallback truth
1134
1184
  ├── topology-review-candidates.json # Persisted structural review queue
1135
1185
  ├── topology-review-decisions.jsonl # Accept / reject / defer decision ledger
1186
+ ├── topology-optimize-status.json # Last full/partial optimize refresh status + queue/enrichment summary
1136
1187
  ├── topology-overrides.json # Applied safe structural topology overrides
1137
1188
  ├── topology-snapshots.json # Snapshot refs for reviewed execution and rollback
1138
1189
  ├── topology-apply-plans.json # Prepared reviewed topology plans
@@ -1163,7 +1214,8 @@ generation: 3
1163
1214
  ['Observation memory', 'var(--blue)', 'failures.jsonl + activation-traces.jsonl capture what happened, where it happened, and which skills were active.'],
1164
1215
  ['Pressure & response memory', 'var(--yellow)', 'pressure-signals.jsonl + pressure-interventions.jsonl + transfer-events.jsonl describe demand, routing, and reusable promotion evidence.'],
1165
1216
  ['Ontology frontier memory', 'var(--blue)', 'ontology/kernel.json + ontology/frontier.json + ontology/extensions.json + ontology/reviews.jsonl + ontology/change-log.jsonl preserve semantic kernel state, provisional concepts, approved extensions, and review/change lineage while semantic adoption is derived from active runtime/control objects rather than a parallel ontology-usage ledger.'],
1166
- ['Governance & review memory', 'var(--purple)', 'governance-state.json + topology-review-candidates.json + topology-review-decisions.jsonl preserve why structural decisions are being made.'],
1217
+ ['Governance & provider memory', 'var(--purple)', 'governance-state.json + llm-runtime-state.json preserve adaptation steering plus the truthful backend ledger: default provider, per-provider health, last execution, and fallback state.'],
1218
+ ['Structural review memory', 'var(--purple)', 'topology-review-candidates.json + topology-review-decisions.jsonl + topology-optimize-status.json preserve why structural decisions are being made and whether the last optimize pass was full or degraded.'],
1167
1219
  ['Topology execution memory', 'var(--green)', 'topology-overrides.json + topology-snapshots.json + topology-apply-plans.json + topology-executions.jsonl + topology-artifacts.jsonl preserve reviewed structural execution and rollback.'],
1168
1220
  ['Evaluation & frontier memory', 'var(--blue)', 'evolution-history.json + evolution-artifacts.jsonl + skill-tests.jsonl + canary-registry.json + frontier.json preserve proof, guardrails, and best configurations.'],
1169
1221
  ['Discovery memory', 'var(--purple)', 'knowledge-buffer.json keeps research discoveries and drafts so failed experiments can be iterated instead of lost.'],
@@ -1359,7 +1411,7 @@ generation: 3
1359
1411
  A transfer event is evidence that reusable knowledge was actually promoted or reused across layers or projects. This is how HelixEvo distinguishes a recommendation from a realized knowledge transfer.
1360
1412
  </FAQItem>
1361
1413
  <FAQItem q="How do I prove HelixEvo's brain is working?">
1362
- Use multiple proof surfaces together: <code>metrics</code> for correction reduction, Co-Evolution for routed interventions and transfer evidence, Topology for reviewed structural execution state, and the verification reports under <code>reports/verification/</code> for milestone-level backtesting.
1414
+ Start with <code>helixevo proof --status</code> or the <code>/proof</code> dashboard route, then use supporting proof surfaces together: <code>metrics</code> for correction reduction, Co-Evolution for routed interventions and transfer evidence, Topology for reviewed structural execution state, and the verification reports under <code>reports/verification/</code> for milestone-level backtesting.
1363
1415
  </FAQItem>
1364
1416
  <FAQItem q="How many failures do I need before evolution works?">
1365
1417
  By default, 5 unresolved failures are required (<code>minFailuresForEvolution</code>) for the standard evolution trigger.
@@ -10,6 +10,7 @@ import { OperatorLoopTrail } from '@/components/operator-loop-trail'
10
10
  import { SurfaceJumpLinks } from '@/components/surface-jump-links'
11
11
  import { NextStepEmptyState } from '@/components/next-step-empty-state'
12
12
  import type { OntologyControlDashboardSummary, OntologyReviewDecisionStatus } from '@/lib/data'
13
+ import type { ProofDashboardSummary } from '@/lib/proof'
13
14
 
14
15
  type RunState = 'idle' | 'running' | 'success' | 'error'
15
16
 
@@ -35,7 +36,7 @@ function formatMode(value: string) {
35
36
  return value.replace(/-/g, ' ')
36
37
  }
37
38
 
38
- export default function OntologyClient({ initialDashboard }: { initialDashboard: OntologyControlDashboardSummary }) {
39
+ export default function OntologyClient({ initialDashboard, proof }: { initialDashboard: OntologyControlDashboardSummary; proof: ProofDashboardSummary }) {
39
40
  const [dashboard, setDashboard] = useState(initialDashboard)
40
41
  const [runState, setRunState] = useState<RunState>('idle')
41
42
  const [output, setOutput] = useState('')
@@ -80,6 +81,7 @@ export default function OntologyClient({ initialDashboard }: { initialDashboard:
80
81
  { label: `${dashboard.summary.extensions} approved extensions`, tone: dashboard.summary.extensions > 0 ? 'blue' : 'neutral' },
81
82
  { label: `${dashboard.adoption.activeConcepts} active concepts`, tone: dashboard.adoption.activeConcepts > 0 ? 'green' : 'neutral' },
82
83
  { label: `${dashboard.adoption.routesInfluenced} semantically influenced routes`, tone: dashboard.adoption.routesInfluenced > 0 ? 'purple' : 'neutral' },
84
+ { label: `${proof.summary.reviewOpen} proof review`, tone: proof.summary.reviewOpen > 0 ? 'yellow' : proof.summary.effective > 0 ? 'green' : 'neutral' },
83
85
  { label: formatMode(dashboard.governance.activeMode), tone: dashboard.governance.activeMode === 'transfer-focused' ? 'purple' : dashboard.governance.activeMode === 'project-critical' ? 'yellow' : 'blue' },
84
86
  ]}
85
87
  actions={
@@ -88,6 +90,10 @@ export default function OntologyClient({ initialDashboard }: { initialDashboard:
88
90
  <div className="hero-note-label">Native ontology state</div>
89
91
  <div className="hero-note-title">Kernel + review + semantic adoption</div>
90
92
  <div className="hero-note-copy">Use this surface to move from frontier hypotheses into approved extensions, inspect active semantic consumers, and manage deprecation with operator-visible risk rather than hidden drift.</div>
93
+ <div style={{ marginTop: 8, display: 'flex', gap: 6, flexWrap: 'wrap' }}>
94
+ <Link href="/proof" className="badge badge-gray" style={{ textDecoration: 'none' }}>open proof</Link>
95
+ <span className="badge badge-gray">{proof.summary.effective} effective records live</span>
96
+ </div>
91
97
  </div>
92
98
  <div style={{ display: 'grid', gap: 10 }}>
93
99
  <div style={{ display: 'flex', gap: 10, flexWrap: 'wrap', justifyContent: 'flex-end' }}>
@@ -107,6 +113,7 @@ export default function OntologyClient({ initialDashboard }: { initialDashboard:
107
113
  <MetricCard label="Approved extensions" value={dashboard.summary.extensions} sublabel={`${dashboard.summary.deprecated} deprecated • ${dashboard.adoption.unusedExtensions} unused`} tone={dashboard.summary.extensions > 0 ? 'blue' : 'neutral'} icon="↑" />
108
114
  <MetricCard label="Active semantic concepts" value={dashboard.adoption.activeConcepts} sublabel={`${dashboard.adoption.totalBindings} bindings • ${dashboard.adoption.routesInfluenced} routed influences`} tone={dashboard.adoption.activeConcepts > 0 ? 'green' : 'neutral'} icon="⇄" />
109
115
  <MetricCard label="Deprecation-sensitive" value={dashboard.adoption.conceptsAtDeprecationRisk} sublabel="approved concepts with live consumers" tone={dashboard.adoption.conceptsAtDeprecationRisk > 0 ? 'yellow' : 'neutral'} icon="!" />
116
+ <MetricCard label="Semantic proof review" value={proof.summary.reviewOpen} sublabel={`${proof.summary.effective} effective • ${proof.summary.mixed} mixed`} tone={proof.summary.reviewOpen > 0 ? 'yellow' : proof.summary.effective > 0 ? 'green' : 'neutral'} icon="◇" />
110
117
  <MetricCard label="Concept changes" value={dashboard.summary.changeEvents} sublabel={`${dashboard.summary.promoted} promoted • ${dashboard.summary.rejected} rejected`} tone={dashboard.summary.changeEvents > 0 ? 'green' : 'neutral'} icon="⇄" />
111
118
  </div>
112
119
 
@@ -1,9 +1,11 @@
1
1
  import OntologyClient from './client'
2
2
  import { loadOntologyControlSummary } from '@/lib/data'
3
+ import { loadProofDashboardSummary } from '@/lib/proof'
3
4
 
4
5
  export const dynamic = 'force-dynamic'
5
6
 
6
7
  export default function OntologyPage() {
7
8
  const dashboard = loadOntologyControlSummary()
8
- return <OntologyClient initialDashboard={dashboard} />
9
+ const proof = loadProofDashboardSummary()
10
+ return <OntologyClient initialDashboard={dashboard} proof={proof} />
9
11
  }
@@ -1,5 +1,6 @@
1
1
  import Link from 'next/link'
2
- import { getDashboardSummary, getOntologyDashboardSummary, loadCoEvolutionSummary, loadFailures, loadFrontier, loadGraph, loadHistory, listProjects } from '@/lib/data'
2
+ import { getDashboardSummary, getOntologyDashboardSummary, loadCoEvolutionSummary, loadFailures, loadFrontier, loadGraph, loadHistory, listProjects, loadTopologyDashboardSummary, loadLlmRuntimeState } from '@/lib/data'
3
+ import { loadProofDashboardSummary } from '@/lib/proof'
3
4
  import { OverviewActions } from '@/components/overview-actions'
4
5
  import { PageHero } from '@/components/page-hero'
5
6
  import { MetricCard } from '@/components/metric-card'
@@ -16,10 +17,95 @@ function scoreColor(score: number) {
16
17
  return 'var(--red)'
17
18
  }
18
19
 
20
/**
 * Returns the human-readable label for an LLM provider id.
 *
 * 'claude-code' and 'codex' are matched explicitly; any other value
 * resolves to the Ollama label.
 */
function providerLabel(provider: 'claude-code' | 'codex' | 'ollama') {
  switch (provider) {
    case 'claude-code':
      return 'Claude Code'
    case 'codex':
      return 'GPT Codex'
    default:
      return 'Ollama'
  }
}
25
+
26
/**
 * Maps a provider status to a tone name.
 *
 * healthy → green, degraded → yellow, unavailable → red; every other
 * status (including 'unknown') yields the neutral tone.
 */
function providerTone(status: 'healthy' | 'degraded' | 'unavailable' | 'unknown') {
  switch (status) {
    case 'healthy':
      return 'green' as const
    case 'degraded':
      return 'yellow' as const
    case 'unavailable':
      return 'red' as const
    default:
      return 'neutral' as const
  }
}
32
+
33
/**
 * Builds the prioritized list of operator actions from the current counts
 * and statuses.
 *
 * Candidates are appended in a fixed order, which is also the priority order:
 *   1. provider is degraded or unavailable  → /commands
 *   2. optimize status is partial or failed → /topology
 *   3. open proof records                   → /proof
 *   4. open topology review items           → /topology
 *   5. unresolved corrections               → /coevolution
 * When none of these apply, a single "Setup a project" action is returned.
 *
 * @param params Counts and statuses used to decide which actions apply.
 *   `providerNextStep` and `optimizeNextStep` are optional; when absent the
 *   provider summary / a built-in explanation is used as the description.
 * @returns Between one and three actions, highest priority first.
 */
function getPriorityActions(params: {
  unresolved: number
  proofOpen: number
  topologyOpen: number
  optimizeStatus: 'idle' | 'healthy' | 'partial' | 'failed'
  optimizeNextStep?: string
  providerStatus: 'healthy' | 'degraded' | 'unavailable' | 'unknown'
  providerSummary: string
  providerNextStep?: string
}) {
  const actions: Array<{ href: string; title: string; description: string; tone: 'blue' | 'green' | 'purple' | 'yellow' }> = []

  // Subject-verb agreement: a count of exactly 1 takes the singular noun AND
  // the singular verb ("1 item is", "2 items are").
  const isSingular = (count: number) => count === 1

  if (params.providerStatus === 'degraded' || params.providerStatus === 'unavailable') {
    actions.push({
      href: '/commands',
      title: 'Stabilize provider control',
      description: params.providerNextStep ?? params.providerSummary,
      tone: 'yellow',
    })
  }

  if (params.optimizeStatus === 'partial' || params.optimizeStatus === 'failed') {
    actions.push({
      href: '/topology',
      title: 'Review degraded optimize state',
      description: params.optimizeNextStep ?? 'The review queue refreshed, but structural enrichment did not complete fully. Inspect topology control before treating the queue as fully enriched.',
      tone: 'yellow',
    })
  }

  if (params.proofOpen > 0) {
    const one = isSingular(params.proofOpen)
    actions.push({
      href: '/proof',
      title: 'Review open proof records',
      description: `${params.proofOpen} proof record${one ? '' : 's'} currently ${one ? 'needs' : 'need'} operator review before the prove layer becomes more trustworthy.`,
      tone: 'blue',
    })
  }

  if (params.topologyOpen > 0) {
    const one = isSingular(params.topologyOpen)
    actions.push({
      href: '/topology',
      title: 'Triage structural backlog',
      description: `${params.topologyOpen} topology review item${one ? ' is' : 's are'} waiting for accept/defer/reject decisions.`,
      tone: 'purple',
    })
  }

  if (params.unresolved > 0) {
    const one = isSingular(params.unresolved)
    actions.push({
      href: '/coevolution',
      title: 'Route live pressure',
      description: `${params.unresolved} unresolved correction${one ? '' : 's'} still ${one ? 'needs' : 'need'} to be routed back into the learning loop.`,
      tone: 'green',
    })
  }

  // Nothing urgent: fall back to the project-setup entry point.
  if (actions.length === 0) {
    actions.push({
      href: '/projects',
      title: 'Setup a project',
      description: 'Analyze a folder or GitHub repo, match skills, and identify capability gaps to seed the next loop.',
      tone: 'green',
    })
  }

  // Only the top three priorities are returned.
  return actions.slice(0, 3)
}
101
+
19
102
  export default function Overview() {
20
103
  const summary = getDashboardSummary()
21
104
  const ontology = getOntologyDashboardSummary()
22
105
  const coevolution = loadCoEvolutionSummary()
106
+ const topologyControl = loadTopologyDashboardSummary()
107
+ const llmRuntime = loadLlmRuntimeState()
108
+ const proof = loadProofDashboardSummary()
23
109
  const frontier = loadFrontier()
24
110
  const history = loadHistory()
25
111
  const graph = loadGraph()
@@ -27,6 +113,18 @@ export default function Overview() {
27
113
  const unresolved = failures.filter((failure) => !failure.resolved)
28
114
  const recentRuns = history.iterations.slice(-4).reverse()
29
115
  const topSkills = [...graph.nodes].sort((a, b) => b.score - a.score).slice(0, 10)
116
+ const defaultProvider = llmRuntime.providers[llmRuntime.defaultProvider]
117
+ const priorityActions = getPriorityActions({
118
+ unresolved: summary.failures.unresolved,
119
+ proofOpen: proof.summary.reviewOpen,
120
+ topologyOpen: topologyControl.summary.open,
121
+ optimizeStatus: topologyControl.optimizeStatus.status,
122
+ optimizeNextStep: topologyControl.optimizeStatus.nextStep,
123
+ providerStatus: defaultProvider.status,
124
+ providerSummary: defaultProvider.summary,
125
+ providerNextStep: defaultProvider.nextStep,
126
+ })
127
+ const primaryAction = priorityActions[0]
30
128
 
31
129
  return (
32
130
  <div className="overview-grid">
@@ -42,19 +140,21 @@ export default function Overview() {
42
140
  { label: `${coevolution.pressureMotifs.promotionReady} promotion-ready motifs`, tone: coevolution.pressureMotifs.promotionReady > 0 ? 'purple' : 'neutral' },
43
141
  { label: `${coevolution.topologyReviews.open} topology reviews`, tone: coevolution.topologyReviews.open > 0 ? 'yellow' : 'green' },
44
142
  { label: `${coevolution.topologyExecution.prepared} prepared structural plans`, tone: coevolution.topologyExecution.prepared > 0 ? 'blue' : 'neutral' },
143
+ { label: `${proof.summary.reviewOpen} proof reviews`, tone: proof.summary.reviewOpen > 0 ? 'yellow' : proof.summary.effective > 0 ? 'green' : 'neutral' },
144
+ { label: `${providerLabel(llmRuntime.defaultProvider)} ${defaultProvider.status}`, tone: providerTone(defaultProvider.status) },
45
145
  { label: `mode: ${coevolution.governance.activeMode.replace(/-/g, ' ')}`, tone: coevolution.governance.activeMode === 'transfer-focused' ? 'purple' : coevolution.governance.activeMode === 'project-critical' ? 'yellow' : 'blue' },
46
146
  ]}
47
147
  actions={
48
- <Link href="/projects" className="metric-card-anchor" style={{ minWidth: 240, display: 'block' }}>
49
- <div className="metric-card metric-card-green metric-card-link">
148
+ <Link href={primaryAction.href} className="metric-card-anchor" style={{ minWidth: 260, display: 'block' }}>
149
+ <div className={`metric-card metric-card-${primaryAction.tone} metric-card-link`}>
50
150
  <div className="metric-card-header">
51
151
  <div>
52
- <div className="metric-card-label">Next workflow</div>
53
- <div className="metric-card-value" style={{ fontSize: 24 }}>Setup a project</div>
152
+ <div className="metric-card-label">Priority now</div>
153
+ <div className="metric-card-value" style={{ fontSize: 24 }}>{primaryAction.title}</div>
54
154
  </div>
55
155
  <div className="metric-card-icon">↗</div>
56
156
  </div>
57
- <div className="metric-card-sublabel">Analyze a folder or GitHub repo, match skills, and identify capability gaps.</div>
157
+ <div className="metric-card-sublabel">{primaryAction.description}</div>
58
158
  </div>
59
159
  </Link>
60
160
  }
@@ -68,6 +168,7 @@ export default function Overview() {
68
168
  <MetricCard label="Unresolved corrections" value={summary.failures.unresolved} sublabel={`out of ${summary.failures.total} captured failures`} tone={summary.failures.unresolved > 0 ? 'yellow' : 'green'} href={summary.failures.unresolved > 0 ? '#attention' : '/evolution'} icon="!" />
69
169
  <MetricCard label="Discoveries" value={summary.buffer.discoveries} sublabel={`${summary.buffer.drafts} drafts in progress`} tone="blue" href="/research" icon="◎" />
70
170
  <MetricCard label="Frontier candidates" value={frontier.programs.length} sublabel={`${summary.canaries} active canaries`} tone="neutral" href="/frontier" icon="▲" />
171
+ <MetricCard label="Proof review" value={proof.summary.reviewOpen} sublabel={`${proof.summary.effective} effective • ${proof.summary.regressed} regressed`} tone={proof.summary.reviewOpen > 0 ? 'yellow' : proof.summary.effective > 0 ? 'green' : 'neutral'} href="/proof" icon="◇" />
71
172
  </div>
72
173
 
73
174
  <SectionFrame
@@ -108,6 +209,69 @@ export default function Overview() {
108
209
  />
109
210
  </SectionFrame>
110
211
 
212
+ <SectionFrame
213
+ eyebrow="Provider control"
214
+ title="Live backend truth"
215
+ description="Claude Code remains the default provider, while Codex and Ollama are optional for shared prompt-in/text-out paths. Claude-only web-search and research tooling stay explicitly Claude-scoped."
216
+ tone="blue"
217
+ >
218
+ <div className="grid-2" style={{ gap: 16 }}>
219
+ <div className="summary-list">
220
+ {(['claude-code', 'codex', 'ollama'] as const).map((provider) => {
221
+ const snapshot = llmRuntime.providers[provider]
222
+ return (
223
+ <div key={provider} className="summary-row">
224
+ <div className="summary-row-main">
225
+ <div className="summary-row-title">{providerLabel(provider)}</div>
226
+ <div className="summary-row-meta">{snapshot.summary}</div>
227
+ {snapshot.nextStep ? <div className="summary-row-meta" style={{ marginTop: 6 }}>Next: {snapshot.nextStep}</div> : null}
228
+ </div>
229
+ <span className={`hero-chip hero-chip-${providerTone(snapshot.status)}`}>{snapshot.status}</span>
230
+ </div>
231
+ )
232
+ })}
233
+ </div>
234
+ <div style={{ display: 'grid', gap: 12 }}>
235
+ <MetricCard
236
+ label="Default provider"
237
+ value={providerLabel(llmRuntime.defaultProvider)}
238
+ sublabel={`fallback: ${llmRuntime.fallbackPolicy}${llmRuntime.fallbackOrder.length > 0 ? ` • ${llmRuntime.fallbackOrder.map((provider) => providerLabel(provider)).join(' → ')}` : ''}`}
239
+ tone={providerTone(defaultProvider.status)}
240
+ href="/commands"
241
+ icon="☍"
242
+ />
243
+ <MetricCard
244
+ label="Last provider run"
245
+ value={llmRuntime.lastExecution?.usedProvider ? providerLabel(llmRuntime.lastExecution.usedProvider) : 'None'}
246
+ sublabel={llmRuntime.lastExecution ? llmRuntime.lastExecution.summary : 'No provider-backed execution has been recorded yet.'}
247
+ tone={llmRuntime.lastExecution?.success ? 'green' : llmRuntime.lastExecution ? 'yellow' : 'neutral'}
248
+ href="/commands"
249
+ icon="↺"
250
+ />
251
+ <div className="signal-text">Commands, status, and dashboard surfaces now track whether execution stayed on the selected provider, degraded, or used an explicit fallback path.</div>
252
+ </div>
253
+ </div>
254
+ </SectionFrame>
255
+
256
+ <SectionFrame
257
+ eyebrow="Priority now"
258
+ title="Top ranked next actions"
259
+ description="This layer compresses the live loop into the most important operator moves right now instead of leaving everything as flat dashboard signal."
260
+ tone="blue"
261
+ >
262
+ <div className="summary-list">
263
+ {priorityActions.map((action, index) => (
264
+ <Link key={`${action.href}-${index}`} href={action.href} className="summary-row">
265
+ <div className="summary-row-main">
266
+ <div className="summary-row-title">{index + 1}. {action.title}</div>
267
+ <div className="summary-row-meta">{action.description}</div>
268
+ </div>
269
+ <span className={`hero-chip hero-chip-${action.tone}`}>open</span>
270
+ </Link>
271
+ ))}
272
+ </div>
273
+ </SectionFrame>
274
+
111
275
  <SectionFrame
112
276
  eyebrow="Brain foundation"
113
277
  title="Semantic backbone"
@@ -215,8 +379,10 @@ export default function Overview() {
215
379
  <span className="badge badge-gray">deprecation risk → {ontology.ontologyLoop.adoption.conceptsAtDeprecationRisk} concepts • {ontology.ontologyLoop.adoption.unusedExtensions} unused extensions</span>
216
380
  <span className="badge badge-gray">topology → {ontology.topologyReviews.open} open • {ontology.topologyReviews.accepted} accepted • {ontology.topologyReviews.generatedFromManualReview} manual-route</span>
217
381
  <span className="badge badge-gray">execution → {ontology.topologyExecution.prepared} prepared • {ontology.topologyExecution.applied} applied • {ontology.topologyExecution.rolledBack} rolled back</span>
382
+ <span className="badge badge-gray">proof → {proof.summary.total} total • {proof.summary.effective} effective • {proof.summary.reviewOpen} open review</span>
218
383
  <Link href="/ontology" className="badge badge-blue" style={{ textDecoration: 'none' }}>Open ontology control</Link>
219
384
  <Link href="/topology" className="badge badge-blue" style={{ textDecoration: 'none' }}>Open topology control</Link>
385
+ <Link href="/proof" className="badge badge-blue" style={{ textDecoration: 'none' }}>Open proof control</Link>
220
386
  <span className="badge badge-gray">governance: {ontology.governance.activeMode.replace(/-/g, ' ')} ({ontology.governance.source})</span>
221
387
  <span className="badge badge-gray">routes → research {ontology.governedRoutes.research} • specialize {ontology.governedRoutes.specialize} • evolve {ontology.governedRoutes.evolve} • generalize {ontology.governedRoutes.generalize} • manual-review {ontology.governedRoutes['manual-review']}</span>
222
388
  <span className="badge badge-gray">{ontology.enrichedSkillNodes} skills carry explicit brain metadata</span>