onto-mcp 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.onto/authority/supported-models.yaml +27 -0
- package/.onto/processes/reconstruct/ontology-seeding-and-maturation-design.md +53 -143
- package/.onto/processes/reconstruct/reconstruct-contract-registry.yaml +49 -0
- package/.onto/processes/reconstruct/reconstruct-execution-ux-contract.md +5 -0
- package/.onto/processes/reconstruct/source-profile-contract.md +6 -1
- package/.onto/processes/shared/pipeline-execution-ledger-contract.md +89 -0
- package/.onto/processes/shared/target-material-kind-contract.md +38 -37
- package/dist/cli.js +6 -1
- package/dist/core-api/reconstruct-api.js +50 -12
- package/dist/core-runtime/discovery/settings-chain.js +125 -1
- package/dist/core-runtime/discovery/supported-models.js +188 -0
- package/dist/core-runtime/llm/llm-caller.js +163 -0
- package/dist/core-runtime/onboard/bootstrap-provider.js +159 -0
- package/dist/core-runtime/onboard/configure-provider.js +362 -0
- package/dist/core-runtime/reconstruct/benchmark-evidence.js +61 -0
- package/dist/core-runtime/reconstruct/benchmark-failure-class.js +25 -0
- package/dist/core-runtime/reconstruct/execution-telemetry.js +203 -0
- package/dist/core-runtime/reconstruct/markdown-section.js +77 -0
- package/dist/core-runtime/reconstruct/mock-llm-realization.js +840 -0
- package/dist/core-runtime/reconstruct/pipeline-execution-ledger.js +6 -2
- package/dist/core-runtime/reconstruct/post-seed-validation.js +1 -15
- package/dist/core-runtime/reconstruct/run.js +377 -171
- package/dist/core-runtime/reconstruct/semantic-quality-gate.js +417 -0
- package/dist/mcp/review-read-mode.js +17 -0
- package/dist/mcp/review-sync-window.js +35 -0
- package/dist/mcp/server.js +171 -67
- package/dist/mcp/tool-schemas.js +39 -5
- package/package.json +4 -1
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Supported model registry — authority SSOT for selectable LLM models.
|
|
2
|
+
#
|
|
3
|
+
# Only models whose support has been verified by a benchmark (a benchmark record
|
|
4
|
+
# shows the model completing a pipeline run end to end) may be listed here.
|
|
5
|
+
# settings.json model selection is validated against this registry by the
|
|
6
|
+
# supported-model gate (assertSettingsModelsSupported): at the reconstruct live
|
|
7
|
+
# execution boundary (real provider calls) and by the `check:supported-models`
|
|
8
|
+
# (G7) guard on the committed config (every seat). Review-side runtime
|
|
9
|
+
# enforcement is a noted follow-up, so the runtime gate is wired on the
|
|
10
|
+
# reconstruct live path only today. A (provider, model) pair not listed here is
|
|
11
|
+
# rejected fail-loud at those gate points. Settings resolution itself is a pure
|
|
12
|
+
# projection and does not enforce this gate.
|
|
13
|
+
#
|
|
14
|
+
# To add a model: run the benchmark, confirm a completed run, then add an entry
|
|
15
|
+
# citing the benchmark record under benchmark_evidence_refs. Curated by humans;
|
|
16
|
+
# the citation is the evidence, the entry is the authority.
|
|
17
|
+
schema_version: "1"
|
|
18
|
+
supported_models:
|
|
19
|
+
- provider: openai
|
|
20
|
+
model: gpt-5.5
|
|
21
|
+
verified_at: "2026-06-13"
|
|
22
|
+
benchmark_evidence_refs:
|
|
23
|
+
- development-records/benchmark/reconstruct-pipeline-live-20260613.json
|
|
24
|
+
notes: >-
|
|
25
|
+
Completed a full reconstruct pipeline run in the live medium baseline
|
|
26
|
+
(1 of 6 attempts completed end to end); support verified. Performance
|
|
27
|
+
evidence is PRELIMINARY in that record — support, not a performance claim.
|
|
@@ -813,8 +813,10 @@ is used as an implementation contract.
|
|
|
813
813
|
|
|
814
814
|
| Artifact | Registry status | Owner | Role |
|
|
815
815
|
|---|---|---|---|
|
|
816
|
-
| `maturation-baseline.yaml` | active registry | runtime | L0-L4 matrix from seed, CQs, limitations, and the validated seeding reconstruct record |
|
|
816
|
+
| `maturation-baseline.yaml` | active registry | runtime | **[immutable seed-derived baseline]** L0-L4 matrix from seed, CQs, limitations, and the validated seeding reconstruct record |
|
|
817
817
|
| `maturation-baseline-validation.yaml` | active registry | runtime | proves baseline rows derive from validated seed, purpose, CQ/proof, handoff authorities, and the source seeding record ref/hash |
|
|
818
|
+
| `baseline-actionability-matrix.yaml` | active registry | runtime | **[immutable zero-delta baseline matrix]** baseline static/kinetic/dynamic actionability matrix consumed by question-frontier authoring; distinct from the mutable current `actionability-matrix.yaml` |
|
|
819
|
+
| `baseline-actionability-matrix-validation.yaml` | active registry | runtime | proves the baseline matrix derives from the validated maturation baseline with zero delta before question-frontier consumption |
|
|
818
820
|
| `maturation-promotion-request.yaml` | planned registry | runtime | durable request authority for maturation execution or planned gate promotion |
|
|
819
821
|
| `maturation-promotion-request-validation.yaml` | planned registry | runtime | proves request id, trigger refs, requested gates, and replay authority before promotion-readiness evaluation |
|
|
820
822
|
| `maturation-runtime-capability-profile.yaml` | planned registry | runtime | records runtime-observed writer, validator, predicate, and activation capability for planned maturation gates |
|
|
@@ -838,7 +840,7 @@ is used as an implementation contract.
|
|
|
838
840
|
| `maturation-answer-claims-validation.yaml` | active registry | runtime | answer claim refs, evidence, and limitation closure |
|
|
839
841
|
| `ontology-expansion.yaml` | active registry | host LLM author | ontology additions/refinements/deferred/rejected changes |
|
|
840
842
|
| `ontology-expansion-validation.yaml` | active registry | runtime | concept economy, ref closure, surface coverage, and regression guards |
|
|
841
|
-
| `actionability-matrix.yaml` | active registry | runtime | static/kinetic/dynamic by 7D and purpose element, with L0-L4 levels |
|
|
843
|
+
| `actionability-matrix.yaml` | active registry | runtime | **[mutable current projection]** static/kinetic/dynamic by 7D and purpose element, with L0-L4 levels |
|
|
842
844
|
| `actionability-matrix-validation.yaml` | active registry | runtime | proves matrix rows derive from validated baseline and active maturation artifacts; promoted source-delta/source-impact authorities are consumed when activated |
|
|
843
845
|
| `maturation-convergence-ledger.yaml` | active registry | runtime | append-only round ledger of material question closure, trace/audit-only closure, round source-observation delta refs, and remaining frontier |
|
|
844
846
|
| `maturation-convergence-ledger-validation.yaml` | active registry | runtime | proves every blocker/high question is closed, carried forward, or blocked with refs before continuation is projected |
|
|
@@ -1476,6 +1478,33 @@ valid evidence cluster, proof, or user confirmation. An answer may be
|
|
|
1476
1478
|
`partially_answered` only when the answered portion has positive support and the
|
|
1477
1479
|
remaining gap is represented as a limitation or frontier question.
|
|
1478
1480
|
|
|
1481
|
+
Implementable `answer-support-judgment.yaml` shape (target design until the judge
|
|
1482
|
+
runtime stage is implemented):
|
|
1483
|
+
|
|
1484
|
+
```yaml
|
|
1485
|
+
schema_version: "1"
|
|
1486
|
+
session_id:
|
|
1487
|
+
created_at:
|
|
1488
|
+
round_id:
|
|
1489
|
+
judgments:
|
|
1490
|
+
- judgment_id:
|
|
1491
|
+
evidence_cluster_ref: # a validated answer-support-ledger evidence cluster
|
|
1492
|
+
evidence_ref: # one evidence ref of that cluster
|
|
1493
|
+
supports: supported | not_supported
|
|
1494
|
+
rationale_ref: # bounded judge rationale
|
|
1495
|
+
```
|
|
1496
|
+
|
|
1497
|
+
An independent judge role, distinct from the answer-support-ledger author, records
|
|
1498
|
+
one bounded `supports` verdict per cited evidence ref. The judge does not decide
|
|
1499
|
+
sufficiency: a `convergent_source_evidence` answer claim validates only when at
|
|
1500
|
+
least two independent evidence refs each carry a `supports: supported` judgment
|
|
1501
|
+
with contradictions still bounded, and runtime aggregates that count. Author and
|
|
1502
|
+
judge are separated structurally because the judgment is a distinct authored
|
|
1503
|
+
artifact attributed to its own pipeline stage, not a field the support author can
|
|
1504
|
+
fill. This hardening narrows only the "evidence implies the answer" residue for
|
|
1505
|
+
convergent source evidence; the existing count, independence, and contradiction
|
|
1506
|
+
checks are unchanged.
|
|
1507
|
+
|
|
1479
1508
|
#### Maturation Closure Dispositions
|
|
1480
1509
|
|
|
1481
1510
|
Not every inspected issue should become ontology meaning. Maturation therefore
|
|
@@ -2421,12 +2450,13 @@ runtime, user, external, or domain-standard authority, maturation projects
|
|
|
2421
2450
|
freshness concerns, or out-of-scope questions, those rows are closed in the
|
|
2422
2451
|
convergence ledger.
|
|
2423
2452
|
|
|
2424
|
-
This gives maturation two separate stop signals
|
|
2453
|
+
This gives maturation two separate stop signals, each an explicit projection of
|
|
2454
|
+
the convergence conditions above (not a second source of truth):
|
|
2425
2455
|
|
|
2426
|
-
| Stop signal | Meaning |
|
|
2427
|
-
|
|
2428
|
-
| Matrix closure | every material static/kinetic/dynamic x seven-dimension row is L4 or limitation-backed outside the claim |
|
|
2429
|
-
| Re-question closure | a fresh frontier generated from the current artifacts yields no new material question that can change the
|
|
2456
|
+
| Stop signal | Projects which convergence conditions | Meaning |
|
|
2457
|
+
|---|---|---|
|
|
2458
|
+
| Matrix closure | Static/kinetic/dynamic actionability + L4 matrix + Material gap closure | every material static/kinetic/dynamic x seven-dimension row is L4 or limitation-backed outside the claim |
|
|
2459
|
+
| Re-question closure | Re-question convergence | a fresh frontier generated from the current artifacts yields no new material question that can change the source-derived purpose adequacy frame |
|
|
2430
2460
|
|
|
2431
2461
|
Both are required before `actionable_ready`; `actionable_limited` may exclude
|
|
2432
2462
|
named rows only when the convergence ledger explains the limitation and the
|
|
@@ -3083,90 +3113,13 @@ Implementation file map:
|
|
|
3083
3113
|
| MCP/API projection | `src/core-api/reconstruct-api.ts`, `src/mcp/server.ts` |
|
|
3084
3114
|
| active docs and user-facing guide | this document, `operational-ontology-seed-contract.md`, `README.md`, `IMPLEMENTATION_MAP.html` |
|
|
3085
3115
|
|
|
3086
|
-
Current implementation
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
|
|
3093
|
-
continuation decision, and explicit proof-authority boundaries. Multi-round
|
|
3094
|
-
source-observation delta and source-observation re-entry validation are active
|
|
3095
|
-
for frontier-triggered observations before they re-enter prompt/context semantic
|
|
3096
|
-
authoring or answer-support consumption. The optional `actionable-ontology.yaml`
|
|
3097
|
-
projection is active for `actionable_limited` or `actionable_ready` continuation
|
|
3098
|
-
states and is validated as a runtime projection of existing seed, expansion,
|
|
3099
|
-
matrix, convergence, continuation, and proof boundary authorities.
|
|
3100
|
-
Promoted same-request resume is active for authored artifacts only when reuse
|
|
3101
|
-
provenance matches the current request, source/profile/domain
|
|
3102
|
-
snapshot, source-safety/scout/lineage validation, and seed-authoring readiness
|
|
3103
|
-
validation once those upstream authorities exist. Run-control resume rows record
|
|
3104
|
-
the provenance match policy and check refs; semantic quality remains revalidated by
|
|
3105
|
-
the downstream artifact validators.
|
|
3106
|
-
`seed-authoring-readiness-validation.yaml` now also records
|
|
3107
|
-
`deterministic_gate_scope: pre_seed_closure_only` and fails when the readiness
|
|
3108
|
-
artifact omits the required boundary notes that keep deterministic closure
|
|
3109
|
-
separate from semantic ontology adequacy. It also validates
|
|
3110
|
-
`max_round_exhaustion_interpretation` so `max_round_exhausted` is not collapsed
|
|
3111
|
-
into one generic state: a selected-purpose closure can remain
|
|
3112
|
-
`sufficient_for_claim_scope`, while an exhausted open frontier projects
|
|
3113
|
-
`insufficient_for_claim_scope` plus `exhausted_with_open_frontier`.
|
|
3114
|
-
Ontology-domain category rows remain diagnostic unless the selected purpose
|
|
3115
|
-
actually has a closure row for that category. They can expose modeling gaps, but
|
|
3116
|
-
they must not block seed authoring just because a domain profile contains a
|
|
3117
|
-
category that the selected source purpose did not require.
|
|
3118
|
-
The first source frontier now has an actor-action-state scout policy: for
|
|
3119
|
-
`round-1`, valid `SourceScoutPack` actor/action/state coverage gaps are sent as
|
|
3120
|
-
inventory-only exploration candidates, and runtime may add up to three
|
|
3121
|
-
unobserved code/document refs when the author returns an empty frontier. This
|
|
3122
|
-
policy chooses exploration priority only; it does not create purpose elements or
|
|
3123
|
-
ontology claims.
|
|
3124
|
-
`source-scout-pack.yaml` remains a latest-current scout projection alias.
|
|
3125
|
-
Pre-seed source-purpose, candidate-inventory, SeedAuthoringReadiness, and seed
|
|
3126
|
-
reuse provenance consume immutable `source-scout-pack.pre-seed.yaml` and
|
|
3127
|
-
`source-scout-pack-validation.pre-seed.yaml` snapshots. After maturation source
|
|
3128
|
-
lineage refresh, runtime emits `source-scout-pack.post-maturation.yaml` and
|
|
3129
|
-
`source-scout-pack-validation.post-maturation.yaml` so later audit surfaces can
|
|
3130
|
-
distinguish the exact consumed snapshot from the latest-current alias.
|
|
3131
|
-
The contract registry treats those validation snapshots as snapshot-scoped
|
|
3132
|
-
active gate outputs, and the SeedAuthoringReadiness validator consumes the
|
|
3133
|
-
pre-seed validation snapshot as its concrete source scout authority. Runtime
|
|
3134
|
-
identity checks compare the validation artifact to its concrete sibling snapshot
|
|
3135
|
-
ref, not only to `source-scout-pack.pre-seed.yaml` by basename, so copied
|
|
3136
|
-
same-basename snapshots from another session do not satisfy the pre-seed
|
|
3137
|
-
authority boundary.
|
|
3138
|
-
Because the post-maturation snapshot is emitted after pre-handoff readiness,
|
|
3139
|
-
`handoff-decision-validation.yaml` projects its gate as `not_applicable` during
|
|
3140
|
-
the seed handoff. Runtime closes the later lifecycle boundary with
|
|
3141
|
-
`post-maturation-gate-projection-validation.yaml`, which evaluates
|
|
3142
|
-
`source_scout_pack_post_maturation_gate` from the post-maturation snapshot refs
|
|
3143
|
-
before final-output and record consumption. That projection also requires the
|
|
3144
|
-
post-maturation validation artifact and SourceScoutPack snapshot to be concrete
|
|
3145
|
-
same-session siblings, not only same-phase basenames.
|
|
3146
|
-
Prompt payloads now compact `exploration-synthesis.yaml` before source-frontier,
|
|
3147
|
-
source-purpose, and candidate-inventory authoring. The projection preserves gap
|
|
3148
|
-
ids, lens ids, descriptions, requested source refs, and evidence observation ids,
|
|
3149
|
-
while omitting full `evidence_refs` objects to reduce prompt size without
|
|
3150
|
-
changing artifact authority.
|
|
3151
|
-
Mixed targets currently record `member_scoped_composite` scout scope as a
|
|
3152
|
-
phase-1 limitation with no signal rows. This preserves member-scope truth
|
|
3153
|
-
without claiming aggregate scout-enabled closure before a member-scoped scout
|
|
3154
|
-
contract is promoted.
|
|
3155
|
-
Seed authoring now has a focused repair loop: when the first
|
|
3156
|
-
`ontology-seed.yaml` fails validation, runtime preserves the invalid seed and
|
|
3157
|
-
validation sidecars as `ontology-seed-repair-1.input*.yaml`, asks the seed author
|
|
3158
|
-
to revise only the validation-derived repair sections, rewrites
|
|
3159
|
-
`ontology-seed.yaml`, and requires the repaired seed validation to pass before
|
|
3160
|
-
downstream maturation consumes it.
|
|
3161
|
-
Provider timeout recovery is staged and bounded. Source-purpose timeout retries
|
|
3162
|
-
with a smaller LLM prompt that keeps the same `SourcePurposeCandidates` output
|
|
3163
|
-
contract. Seed timeout first retries a smaller `OntologySeedMinimalKernel`
|
|
3164
|
-
prompt; if that also times out, the run fails closed because runtime must not
|
|
3165
|
-
author semantic ontology seed content. Claim realization and competency-question
|
|
3166
|
-
authoring receive compact seed summaries and allowed-claim projections, and
|
|
3167
|
-
competency-question timeout recovery may project deterministic coverage
|
|
3168
|
-
questions from allowed claims and domain competency rows so downstream
|
|
3169
|
-
validators can prove coverage or preserve limitations.
|
|
3116
|
+
Current implementation status — which authorities are promoted, and the
|
|
3117
|
+
resume/timeout/scout/repair/snapshot behavior — is owned by
|
|
3118
|
+
`reconstruct-contract-registry.yaml` and surfaced in `IMPLEMENTATION_MAP.html`.
|
|
3119
|
+
This contract intentionally does not restate present-tense implementation status
|
|
3120
|
+
(it goes stale the moment runtime changes and nothing consumes it for dispatch).
|
|
3121
|
+
A point-in-time snapshot of the prior recomposition status is isolated at
|
|
3122
|
+
`development-records/archive/20260614-reconstruct-maturation-design-relocated-narrative.md` §A.
|
|
3170
3123
|
|
|
3171
3124
|
Required test path for each implementation slice:
|
|
3172
3125
|
|
|
@@ -3464,58 +3417,15 @@ linked from the registry, this recomposition may claim only run-level governance
|
|
|
3464
3417
|
not release health, rollback, quota, resource-exhaustion, or post-incident
|
|
3465
3418
|
program completeness.
|
|
3466
3419
|
|
|
3467
|
-
## 16. Completion
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
|
|
3473
|
-
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
2. material-aware source observations,
|
|
3477
|
-
3. source-purpose candidates, purpose candidate validation, and purpose
|
|
3478
|
-
confirmation validation when required,
|
|
3479
|
-
4. candidate inventory and disposition with purpose-element and actionability
|
|
3480
|
-
surface mapping,
|
|
3481
|
-
5. `ontology-seed.yaml` using the active seed contract,
|
|
3482
|
-
6. source-derived purpose and purpose adequacy evidence closure,
|
|
3483
|
-
7. user confirmation for inferred purpose when direct source purpose is absent,
|
|
3484
|
-
8. deterministic validation artifacts for every gate,
|
|
3485
|
-
9. canonical candidate-disposition, competency-question, assessment, and
|
|
3486
|
-
handoff-validation authorities, including diagnostic or claim-based P3
|
|
3487
|
-
competency-question disposition when ontology domain competency admission is present,
|
|
3488
|
-
10. phase-scoped material admission rows and validation for pre-seed purpose
|
|
3489
|
-
elements, literal material-value rows, post-CQ domain competency rows, and
|
|
3490
|
-
maturation reassessment rows when each phase is applicable,
|
|
3491
|
-
11. active source-frontier dependency validation, round source-observation
|
|
3492
|
-
delta/re-entry validation, and a validated session lineage index that
|
|
3493
|
-
preserves each newly observed source before answer-support consumption,
|
|
3494
|
-
12. registry ref/hash plus active contract ref/hash, source profile migration,
|
|
3495
|
-
lens judgment, concrete gate-instance, validator, reference-standard,
|
|
3496
|
-
pattern-catalog URI/snapshot, and readiness-projection snapshots,
|
|
3497
|
-
13. separate process-completion and seed-validity reporting,
|
|
3498
|
-
14. final output that explains `OntologySeed` content, source-derived purpose,
|
|
3499
|
-
purpose adequacy frame, seed iteration readiness, maturation frontier, and
|
|
3500
|
-
limitations, and
|
|
3501
|
-
15. a reconstruct record whose artifact refs are the source of truth,
|
|
3502
|
-
16. claim projection rows and validation for status/result/MCP/API surfaces when
|
|
3503
|
-
those surfaces claim readiness, actionability, or material-kind support,
|
|
3504
|
-
citing `target-material-profile-validation.yaml` and the immutable
|
|
3505
|
-
pre-publication run-control checkpoint, and final-output claim sections that
|
|
3506
|
-
cite the canonical refs without restating pre-publication claim values,
|
|
3507
|
-
17. source-safety authority rows and validations when observed source lifecycle,
|
|
3508
|
-
redaction, privacy, or authorization affects prompt/context use, plus planned
|
|
3509
|
-
mutable-vocabulary authority rows after registry promotion when external
|
|
3510
|
-
standards, provider/framework terms, or profile-owned facets affect a
|
|
3511
|
-
material claim,
|
|
3512
|
-
and
|
|
3513
|
-
18. registry-verification evidence for any present-tense active, promoted,
|
|
3514
|
-
current, implemented, or executable claim.
|
|
3515
|
-
|
|
3516
|
-
The full maturation stage is implemented when the required target artifacts are
|
|
3517
|
-
promoted into the registry and a fresh run can continue from that seed and
|
|
3518
|
-
produce:
|
|
3420
|
+
## 16. Maturation Completion Criteria
|
|
3421
|
+
|
|
3422
|
+
Seeding completion criteria are consolidated in §5.1. The one-time recomposition
|
|
3423
|
+
completion checklist (seeding portion) is isolated at
|
|
3424
|
+
`development-records/archive/20260614-reconstruct-maturation-design-relocated-narrative.md` §B;
|
|
3425
|
+
it is historical and is not current authority.
|
|
3426
|
+
|
|
3427
|
+
Maturation is complete when the required target artifacts are promoted into the
|
|
3428
|
+
registry and a fresh run can continue from that seed and produce:
|
|
3519
3429
|
|
|
3520
3430
|
1. valid reconstruct run-control ownership or resume authorization for the
|
|
3521
3431
|
maturation attempt,
|
|
@@ -1229,6 +1229,14 @@ planned_artifact_authorities:
|
|
|
1229
1229
|
authority_ref: required-when-evaluation-validation.yaml
|
|
1230
1230
|
validation_ref: null
|
|
1231
1231
|
activation_condition: registry_predicate_evaluator_runtime_is_implemented
|
|
1232
|
+
answer_support_judgment:
|
|
1233
|
+
authority_ref: answer-support-judgment.yaml
|
|
1234
|
+
validation_ref: answer-support-judgment-validation.yaml
|
|
1235
|
+
activation_condition: answer_support_judge_runtime_is_implemented
|
|
1236
|
+
answer_support_judgment_validation:
|
|
1237
|
+
authority_ref: answer-support-judgment-validation.yaml
|
|
1238
|
+
validation_ref: null
|
|
1239
|
+
activation_condition: answer_support_judge_runtime_is_implemented
|
|
1232
1240
|
|
|
1233
1241
|
validation_gate_catalog:
|
|
1234
1242
|
- gate_id: reconstruct_run_control_gate
|
|
@@ -1388,6 +1396,12 @@ planned_validation_gate_catalog:
|
|
|
1388
1396
|
validation_artifact_ref: required-when-evaluation-validation.yaml
|
|
1389
1397
|
required_when: always
|
|
1390
1398
|
activation_condition: registry_predicate_evaluator_runtime_is_implemented
|
|
1399
|
+
- gate_id: answer_support_judgment_gate
|
|
1400
|
+
validation_artifact_ref: answer-support-judgment-validation.yaml
|
|
1401
|
+
required_when: answer_support_judgment_required
|
|
1402
|
+
activation_condition: answer_support_judge_runtime_is_implemented
|
|
1403
|
+
activation_prerequisites:
|
|
1404
|
+
- answer_support_ledger_validation_is_valid
|
|
1391
1405
|
|
|
1392
1406
|
required_when_predicate_family_catalog:
|
|
1393
1407
|
- predicate_family_id: frontier_observation_use_by_downstream_artifact
|
|
@@ -1515,6 +1529,20 @@ required_when_predicate_catalog:
|
|
|
1515
1529
|
truth_expression: "source_observation_delta_validation.validation_status == valid and answer_support_ledger_refs_delta_observation_ids"
|
|
1516
1530
|
unknown_projection: not_applicable
|
|
1517
1531
|
explanation_template: "Answer support ledger cites observation ids from a frontier-triggered observation delta."
|
|
1532
|
+
- predicate_id: answer_support_judgment_uses_frontier_observation
|
|
1533
|
+
predicate_family_id: frontier_observation_use_by_downstream_artifact
|
|
1534
|
+
gate_instance_scope: per_round
|
|
1535
|
+
downstream_artifact_ref: answer-support-judgment.yaml
|
|
1536
|
+
downstream_validation_ref: answer-support-judgment-validation.yaml
|
|
1537
|
+
input_authority_refs: [rounds/<round-id>/source-observation-delta.yaml, rounds/<round-id>/source-observation-delta-validation.yaml, answer-support-judgment.yaml]
|
|
1538
|
+
truth_expression: "source_observation_delta_validation.validation_status == valid and answer_support_judgment_refs_delta_observation_ids"
|
|
1539
|
+
unknown_projection: not_applicable
|
|
1540
|
+
explanation_template: "Answer support judgment cites observation ids from a frontier-triggered observation delta."
|
|
1541
|
+
- predicate_id: answer_support_judgment_required
|
|
1542
|
+
input_authority_refs: [answer-support-ledger.yaml, answer-support-ledger-validation.yaml]
|
|
1543
|
+
truth_expression: "artifact_exists(answer-support-ledger.yaml) and answer_support_ledger_has_convergent_source_evidence_cluster"
|
|
1544
|
+
unknown_projection: not_applicable
|
|
1545
|
+
explanation_template: "A judge confirmation is required when answer support uses convergent source evidence."
|
|
1518
1546
|
- predicate_id: maturation_answer_claims_use_frontier_observation
|
|
1519
1547
|
predicate_family_id: frontier_observation_use_by_downstream_artifact
|
|
1520
1548
|
gate_instance_scope: per_round
|
|
@@ -2663,11 +2691,20 @@ validator_records:
|
|
|
2663
2691
|
- maturation-question-frontier-validation.yaml
|
|
2664
2692
|
- ontology-seed.yaml
|
|
2665
2693
|
- reconstruct-contract-registry.yaml
|
|
2694
|
+
conditional_input_authority_refs:
|
|
2695
|
+
- artifact_ref: answer-support-judgment-validation.yaml
|
|
2696
|
+
activation_condition: answer_support_judge_runtime_is_implemented
|
|
2697
|
+
consumed_for:
|
|
2698
|
+
- require_convergent_source_evidence_claims_to_have_two_independent_judge_confirmed_supports
|
|
2666
2699
|
validation_obligations:
|
|
2667
2700
|
- validate_answer_claim_question_refs
|
|
2668
2701
|
- validate_support_mode_against_valid_evidence_cluster_or_authority
|
|
2669
2702
|
- require_partial_answers_to_have_limitation_or_frontier_refs
|
|
2670
2703
|
- validate_answer_claim_surface_dimension_and_purpose_element_refs
|
|
2704
|
+
conditional_validation_obligations:
|
|
2705
|
+
- obligation_id: require_convergent_source_evidence_claims_to_have_two_independent_judge_confirmed_supports
|
|
2706
|
+
activation_condition: answer_support_judge_runtime_is_implemented
|
|
2707
|
+
input_authority_refs: [answer-support-judgment-validation.yaml]
|
|
2671
2708
|
output_ref: maturation-answer-claims-validation.yaml
|
|
2672
2709
|
- validator_id: ontology-expansion-validator
|
|
2673
2710
|
gate_ids: [ontology_expansion_gate]
|
|
@@ -2906,6 +2943,18 @@ validator_records:
|
|
|
2906
2943
|
output_ref: handoff-decision-validation.yaml
|
|
2907
2944
|
|
|
2908
2945
|
planned_validator_records:
|
|
2946
|
+
- validator_id: answer-support-judgment-validator
|
|
2947
|
+
gate_ids: [answer_support_judgment_gate]
|
|
2948
|
+
validator_version: 1
|
|
2949
|
+
input_authority_refs:
|
|
2950
|
+
- answer-support-judgment.yaml
|
|
2951
|
+
- answer-support-ledger-validation.yaml
|
|
2952
|
+
- reconstruct-contract-registry.yaml
|
|
2953
|
+
validation_obligations:
|
|
2954
|
+
- validate_judgment_refs_resolve_to_answer_support_ledger_clusters_and_evidence
|
|
2955
|
+
- require_supports_enum_for_each_judgment
|
|
2956
|
+
- require_rationale_ref_for_each_judgment
|
|
2957
|
+
output_ref: answer-support-judgment-validation.yaml
|
|
2909
2958
|
- validator_id: maturation-promotion-request-validator
|
|
2910
2959
|
gate_ids: [maturation_promotion_request_gate]
|
|
2911
2960
|
validator_version: 1
|
|
@@ -19,6 +19,11 @@ No standalone HTML, web UI, or dashboard is required for this contract. MCP and
|
|
|
19
19
|
CLI hosts should render the same information from runtime status/result payloads
|
|
20
20
|
and reconstruct artifacts.
|
|
21
21
|
|
|
22
|
+
This contract owns the generic reconstruct run UX. The material-kind-specific
|
|
23
|
+
delta (detected `target_material_kind`, observation counts by material kind,
|
|
24
|
+
unsupported/out-of-scope material) is owned by `target-material-kind-contract.md`
|
|
25
|
+
§9 and is layered onto the surfaces below.
|
|
26
|
+
|
|
22
27
|
## 2. Opening Brief
|
|
23
28
|
|
|
24
29
|
At run start, the host should show a compact opening brief before expensive
|
|
@@ -186,7 +186,12 @@ promote that facet into a permanent profile rule during the same run.
|
|
|
186
186
|
## 6. Mixed Material Rule
|
|
187
187
|
|
|
188
188
|
`mixed` is a public `TargetMaterialKind` value, but it is not a material parser.
|
|
189
|
-
|
|
189
|
+
Reconstruct exposes these three of the four lexicon-defined mixed behaviors as
|
|
190
|
+
runnable observation behaviors; the fourth, `reserved_future`, is a non-runnable
|
|
191
|
+
vocabulary state and is not a reconstruct-runnable path (so it is not in the
|
|
192
|
+
runnable list below). Runtime must choose one of the three runnable behaviors
|
|
193
|
+
before observation (full enum authority: `core-lexicon.yaml#TargetMaterialKind`
|
|
194
|
+
and `target-material-kind-contract.md` §4.1):
|
|
190
195
|
|
|
191
196
|
| Behavior | Requirement |
|
|
192
197
|
|---|---|
|
|
@@ -81,9 +81,98 @@ interface PipelineExecutionLedgerUnitEntry {
|
|
|
81
81
|
lastFailureMessage: string | null;
|
|
82
82
|
upstreamUnitIds: string[];
|
|
83
83
|
downstreamUnitIds: string[];
|
|
84
|
+
executionTelemetry?: PipelineUnitExecutionTelemetry | null;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
interface PipelineUnitExecutionTelemetry {
|
|
88
|
+
unit_id: string;
|
|
89
|
+
llm_call_count: number;
|
|
90
|
+
duration_ms: number;
|
|
91
|
+
prompt_chars: number;
|
|
92
|
+
output_chars: number;
|
|
93
|
+
provider_tokens_in: number | null;
|
|
94
|
+
provider_tokens_out: number | null;
|
|
95
|
+
provider_route: string | null;
|
|
96
|
+
model_id: string | null;
|
|
97
|
+
effort: string | null;
|
|
98
|
+
prompt_policy_sha256: string | null;
|
|
99
|
+
source_identity_refs: string[];
|
|
100
|
+
attempt_count: number;
|
|
101
|
+
attempts: Array<{
|
|
102
|
+
attempt: number;
|
|
103
|
+
// open sets: known members + (string & {}) — see "additively-extensible" rule below
|
|
104
|
+
kind: "initial" | "parse_repair" | "semantic_repair" | "timeout_recovery" | "validation_gate" | (string & {});
|
|
105
|
+
status: "succeeded" | "failed";
|
|
106
|
+
failure_class:
|
|
107
|
+
| "malformed_json"
|
|
108
|
+
| "parse_repair_failure"
|
|
109
|
+
| "schema_validation_failure"
|
|
110
|
+
| "timeout"
|
|
111
|
+
| "provider_error"
|
|
112
|
+
| (string & {})
|
|
113
|
+
| null;
|
|
114
|
+
failure_message: string | null;
|
|
115
|
+
duration_ms: number;
|
|
116
|
+
}>;
|
|
117
|
+
batch_count: number | null;
|
|
84
118
|
}
|
|
85
119
|
```
|
|
86
120
|
|
|
121
|
+
Execution telemetry rules:
|
|
122
|
+
|
|
123
|
+
- Telemetry is runtime-owned. It is recorded at the LLM call boundary by the
|
|
124
|
+
producing pipeline; LLMs have no authority over any telemetry value.
|
|
125
|
+
- `prompt_chars`/`output_chars` are the canonical size measure for speed and
|
|
126
|
+
size comparisons: runtime computes them directly, so they are always
|
|
127
|
+
available and comparable across providers and mock realizations. Provider
|
|
128
|
+
token usage (`provider_tokens_in`/`provider_tokens_out`) is a supplemental fact recorded only
|
|
129
|
+
when the provider reports it; comparisons are valid only between runs using
|
|
130
|
+
the same measure and the same provider route.
|
|
131
|
+
- One attempt row is recorded per actual LLM call (`initial`, `parse_repair`,
|
|
132
|
+
`semantic_repair`, `timeout_recovery`); these increment `llm_call_count` and
|
|
133
|
+
the size counters. In addition, a `validation_gate` attempt row is recorded
|
|
134
|
+
when a deterministic validation gate rejects an authored artifact before a
|
|
135
|
+
feedback retry: it carries `status: "failed"` and
|
|
136
|
+
`failure_class: "schema_validation_failure"`, increments `attempt_count` so
|
|
137
|
+
the validation miss stays visible in the recovered unit's lineage, but does
|
|
138
|
+
not count as an LLM call (no `llm_call_count`/size contribution). `failure_class`
|
|
139
|
+
separates output-shape failures (`malformed_json`, `parse_repair_failure`),
|
|
140
|
+
validation-gate misses (`schema_validation_failure`), and transport failures
|
|
141
|
+
(`timeout`, `provider_error`).
|
|
142
|
+
- `kind` and `failure_class` are **additively-extensible, forward-compatible
|
|
143
|
+
sets**: handling of LLM input/output is a cross-pipeline concern and LLM
|
|
144
|
+
response/failure characteristics are not under our control, so the shared
|
|
145
|
+
ledger evolves to represent them (new kinds/classes are added as new
|
|
146
|
+
failure-handling or recovery shapes are introduced). Such additions are
|
|
147
|
+
backward-compatible and do **not** bump `schemaVersion`; consumers MUST treat
|
|
148
|
+
the sets as open and tolerate an unknown `kind`/`failure_class` (record or
|
|
149
|
+
pass it through) rather than reject the artifact. `validation_gate` /
|
|
150
|
+
`schema_validation_failure` were added under this policy.
|
|
151
|
+
- `prompt_policy_sha256` is a source-layer identity fact: the hash of the
|
|
152
|
+
unit's first initial system prompt, so before/after comparisons can
|
|
153
|
+
attribute metric deltas to prompt-policy changes. Run-level source-layer
|
|
154
|
+
identities (registry/contract/profile/validator snapshots) remain owned by
|
|
155
|
+
the run manifest's governing snapshot.
|
|
156
|
+
- `source_identity_refs` is the extensible runtime-owned identity list for
|
|
157
|
+
metric attribution. Each ref is a `<kind>:<value>` string. Current kinds:
|
|
158
|
+
`prompt_policy_sha256:<hash>` and `authored_artifact:<name>` (one per
|
|
159
|
+
distinct authored-artifact variant the unit executed; initial, repair, and
|
|
160
|
+
recovery artifact names identify the payload-contract seat). Comparators
|
|
161
|
+
must treat a metric delta as attributable only when the dependent identity
|
|
162
|
+
refs are present on both sides.
|
|
163
|
+
- Telemetry unit ownership is fail-loud: an authored artifact without a unit
|
|
164
|
+
mapping is a contract error at call time, not a silent telemetry omission.
|
|
165
|
+
- Ledger-level `lastFailureMessage` means terminal unit failure only: it is
|
|
166
|
+
set from telemetry when the unit's final recorded attempt failed. Recovered
|
|
167
|
+
intermediate failures (for example a repaired malformed output) stay
|
|
168
|
+
visible in `attempts` and must not surface as `lastFailureMessage`.
|
|
169
|
+
- `batch_count` records deterministic prompt batching (for example
|
|
170
|
+
competency-question assessment) so batching changes stay attributable.
|
|
171
|
+
- Units that made no LLM call carry no telemetry field; absence is not a
|
|
172
|
+
failure signal.
|
|
173
|
+
- Current population status: `reconstruct` populates telemetry from its run
|
|
174
|
+
manifest steps. `review` does not populate it yet.
|
|
175
|
+
|
|
87
176
|
Rules:
|
|
88
177
|
|
|
89
178
|
- `trusted` requires the producing unit to complete and all required output
|
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
> Status: design goal contract, partially registered in core lexicon.
|
|
4
4
|
> Purpose: define the cross-process goal for material-aware target handling
|
|
5
5
|
> across `review`, `reconstruct`, and future `evolve`.
|
|
6
|
+
> Note: "design goal / partially registered" describes the cross-process axis and
|
|
7
|
+
> `runtime_implementation_status`, not contract activeness. For the `reconstruct`
|
|
8
|
+
> slice specifically, `target-material-profile.yaml` and `material_profile_gate`
|
|
9
|
+
> are already contract-active per
|
|
10
|
+
> `reconstruct-contract-registry.yaml#validation_gate_catalog` / `#artifact_authorities`.
|
|
6
11
|
|
|
7
12
|
Related shared contract:
|
|
8
13
|
|
|
@@ -59,15 +64,18 @@ Allowed values:
|
|
|
59
64
|
| `mixed` | Bundle containing more than one material kind. Each member needs its own material classification; `mixed` itself is not an adapter target. |
|
|
60
65
|
| `unknown` | Runtime cannot classify the material safely. Adapter execution must halt or ask for clarification. |
|
|
61
66
|
|
|
62
|
-
The axis is separate from
|
|
67
|
+
The axis is separate from these other classifying axes. They are not peers in
|
|
68
|
+
ownership: the first two are lexicon-owned; the rest are review-contract-local
|
|
69
|
+
concepts cited here for orthogonality only, not owned by this shared contract or
|
|
70
|
+
the lexicon.
|
|
63
71
|
|
|
64
|
-
| Axis | Question answered |
|
|
65
|
-
|
|
66
|
-
| `domain` | What is the target about? |
|
|
67
|
-
| `medium` | Which cross-product implementation or reference frame accumulates reusable learning? |
|
|
68
|
-
| `target_input_kind` | How did the target enter runtime? |
|
|
69
|
-
| `artifact_roles` | What responsibility does the artifact carry in this run? |
|
|
70
|
-
| review context `source_kind` | Which context-source artifact is being admitted into prompt packets? |
|
|
72
|
+
| Axis | Question answered | Defined in (owner) |
|
|
73
|
+
|---|---|---|
|
|
74
|
+
| `domain` | What is the target about? | core-lexicon (rank-1) |
|
|
75
|
+
| `medium` | Which cross-product implementation or reference frame accumulates reusable learning? | core-lexicon (rank-1) |
|
|
76
|
+
| `target_input_kind` | How did the target enter runtime? | `review-target-profile-contract.md` §5 (review-owned; reconstruct UX references it — promote to a shared/lexicon home only if reconstruct adopts it as a formal field) |
|
|
77
|
+
| `artifact_roles` | What responsibility does the artifact carry in this run? | `review-target-profile-contract.md` §5 (review-owned) |
|
|
78
|
+
| review context `source_kind` | Which context-source artifact is being admitted into prompt packets? | review context contracts (review-owned; reconstruct deliberately does not use it) |
|
|
71
79
|
|
|
72
80
|
## 4. Cross-Process Alignment
|
|
73
81
|
|
|
@@ -161,6 +169,9 @@ Runtime must validate:
|
|
|
161
169
|
|
|
162
170
|
Before full runtime implementation, at least one prompt-backed reference run
|
|
163
171
|
must produce the planned artifact shapes and an acceptance observation.
|
|
172
|
+
("Before full runtime implementation" here scopes `runtime_implementation_status`
|
|
173
|
+
and review/future-`evolve` adoption; it does not mean the `reconstruct` material
|
|
174
|
+
profile/gate are unbuilt — those are contract-active per the registry.)
|
|
164
175
|
|
|
165
176
|
Recommended reference targets:
|
|
166
177
|
|
|
@@ -177,35 +188,25 @@ Historical reference-run evidence is isolated outside runtime reference context.
|
|
|
177
188
|
Current runtime authority is the artifact contract in this file plus the
|
|
178
189
|
review/reconstruct process contracts that consume it.
|
|
179
190
|
|
|
180
|
-
## 9. UX Output Contract
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
- material
|
|
193
|
-
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
- material observations collected
|
|
201
|
-
- semantic claims promoted by LLM directives
|
|
202
|
-
- evidence gaps
|
|
203
|
-
- unsupported or out-of-scope material
|
|
204
|
-
- next action candidates
|
|
205
|
-
|
|
206
|
-
The output contract should be rendered by the host LLM from runtime facts or by
|
|
207
|
-
existing CLI/MCP status surfaces. Do not add a separate HTML implementation just
|
|
208
|
-
to display this progress.
|
|
191
|
+
## 9. UX Output Contract — Material-Kind Delta
|
|
192
|
+
|
|
193
|
+
The full opening/progress/result run-UX skeleton is owned by each process's UX
|
|
194
|
+
contract (reconstruct: `reconstruct-execution-ux-contract.md` §§2-6; review: its
|
|
195
|
+
own status/result surfaces). To keep these same-rank contracts from drifting,
|
|
196
|
+
this section owns only the **material-kind delta** those surfaces must
|
|
197
|
+
additionally expose:
|
|
198
|
+
|
|
199
|
+
- opening: detected `target_material_kind`, planned material reading strategy, and
|
|
200
|
+
unsupported/partial-support status
|
|
201
|
+
- progress: material detection result, observation counts by material kind, and
|
|
202
|
+
unsupported/unknown/skipped material members
|
|
203
|
+
- result: material observations collected vs semantic claims promoted by LLM
|
|
204
|
+
directives, and unsupported or out-of-scope material
|
|
205
|
+
|
|
206
|
+
The host LLM renders these from runtime facts or existing CLI/MCP status surfaces;
|
|
207
|
+
do not add a separate HTML implementation. The generic environment/process/model/domain
|
|
208
|
+
exposure and the observations-vs-claims-vs-gaps separation are defined once
|
|
209
|
+
in the process UX contracts and are not restated here.
|
|
209
210
|
|
|
210
211
|
## 10. Goal Completion Conditions
|
|
211
212
|
|
package/dist/cli.js
CHANGED
|
@@ -39,6 +39,7 @@ function printHelp() {
|
|
|
39
39
|
"Active interface:",
|
|
40
40
|
" mcp Start the MCP stdio tool server",
|
|
41
41
|
" register Register the onto MCP server into supported hosts",
|
|
42
|
+
" configure-provider Write LLM provider settings into the settings.json chain",
|
|
42
43
|
"",
|
|
43
44
|
"Available MCP tools:",
|
|
44
45
|
" onto_review",
|
|
@@ -62,7 +63,7 @@ function printHelp() {
|
|
|
62
63
|
function unsupportedCommandMessage(subcommand) {
|
|
63
64
|
return [
|
|
64
65
|
`[onto] Unsupported public CLI subcommand: ${subcommand}`,
|
|
65
|
-
"Active public commands: onto mcp, onto register",
|
|
66
|
+
"Active public commands: onto mcp, onto register, onto configure-provider",
|
|
66
67
|
].join("\n");
|
|
67
68
|
}
|
|
68
69
|
async function main() {
|
|
@@ -79,6 +80,10 @@ async function main() {
|
|
|
79
80
|
const { runRegister } = await import("./core-runtime/onboard/register.js");
|
|
80
81
|
return runRegister(argv.slice(1));
|
|
81
82
|
}
|
|
83
|
+
case "configure-provider": {
|
|
84
|
+
const { runConfigureProvider } = await import("./core-runtime/onboard/configure-provider.js");
|
|
85
|
+
return runConfigureProvider(argv.slice(1));
|
|
86
|
+
}
|
|
82
87
|
case "--version":
|
|
83
88
|
case "-v": {
|
|
84
89
|
const version = await readOntoVersion();
|