@balpal4495/quorum 3.0.4 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/commands/compass.js +31 -12
- package/dist/advisor/ask.d.ts +13 -0
- package/dist/advisor/ask.d.ts.map +1 -0
- package/dist/advisor/ask.js +67 -0
- package/dist/advisor/ask.js.map +1 -0
- package/dist/advisor/index.d.ts +3 -0
- package/dist/advisor/index.d.ts.map +1 -0
- package/dist/advisor/index.js +2 -0
- package/dist/advisor/index.js.map +1 -0
- package/dist/advisor/prompt.d.ts +5 -0
- package/dist/advisor/prompt.d.ts.map +1 -0
- package/{modules/advisor/prompt.ts → dist/advisor/prompt.js} +22 -26
- package/dist/advisor/prompt.js.map +1 -0
- package/dist/advisor/types.d.ts +23 -0
- package/dist/advisor/types.d.ts.map +1 -0
- package/dist/advisor/types.js +2 -0
- package/dist/advisor/types.js.map +1 -0
- package/dist/compass/behavior.d.ts +4 -0
- package/dist/compass/behavior.d.ts.map +1 -0
- package/dist/compass/behavior.js +138 -0
- package/dist/compass/behavior.js.map +1 -0
- package/dist/compass/create.d.ts +3 -0
- package/dist/compass/create.d.ts.map +1 -0
- package/dist/compass/create.js +289 -0
- package/dist/compass/create.js.map +1 -0
- package/dist/compass/evidence/collect.d.ts +11 -0
- package/dist/compass/evidence/collect.d.ts.map +1 -0
- package/dist/compass/evidence/collect.js +86 -0
- package/dist/compass/evidence/collect.js.map +1 -0
- package/dist/compass/index.d.ts +8 -0
- package/dist/compass/index.d.ts.map +1 -0
- package/dist/compass/index.js +8 -0
- package/dist/compass/index.js.map +1 -0
- package/dist/compass/prompts/index.d.ts +28 -0
- package/dist/compass/prompts/index.d.ts.map +1 -0
- package/{modules/compass/prompts/index.ts → dist/compass/prompts/index.js} +13 -38
- package/dist/compass/prompts/index.js.map +1 -0
- package/dist/compass/prompts/system.d.ts +2 -0
- package/dist/compass/prompts/system.d.ts.map +1 -0
- package/{modules/compass/prompts/system.ts → dist/compass/prompts/system.js} +2 -1
- package/dist/compass/prompts/system.js.map +1 -0
- package/dist/compass/propose.d.ts +15 -0
- package/dist/compass/propose.d.ts.map +1 -0
- package/dist/compass/propose.js +128 -0
- package/dist/compass/propose.js.map +1 -0
- package/dist/compass/schemas.d.ts +1271 -0
- package/dist/compass/schemas.d.ts.map +1 -0
- package/dist/compass/schemas.js +113 -0
- package/dist/compass/schemas.js.map +1 -0
- package/dist/compass/score.d.ts +25 -0
- package/dist/compass/score.d.ts.map +1 -0
- package/dist/compass/score.js +89 -0
- package/dist/compass/score.js.map +1 -0
- package/dist/compass/sources/index.d.ts +9 -0
- package/dist/compass/sources/index.d.ts.map +1 -0
- package/dist/compass/sources/index.js +408 -0
- package/dist/compass/sources/index.js.map +1 -0
- package/dist/compass/types.d.ts +334 -0
- package/dist/compass/types.d.ts.map +1 -0
- package/dist/compass/types.js +2 -0
- package/dist/compass/types.js.map +1 -0
- package/dist/council/advisors.d.ts +15 -0
- package/dist/council/advisors.d.ts.map +1 -0
- package/dist/council/advisors.js +46 -0
- package/dist/council/advisors.js.map +1 -0
- package/dist/council/chairman.d.ts +13 -0
- package/dist/council/chairman.d.ts.map +1 -0
- package/dist/council/chairman.js +145 -0
- package/dist/council/chairman.js.map +1 -0
- package/dist/council/deliberate.d.ts +22 -0
- package/dist/council/deliberate.d.ts.map +1 -0
- package/dist/council/deliberate.js +99 -0
- package/dist/council/deliberate.js.map +1 -0
- package/dist/council/frame.d.ts +8 -0
- package/dist/council/frame.d.ts.map +1 -0
- package/dist/council/frame.js +40 -0
- package/dist/council/frame.js.map +1 -0
- package/dist/council/index.d.ts +6 -0
- package/dist/council/index.d.ts.map +1 -0
- package/dist/council/index.js +4 -0
- package/dist/council/index.js.map +1 -0
- package/dist/council/personas.d.ts +18 -0
- package/dist/council/personas.d.ts.map +1 -0
- package/dist/council/personas.js +44 -0
- package/dist/council/personas.js.map +1 -0
- package/dist/council/reviewers.d.ts +13 -0
- package/dist/council/reviewers.d.ts.map +1 -0
- package/dist/council/reviewers.js +59 -0
- package/dist/council/reviewers.js.map +1 -0
- package/dist/council/risk.d.ts +16 -0
- package/dist/council/risk.d.ts.map +1 -0
- package/dist/council/risk.js +74 -0
- package/dist/council/risk.js.map +1 -0
- package/dist/council/types.d.ts +95 -0
- package/dist/council/types.d.ts.map +1 -0
- package/dist/council/types.js +2 -0
- package/dist/council/types.js.map +1 -0
- package/dist/jury/evaluate.d.ts +13 -0
- package/dist/jury/evaluate.d.ts.map +1 -0
- package/{modules/jury/evaluate.ts → dist/jury/evaluate.js} +60 -84
- package/dist/jury/evaluate.js.map +1 -0
- package/dist/jury/index.d.ts +6 -0
- package/dist/jury/index.d.ts.map +1 -0
- package/dist/jury/index.js +4 -0
- package/dist/jury/index.js.map +1 -0
- package/dist/jury/preflight.d.ts +26 -0
- package/dist/jury/preflight.d.ts.map +1 -0
- package/dist/jury/preflight.js +71 -0
- package/dist/jury/preflight.js.map +1 -0
- package/dist/jury/schema.d.ts +57 -0
- package/dist/jury/schema.d.ts.map +1 -0
- package/dist/jury/schema.js +21 -0
- package/dist/jury/schema.js.map +1 -0
- package/dist/jury/types.d.ts +47 -0
- package/dist/jury/types.d.ts.map +1 -0
- package/dist/jury/types.js +2 -0
- package/dist/jury/types.js.map +1 -0
- package/dist/oracle/adapters/lance-db.d.ts +15 -0
- package/dist/oracle/adapters/lance-db.d.ts.map +1 -0
- package/dist/oracle/adapters/lance-db.js +68 -0
- package/dist/oracle/adapters/lance-db.js.map +1 -0
- package/dist/oracle/adapters/xenova-embedder.d.ts +21 -0
- package/dist/oracle/adapters/xenova-embedder.d.ts.map +1 -0
- package/dist/oracle/adapters/xenova-embedder.js +36 -0
- package/dist/oracle/adapters/xenova-embedder.js.map +1 -0
- package/dist/oracle/bm25.d.ts +20 -0
- package/dist/oracle/bm25.d.ts.map +1 -0
- package/dist/oracle/bm25.js +82 -0
- package/dist/oracle/bm25.js.map +1 -0
- package/dist/oracle/index.d.ts +21 -0
- package/dist/oracle/index.d.ts.map +1 -0
- package/dist/oracle/index.js +25 -0
- package/dist/oracle/index.js.map +1 -0
- package/dist/oracle/log.d.ts +6 -0
- package/dist/oracle/log.d.ts.map +1 -0
- package/dist/oracle/log.js +12 -0
- package/dist/oracle/log.js.map +1 -0
- package/dist/oracle/propose.d.ts +25 -0
- package/dist/oracle/propose.d.ts.map +1 -0
- package/dist/oracle/propose.js +133 -0
- package/dist/oracle/propose.js.map +1 -0
- package/dist/oracle/query.d.ts +17 -0
- package/dist/oracle/query.d.ts.map +1 -0
- package/dist/oracle/query.js +106 -0
- package/dist/oracle/query.js.map +1 -0
- package/dist/oracle/summary.d.ts +11 -0
- package/dist/oracle/summary.d.ts.map +1 -0
- package/dist/oracle/summary.js +102 -0
- package/dist/oracle/summary.js.map +1 -0
- package/dist/oracle/types.d.ts +31 -0
- package/dist/oracle/types.d.ts.map +1 -0
- package/dist/oracle/types.js +2 -0
- package/dist/oracle/types.js.map +1 -0
- package/dist/sentinel/assert.d.ts +28 -0
- package/dist/sentinel/assert.d.ts.map +1 -0
- package/dist/sentinel/assert.js +63 -0
- package/dist/sentinel/assert.js.map +1 -0
- package/dist/sentinel/coverage.d.ts +14 -0
- package/dist/sentinel/coverage.d.ts.map +1 -0
- package/dist/sentinel/coverage.js +96 -0
- package/dist/sentinel/coverage.js.map +1 -0
- package/dist/sentinel/drift.d.ts +12 -0
- package/dist/sentinel/drift.d.ts.map +1 -0
- package/dist/sentinel/drift.js +149 -0
- package/dist/sentinel/drift.js.map +1 -0
- package/dist/sentinel/index.d.ts +7 -0
- package/dist/sentinel/index.d.ts.map +1 -0
- package/dist/sentinel/index.js +5 -0
- package/dist/sentinel/index.js.map +1 -0
- package/dist/sentinel/review.d.ts +15 -0
- package/dist/sentinel/review.d.ts.map +1 -0
- package/dist/sentinel/review.js +177 -0
- package/dist/sentinel/review.js.map +1 -0
- package/dist/setup.d.ts +103 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +87 -0
- package/dist/setup.js.map +1 -0
- package/dist/shared/types.d.ts +173 -0
- package/dist/shared/types.d.ts.map +1 -0
- package/dist/shared/types.js +16 -0
- package/dist/shared/types.js.map +1 -0
- package/package.json +13 -8
- package/.github/copilot-instructions.md +0 -117
- package/CLAUDE.md +0 -146
- package/GEMINI.md +0 -73
- package/SETUP.md +0 -264
- package/evals/__tests__/eval.test.ts +0 -31
- package/evals/cases/auth_hs256_rejected.json +0 -46
- package/evals/cases/auth_rs256_valid.json +0 -30
- package/evals/cases/cache_missing_lock.json +0 -31
- package/evals/cases/db_naive_not_null.json +0 -32
- package/evals/cases/logging_pii_leak.json +0 -32
- package/evals/cases/migration_with_rollback.json +0 -43
- package/evals/cases/no_evidence_novel_design.json +0 -16
- package/evals/cases/payment_no_idempotency.json +0 -33
- package/evals/cases/redis_session_rejected.json +0 -32
- package/evals/cases/safe_refactor.json +0 -17
- package/evals/runner.ts +0 -226
- package/modules/AGENTS.md +0 -78
- package/modules/CLAUDE.md +0 -93
- package/modules/README.md +0 -504
- package/modules/advisor/ask.ts +0 -87
- package/modules/advisor/index.ts +0 -2
- package/modules/advisor/types.ts +0 -26
- package/modules/compass/behavior.ts +0 -161
- package/modules/compass/create.ts +0 -365
- package/modules/compass/evidence/collect.ts +0 -109
- package/modules/compass/index.ts +0 -7
- package/modules/compass/propose.ts +0 -152
- package/modules/compass/schemas.ts +0 -121
- package/modules/compass/score.ts +0 -77
- package/modules/compass/sources/index.ts +0 -413
- package/modules/compass/types.ts +0 -431
- package/modules/council/advisors.ts +0 -71
- package/modules/council/chairman.ts +0 -183
- package/modules/council/deliberate.ts +0 -141
- package/modules/council/frame.ts +0 -54
- package/modules/council/index.ts +0 -9
- package/modules/council/personas.ts +0 -57
- package/modules/council/reviewers.ts +0 -82
- package/modules/council/risk.ts +0 -89
- package/modules/council/types.ts +0 -107
- package/modules/jury/index.ts +0 -5
- package/modules/jury/preflight.ts +0 -101
- package/modules/jury/schema.ts +0 -24
- package/modules/jury/types.ts +0 -50
- package/modules/oracle/adapters/lance-db.ts +0 -81
- package/modules/oracle/adapters/xenova-embedder.ts +0 -43
- package/modules/oracle/bm25.ts +0 -92
- package/modules/oracle/index.ts +0 -36
- package/modules/oracle/log.ts +0 -15
- package/modules/oracle/propose.ts +0 -164
- package/modules/oracle/query.ts +0 -146
- package/modules/oracle/summary.ts +0 -116
- package/modules/oracle/types.ts +0 -32
- package/modules/sentinel/assert.ts +0 -95
- package/modules/sentinel/coverage.ts +0 -106
- package/modules/sentinel/drift.ts +0 -163
- package/modules/sentinel/index.ts +0 -6
- package/modules/sentinel/review.ts +0 -208
- package/modules/setup.ts +0 -202
- package/modules/shared/types.ts +0 -193
package/modules/CLAUDE.md
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
# modules/ — Claude Instructions
|
|
2
|
-
|
|
3
|
-
Supplements the root-level instructions. Read this when working inside the `modules/` folder.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## What these modules are
|
|
8
|
-
|
|
9
|
-
Six portable TypeScript modules — Advisor, Oracle, Jury, Council, Sentinel, Compass — that form the knowledge, reasoning, and product-direction layer of an agentic workflow. They are designed to be dropped into any Node.js codebase.
|
|
10
|
-
|
|
11
|
-
The entry point for a host application is `setup.ts`. Everything else is internal.
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
|
-
## Key design decisions to preserve
|
|
16
|
-
|
|
17
|
-
### Dependency injection throughout
|
|
18
|
-
No module imports a specific LLM provider, vector store, or embedder. All external dependencies are passed in as function arguments or via a deps object. If you add a new capability, follow this pattern — do not hardcode providers.
|
|
19
|
-
|
|
20
|
-
### Confidence is recomputed from the breakdown — never trusted from the LLM
|
|
21
|
-
In `jury/evaluate.ts`, after parsing the LLM response, `confidence` is recomputed as the exact average of the four `confidence_breakdown` dimensions. The LLM's stated `confidence` value is discarded. `council_brief` is then derived from this recomputed value. Do not remove either override.
|
|
22
|
-
|
|
23
|
-
### Throw on bad LLM output — never default to passing
|
|
24
|
-
`jury/evaluate.ts`, `council/chairman.ts`, and `advisor/ask.ts` all throw if the LLM returns non-JSON or output that fails schema validation. This is intentional. A silently passing score is worse than an error. Do not add fallbacks or defaults.
|
|
25
|
-
|
|
26
|
-
### Advisor is a read-only path
|
|
27
|
-
`advisor/ask.ts` queries Oracle and calls the LLM — it never calls `oracle.propose()` or `oracle.commit()`. It has no side effects on Chronicle. Do not add write calls to the Advisor path.
|
|
28
|
-
|
|
29
|
-
### Advisor validation loop
|
|
30
|
-
`advisor/ask.ts` retries the LLM call up to `MAX_RETRIES` (2) times when the answer does not meet the satisfaction threshold (confidence ≥ 0.7, no blockers). The previous answer is included as context in the retry prompt. After the retry budget is exhausted, the best answer is returned regardless. Do not increase `MAX_RETRIES` without considering LLM cost implications.
|
|
31
|
-
|
|
32
|
-
### oracle.commit() is a human gate
|
|
33
|
-
`council/deliberate.ts` calls `oracle.propose()` at the end of every deliberation. It never calls `oracle.commit()`. If you see a code path that calls `oracle.commit()` without explicit human input, that is a bug.
|
|
34
|
-
|
|
35
|
-
### Oracle proposals use only validated citation IDs
|
|
36
|
-
`deliberate.ts` passes `verdict.citation_validation.valid_ids` as `evidence_cited` when calling `oracle.propose()` — not the raw `evidence_cited` array from the Chairman. Hallucinated IDs (cited but not in the evidence pack) are stripped before the proposal is written.
|
|
37
|
-
|
|
38
|
-
### Preflight runs before every Jury LLM call — do not remove it
|
|
39
|
-
`jury/evaluate.ts` calls `runPreflight()` before building the user prompt. The preflight result is injected as the `## Deterministic Preflight` section. This gives the LLM hard facts to reason over rather than discovering them itself. Do not move this call after the LLM invocation.
|
|
40
|
-
|
|
41
|
-
### Risk classifier determines fan-out counts — do not hardcode them
|
|
42
|
-
`deliberate.ts` reads `risk.council_mode` from `classifyRisk()` to set advisor and reviewer counts. Do not hardcode `advisorCount` or `reviewerCount` defaults inside `deliberate.ts` — the risk classifier owns these defaults.
|
|
43
|
-
|
|
44
|
-
### Query logging is best-effort
|
|
45
|
-
`oracle/log.ts` writes to a JSONL file. The `query()` function wraps this in a try/catch that swallows errors silently. This is correct behaviour — a log write failure must never fail a query.
|
|
46
|
-
|
|
47
|
-
---
|
|
48
|
-
|
|
49
|
-
## When modifying oracle/query.ts
|
|
50
|
-
|
|
51
|
-
The retrieval pipeline has two passes:
|
|
52
|
-
1. **Vector search** — embed query, retrieve `limit × 3` candidates from the vector store
|
|
53
|
-
2. **BM25 re-ranking** — score candidates, enrich query with domain terms from Pass 1, fuse ranks via RRF
|
|
54
|
-
|
|
55
|
-
RRF constant is `k=60`. Score threshold default is `0.031`. Results below the threshold are dropped entirely — not returned as low-confidence results. If you change the threshold, update the default in `query.ts` and the `QueryOptions` type comment in `shared/types.ts`.
|
|
56
|
-
|
|
57
|
-
---
|
|
58
|
-
|
|
59
|
-
## When modifying council/deliberate.ts
|
|
60
|
-
|
|
61
|
-
The pipeline order is fixed: `frameQuestion → fanOutAdvisors → fanOutReviewers → chairman → oracle.propose()`. Within the advisor phase and within the reviewer phase, the individual calls run in parallel via `Promise.all`. Do not serialise the calls inside either phase — that defeats the independence of the panel.
|
|
62
|
-
|
|
63
|
-
Anonymisation of advisor responses happens inside `fanOutReviewers()` before any reviewer sees them. It must stay there.
|
|
64
|
-
|
|
65
|
-
The risk classifier runs at the start of `deliberate()` before any LLM calls. It sets advisor/reviewer counts and is logged in the Chronicle proposal's `scope` field. Do not move it.
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
|
|
69
|
-
## When modifying jury/preflight.ts
|
|
70
|
-
|
|
71
|
-
`SENSITIVE_PATTERNS` and the risk rules in `council/risk.ts` are separate but related. Preflight detects patterns for the Jury prompt; the risk classifier uses its own pattern set to determine Council mode. The two sets are intentionally independent in code — changing one does not update the other — so when you add a new sensitive-area category, update both by hand to keep them in sync.
|
|
72
|
-
|
|
73
|
-
The eval suite in `evals/cases/` has `preflight_expects` and `risk_level` assertions. When changing patterns, run `npx vitest run evals/` to verify existing cases still pass.
|
|
74
|
-
|
|
75
|
-
---
|
|
76
|
-
|
|
77
|
-
## Safe to change
|
|
78
|
-
|
|
79
|
-
- `council/personas.ts` — add or adjust personas freely
|
|
80
|
-
- `jury/preflight.ts` `SENSITIVE_PATTERNS` — extend with new categories; run evals after
|
|
81
|
-
- `council/risk.ts` `RISK_RULES` — add new risk patterns; run evals after
|
|
82
|
-
- `models` defaults in `setup.ts` — adjust model names as providers evolve
|
|
83
|
-
- BM25 constants (`K1`, `B`) in `oracle/bm25.ts` — tunable, well-commented
|
|
84
|
-
- `CANDIDATE_MULTIPLIER` and `RRF_K` in `oracle/query.ts` — tunable retrieval parameters
|
|
85
|
-
- `evals/cases/` — add new eval cases freely; they run in CI automatically
|
|
86
|
-
|
|
87
|
-
## Do not change without strong reason
|
|
88
|
-
|
|
89
|
-
- The `VectorStore` interface in `oracle/types.ts` — changing it breaks all adapters
|
|
90
|
-
- The `ChronicleEntry` type in `shared/types.ts` — changing it breaks stored data
|
|
91
|
-
- The Zod schemas in `jury/schema.ts` and `council/chairman.ts` — these are the output contracts
|
|
92
|
-
- The `OracleClient` interface in `shared/types.ts` — Jury and Council depend on it
|
|
93
|
-
- The confidence recomputation in `jury/evaluate.ts` — it makes confidence calibrated and deterministic
|
package/modules/README.md
DELETED
|
@@ -1,504 +0,0 @@
|
|
|
1
|
-
# Advisor · Oracle · Jury · Council · Sentinel · Compass
|
|
2
|
-
|
|
3
|
-
Six portable modules for the knowledge, reasoning, and product-direction layer of any agentic workflow.
|
|
4
|
-
|
|
5
|
-
```
|
|
6
|
-
Advisor → plain-language questions answered from Chronicle
|
|
7
|
-
Oracle → Jury → Council → human gate → Executor
|
|
8
|
-
Sentinel → coverage + drift + PR coverage map
|
|
9
|
-
Compass → product-direction synthesis (behaviours, pathways, bets, scoring)
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
---
|
|
13
|
-
|
|
14
|
-
## Modules
|
|
15
|
-
|
|
16
|
-
| Module | Responsibility | LLM? |
|
|
17
|
-
|---|---|---|
|
|
18
|
-
| **Advisor** | Ask a plain-language question — synthesises Chronicle evidence into a concise answer with an internal validation loop | Yes |
|
|
19
|
-
| **Oracle** | Query and write interface to Chronicle (the persistent knowledge store) | No |
|
|
20
|
-
| **Jury** | Evaluate a design against Oracle evidence — produces a confidence score | Yes |
|
|
21
|
-
| **Council** | Adversarial validation via parallel advisor/reviewer fan-out — produces a verdict | Yes |
|
|
22
|
-
| **Sentinel** | Chronicle coverage reporting, drift detection, and PR coverage maps | Optional |
|
|
23
|
-
| **Compass** | Product-direction synthesis from Chronicle + codebase — behaviours, opportunities, pathways, bets, idea scoring | Optional |
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
## Chronicle
|
|
28
|
-
|
|
29
|
-
Chronicle is the data that underpins the system. It is not a module — it lives at `.chronicle/` in your project root.
|
|
30
|
-
|
|
31
|
-
```
|
|
32
|
-
.chronicle/
|
|
33
|
-
committed/ ← approved entries as JSON (committed to git, source of truth)
|
|
34
|
-
proposals/ ← staged entries awaiting human approval (JSON, not indexed yet)
|
|
35
|
-
SUMMARY.md ← auto-generated agent context, rebuilt on every commit
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
Every entry goes through `oracle.propose()` → human approval → `oracle.commit()`. There are no auto-commits.
|
|
39
|
-
|
|
40
|
-
### Chronicle entry schema (v2)
|
|
41
|
-
|
|
42
|
-
```typescript
|
|
43
|
-
type ChronicleEntry = {
|
|
44
|
-
// Always present (v1 + v2)
|
|
45
|
-
id: string
|
|
46
|
-
key_insight: string // v1: primary text; v2: copy of decision for compat
|
|
47
|
-
affected_areas: string[] // file paths — used by Sentinel for coverage matching
|
|
48
|
-
status: "validated" | "refuted" | "open"
|
|
49
|
-
confidence: number // 0–1
|
|
50
|
-
source_module: string
|
|
51
|
-
evidence_cited: string[]
|
|
52
|
-
timestamp: string
|
|
53
|
-
|
|
54
|
-
// v2 fields (optional — absent on legacy entries)
|
|
55
|
-
schema_version?: 2
|
|
56
|
-
topic?: string // short label: "auth/session strategy"
|
|
57
|
-
decision?: string // the decision — primary text in v2
|
|
58
|
-
scope?: string[] // domain tags: ["auth", "sessions"] — additive
|
|
59
|
-
alternatives_considered?: string[]
|
|
60
|
-
rejected_reason?: string[]
|
|
61
|
-
supersedes?: string | null // ID of the entry this replaces
|
|
62
|
-
superseded_by?: string | null // ID of the entry that replaced this
|
|
63
|
-
|
|
64
|
-
// Outcome tracking fields (optional — filled in post-execution)
|
|
65
|
-
outcome?: string // what actually happened when acted on
|
|
66
|
-
validation_plan?: string[] // steps that confirm the decision was correct
|
|
67
|
-
review_after?: string // ISO date to re-evaluate for drift
|
|
68
|
-
post_merge_result?: "successful" | "bug" | "partial" | "rolled-back"
|
|
69
|
-
}
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
Use `entryText(entry)` from `shared/types` whenever you need to read the primary text — it returns `entry.decision ?? entry.key_insight` and works across both schema versions.
|
|
73
|
-
|
|
74
|
-
New entries created by Council automatically include `decision`, `topic`, `alternatives_considered`, and `rejected_reason` from the deliberation output, plus `scope` from the risk classifier.
|
|
75
|
-
|
|
76
|
-
---
|
|
77
|
-
|
|
78
|
-
## Dependencies
|
|
79
|
-
|
|
80
|
-
**Required** (must be in your project):
|
|
81
|
-
```
|
|
82
|
-
zod
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
**Optional** — only needed if using the included default adapters:
|
|
86
|
-
```
|
|
87
|
-
vectordb ← LanceDB adapter (oracle/adapters/lance-db.ts)
|
|
88
|
-
@xenova/transformers ← local ONNX embedder (oracle/adapters/xenova-embedder.ts)
|
|
89
|
-
```
|
|
90
|
-
|
|
91
|
-
You can substitute any vector store and embedder by implementing the `VectorStore` and `embedder` interfaces.
|
|
92
|
-
|
|
93
|
-
---
|
|
94
|
-
|
|
95
|
-
## TypeScript runtime requirement
|
|
96
|
-
|
|
97
|
-
Quorum ships TypeScript source (`.ts` files). Programmatic imports require a
|
|
98
|
-
TS-aware runtime or bundler. Plain `node` will not work without a loader.
|
|
99
|
-
|
|
100
|
-
**Supported runtimes:**
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
# tsx (recommended — zero config)
|
|
104
|
-
npx tsx your-script.ts
|
|
105
|
-
|
|
106
|
-
# ts-node
|
|
107
|
-
npx ts-node --esm your-script.ts
|
|
108
|
-
|
|
109
|
-
# Bun
|
|
110
|
-
bun your-script.ts
|
|
111
|
-
|
|
112
|
-
# Vitest / Jest with ts transform — works out of the box in test files
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
**Bundlers:** esbuild, Vite, Rollup, webpack — all supported with standard TS config.
|
|
116
|
-
|
|
117
|
-
**Plain Node.js** (no TS runtime): use the CLI instead:
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
quorum advisor "question"
|
|
121
|
-
quorum check --outcome "..." --design "..."
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
The CLI is always available after `npm install -g @balpal4495/quorum` and requires no
|
|
125
|
-
TS loader. It is the recommended interface for most host-project use cases.
|
|
126
|
-
|
|
127
|
-
---
|
|
128
|
-
|
|
129
|
-
## TypeScript
|
|
130
|
-
|
|
131
|
-
Requires TypeScript 4.7+ and `zod` v3.
|
|
132
|
-
|
|
133
|
-
Recommended `tsconfig.json` settings:
|
|
134
|
-
```json
|
|
135
|
-
{
|
|
136
|
-
"compilerOptions": {
|
|
137
|
-
"strict": true,
|
|
138
|
-
"moduleResolution": "node"
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
---
|
|
144
|
-
|
|
145
|
-
## Quick start
|
|
146
|
-
|
|
147
|
-
### npm users
|
|
148
|
-
|
|
149
|
-
```typescript
|
|
150
|
-
import { setup } from "@balpal4495/quorum"
|
|
151
|
-
|
|
152
|
-
// The simplest entry point — wires all modules from one call
|
|
153
|
-
const { oracle, evaluate, deliberate, ask } = await setup({ llm: myLLMProvider })
|
|
154
|
-
|
|
155
|
-
// Ask a plain-language question (Advisor)
|
|
156
|
-
const answer = await ask("what did the team decide about authentication?")
|
|
157
|
-
// answer.what_we_know, .recommendation, .next_step, .risks, .blockers, .retries
|
|
158
|
-
|
|
159
|
-
// Or run the full evaluation pipeline for a proposed design:
|
|
160
|
-
const evidence = await oracle.query("authentication patterns in this codebase")
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
### Quorum repo contributors
|
|
164
|
-
|
|
165
|
-
Working directly inside the Quorum source tree? Import from the local path instead:
|
|
166
|
-
|
|
167
|
-
```typescript
|
|
168
|
-
import { setup } from "./modules/setup"
|
|
169
|
-
|
|
170
|
-
const { oracle, evaluate, deliberate, ask } = await setup({ llm: myLLMProvider })
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
### Manual wiring (without setup())
|
|
174
|
-
|
|
175
|
-
```typescript
|
|
176
|
-
import { createOracleClient, xenovaEmbed, createLanceDBStore } from "@balpal4495/quorum/oracle"
|
|
177
|
-
import { evaluate } from "@balpal4495/quorum/jury"
|
|
178
|
-
import { deliberate } from "@balpal4495/quorum/council"
|
|
179
|
-
|
|
180
|
-
// 1. Wire Oracle manually
|
|
181
|
-
const oracle = createOracleClient({
|
|
182
|
-
embedder: xenovaEmbed,
|
|
183
|
-
vectorStore: await createLanceDBStore(".chronicle"),
|
|
184
|
-
})
|
|
185
|
-
|
|
186
|
-
// 2. Retrieve evidence for the task at hand
|
|
187
|
-
const evidence = await oracle.query("authentication patterns in this codebase")
|
|
188
|
-
|
|
189
|
-
// 3. Jury evaluates the design against the evidence
|
|
190
|
-
const juryOutput = await evaluate(
|
|
191
|
-
{
|
|
192
|
-
outcome: "Add JWT authentication to the API",
|
|
193
|
-
design: "RS256 tokens, 15-min expiry, refresh rotation in httpOnly cookies",
|
|
194
|
-
evidence,
|
|
195
|
-
},
|
|
196
|
-
{ llm: yourLLMProvider, model: "gpt-4o-mini" },
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
// 4. Council validates adversarially
|
|
200
|
-
const verdict = await deliberate(
|
|
201
|
-
{
|
|
202
|
-
outcome: "Add JWT authentication to the API",
|
|
203
|
-
design: "RS256 tokens, 15-min expiry, refresh rotation in httpOnly cookies",
|
|
204
|
-
evidence,
|
|
205
|
-
jury_output: juryOutput,
|
|
206
|
-
},
|
|
207
|
-
{
|
|
208
|
-
llm: yourLLMProvider,
|
|
209
|
-
oracle,
|
|
210
|
-
models: {
|
|
211
|
-
frame: "gpt-4o-mini",
|
|
212
|
-
advisors: "gpt-4o-mini",
|
|
213
|
-
reviewers: "gpt-4o",
|
|
214
|
-
chairman: "gpt-4o",
|
|
215
|
-
},
|
|
216
|
-
},
|
|
217
|
-
)
|
|
218
|
-
|
|
219
|
-
// 5. Route on verdict
|
|
220
|
-
if (verdict.satisfied) {
|
|
221
|
-
// → human gate → Executor
|
|
222
|
-
} else if (verdict.recommendation === "redesign") {
|
|
223
|
-
// → return to Designer with verdict.verdict as feedback
|
|
224
|
-
} else {
|
|
225
|
-
// → return to Detective with juryOutput.gaps
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
// 6. Human approves the proposed Chronicle entry
|
|
229
|
-
// The Council automatically called oracle.propose() — you just need to commit:
|
|
230
|
-
// await oracle.commit(proposalId)
|
|
231
|
-
```
|
|
232
|
-
|
|
233
|
-
---
|
|
234
|
-
|
|
235
|
-
## Advisor
|
|
236
|
-
|
|
237
|
-
The Advisor is the plain-language interface to Chronicle. Use it to answer questions rather than to evaluate designs. It is a **read-only** path — it never calls `oracle.propose()` or `oracle.commit()`.
|
|
238
|
-
|
|
239
|
-
```typescript
|
|
240
|
-
import { ask } from "@balpal4495/quorum/advisor"
|
|
241
|
-
|
|
242
|
-
const answer = await ask(
|
|
243
|
-
{ question: "What did the team decide about session handling?", evidence },
|
|
244
|
-
{ llm: myLLMProvider },
|
|
245
|
-
)
|
|
246
|
-
```
|
|
247
|
-
|
|
248
|
-
Or via `setup()`, which queries Oracle automatically:
|
|
249
|
-
|
|
250
|
-
```typescript
|
|
251
|
-
const { ask } = await setup({ llm: myLLMProvider })
|
|
252
|
-
const answer = await ask("What did the team decide about session handling?")
|
|
253
|
-
```
|
|
254
|
-
|
|
255
|
-
### Advisor output
|
|
256
|
-
|
|
257
|
-
```typescript
|
|
258
|
-
interface AdvisorOutput {
|
|
259
|
-
question: string
|
|
260
|
-
confidence: number // 0–1 — how confident the answer is given the evidence
|
|
261
|
-
what_we_know: string // plain-language summary of relevant Chronicle knowledge
|
|
262
|
-
risks: string[] // real risks worth knowing
|
|
263
|
-
blockers: string[] // hard blockers — must be resolved before acting (often empty)
|
|
264
|
-
recommendation: string // one clear recommended action
|
|
265
|
-
next_step: string // specific next step or quorum command
|
|
266
|
-
retries: number // how many retry attempts were needed (0 = first try succeeded)
|
|
267
|
-
}
|
|
268
|
-
```
|
|
269
|
-
|
|
270
|
-
### Validation loop
|
|
271
|
-
|
|
272
|
-
Advisor validates its own answer before returning. If `confidence < 0.7` or `blockers.length > 0`, it retries the LLM call with the previous answer as context — up to 2 retries. After the retry budget is exhausted, the best answer is returned regardless. Throws on non-JSON or schema-invalid LLM output.
|
|
273
|
-
|
|
274
|
-
---
|
|
275
|
-
|
|
276
|
-
## LLM provider interface
|
|
277
|
-
|
|
278
|
-
The `LLMProvider` type is a simple function. Wire it to any provider:
|
|
279
|
-
|
|
280
|
-
```typescript
|
|
281
|
-
import type { LLMProvider } from "@balpal4495/quorum"
|
|
282
|
-
|
|
283
|
-
// OpenAI example
|
|
284
|
-
const openaiProvider: LLMProvider = async (messages, model = "gpt-4o") => {
|
|
285
|
-
const res = await openai.chat.completions.create({ model, messages })
|
|
286
|
-
return res.choices[0].message.content ?? ""
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
// Anthropic example
|
|
290
|
-
const anthropicProvider: LLMProvider = async (messages, model = "claude-3-5-sonnet-20241022") => {
|
|
291
|
-
const system = messages.find(m => m.role === "system")?.content ?? ""
|
|
292
|
-
const userMessages = messages.filter(m => m.role !== "system")
|
|
293
|
-
const res = await anthropic.messages.create({ model, system, messages: userMessages, max_tokens: 2048 })
|
|
294
|
-
return res.content[0].type === "text" ? res.content[0].text : ""
|
|
295
|
-
}
|
|
296
|
-
```
|
|
297
|
-
|
|
298
|
-
---
|
|
299
|
-
|
|
300
|
-
## Jury output
|
|
301
|
-
|
|
302
|
-
```typescript
|
|
303
|
-
interface JuryOutput {
|
|
304
|
-
confidence: number // exact average of the four breakdown scores
|
|
305
|
-
confidence_breakdown: {
|
|
306
|
-
evidence_support: number // do validated entries confirm this approach?
|
|
307
|
-
feasibility: number // is this achievable given what Chronicle knows?
|
|
308
|
-
risk: number // how well does the design address failure modes?
|
|
309
|
-
completeness: number // does it cover the full outcome?
|
|
310
|
-
}
|
|
311
|
-
assessment: string
|
|
312
|
-
gaps: string[] // all missing evidence
|
|
313
|
-
blocking_gaps: string[] // subset of gaps that are hard blockers
|
|
314
|
-
council_brief: "challenge" | "pressure-test"
|
|
315
|
-
recommendation: "proceed" | "investigate-more" | "redesign"
|
|
316
|
-
}
|
|
317
|
-
```
|
|
318
|
-
|
|
319
|
-
`confidence` is always recomputed from the breakdown average — the LLM's stated value is discarded. `council_brief` is derived from `confidence` (< 0.6 → challenge, ≥ 0.6 → pressure-test).
|
|
320
|
-
|
|
321
|
-
### Preflight (no LLM)
|
|
322
|
-
|
|
323
|
-
Before the LLM runs, Jury executes a deterministic preflight:
|
|
324
|
-
|
|
325
|
-
```typescript
|
|
326
|
-
import { runPreflight } from "@balpal4495/quorum/jury"
|
|
327
|
-
|
|
328
|
-
const preflight = runPreflight(outcome, design, evidence)
|
|
329
|
-
// preflight.touches_sensitive_area
|
|
330
|
-
// preflight.sensitive_areas — ["auth", "database", ...]
|
|
331
|
-
// preflight.rollback_mentioned
|
|
332
|
-
// preflight.test_strategy_mentioned
|
|
333
|
-
// preflight.chronicle_conflicts — refuted entry IDs that overlap with the design
|
|
334
|
-
```
|
|
335
|
-
|
|
336
|
-
Results are injected into the Jury prompt as hard facts. Auth, database migrations, crypto, payments, PII, and secrets are the detected sensitive areas.
|
|
337
|
-
|
|
338
|
-
### Jury output routing
|
|
339
|
-
|
|
340
|
-
| `recommendation` | Next step |
|
|
341
|
-
|---|---|
|
|
342
|
-
| `proceed` | Pass to Council |
|
|
343
|
-
| `investigate-more` | Return to Detective with `blocking_gaps` |
|
|
344
|
-
| `redesign` | Return to Designer |
|
|
345
|
-
|
|
346
|
-
---
|
|
347
|
-
|
|
348
|
-
## Council output
|
|
349
|
-
|
|
350
|
-
```typescript
|
|
351
|
-
interface CouncilOutput {
|
|
352
|
-
satisfied: boolean
|
|
353
|
-
verdict: string
|
|
354
|
-
blockers: Array<{ // must be resolved before proceeding
|
|
355
|
-
issue: string
|
|
356
|
-
evidence: string[] // Oracle entry IDs that support this blocker
|
|
357
|
-
required_fix: string
|
|
358
|
-
}>
|
|
359
|
-
warnings: Array<{ // should be addressed, does not block
|
|
360
|
-
issue: string
|
|
361
|
-
suggested_fix?: string
|
|
362
|
-
}>
|
|
363
|
-
challenges: string[] // flat list of all issues — backwards compatible
|
|
364
|
-
evidence_cited: string[]
|
|
365
|
-
citation_validation: {
|
|
366
|
-
valid_ids: string[] // cited IDs that were in the evidence pack
|
|
367
|
-
hallucinated_ids: string[] // cited IDs that were NOT in the evidence pack (hallucinated)
|
|
368
|
-
}
|
|
369
|
-
advisor_split: { // how advisors split on recommendation
|
|
370
|
-
proceed: number
|
|
371
|
-
redesign: number
|
|
372
|
-
"investigate-more": number
|
|
373
|
-
}
|
|
374
|
-
recommendation: "proceed" | "redesign" | "investigate-more"
|
|
375
|
-
}
|
|
376
|
-
```
|
|
377
|
-
|
|
378
|
-
Only `citation_validation.valid_ids` are written to the Chronicle proposal — hallucinated IDs are stripped automatically.
|
|
379
|
-
|
|
380
|
-
### Risk classifier (no LLM)
|
|
381
|
-
|
|
382
|
-
Before running the panel, Council classifies risk and scales fan-out accordingly:
|
|
383
|
-
|
|
384
|
-
```typescript
|
|
385
|
-
import { classifyRisk } from "./modules/council"
|
|
386
|
-
|
|
387
|
-
const risk = classifyRisk(outcome, design, evidence)
|
|
388
|
-
// risk.level — "low" | "medium" | "high" | "critical"
|
|
389
|
-
// risk.reasons — ["authentication or authorisation logic", ...]
|
|
390
|
-
// risk.council_mode — "jury-only" | "lite" | "full"
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
| Risk | Triggers | Council mode | Advisor + Reviewer |
|
|
394
|
-
|---|---|---|---|
|
|
395
|
-
| Low | Nothing sensitive detected | jury-only — skipped | 0 + 0 |
|
|
396
|
-
| Medium | Cache, queues, deployments, rate limiting | lite | 1 + 2 |
|
|
397
|
-
| High | DB migrations, permissions, PII, secrets | full | 5 + 5 |
|
|
398
|
-
| Critical | Auth, payments, crypto, data deletion | full + human flag | 5 + 5 |
|
|
399
|
-
|
|
400
|
-
Refuted entries in the evidence pack always elevate risk by at least one level. `jury-only` means Council is not called at all — Jury alone is sufficient for low-risk designs.
|
|
401
|
-
|
|
402
|
-
### Council output routing
|
|
403
|
-
|
|
404
|
-
| `satisfied` | `recommendation` | Next step |
|
|
405
|
-
|---|---|---|
|
|
406
|
-
| `true` | `proceed` | Human gate → Executor |
|
|
407
|
-
| `false` | `redesign` | Return to Designer with `blockers` |
|
|
408
|
-
| `false` | `investigate-more` | Return to Detective with `juryOutput.blocking_gaps` |
|
|
409
|
-
|
|
410
|
-
---
|
|
411
|
-
|
|
412
|
-
## Eval suite
|
|
413
|
-
|
|
414
|
-
`evals/` contains canonical test cases — known-bad proposals that should block and known-good ones that should pass. Deterministic assertions run on every CI pass:
|
|
415
|
-
|
|
416
|
-
```bash
|
|
417
|
-
npx vitest run evals/
|
|
418
|
-
```
|
|
419
|
-
|
|
420
|
-
Each case defines the proposal, expected risk level, expected preflight signals, and (optionally) expected Council recommendation for LLM-gated assertions. See `evals/cases/` for the full set and `evals/runner.ts` for the runner API.
|
|
421
|
-
|
|
422
|
-
---
|
|
423
|
-
|
|
424
|
-
## Sentinel
|
|
425
|
-
|
|
426
|
-
Sentinel is the health and visibility layer. It operates independently of the Oracle → Jury → Council pipeline and has no LLM dependency for its core functions.
|
|
427
|
-
|
|
428
|
-
### Coverage
|
|
429
|
-
|
|
430
|
-
Reports which source files have Chronicle entries and which are blind spots.
|
|
431
|
-
|
|
432
|
-
```typescript
|
|
433
|
-
import { coverage } from "./modules/sentinel"
|
|
434
|
-
|
|
435
|
-
const report = await coverage(".chronicle", "src", {
|
|
436
|
-
excludeTestFiles: true, // default — __tests__/, *.test.ts, *.spec.ts are excluded
|
|
437
|
-
})
|
|
438
|
-
// report.percentage, report.uncoveredFiles, report.coverageByFile
|
|
439
|
-
```
|
|
440
|
-
|
|
441
|
-
### Drift detection
|
|
442
|
-
|
|
443
|
-
For each Chronicle entry, asks the LLM whether the `key_insight` still accurately describes the current code. Advisory only — never modifies entries.
|
|
444
|
-
|
|
445
|
-
```typescript
|
|
446
|
-
import { detectDrift } from "./modules/sentinel"
|
|
447
|
-
|
|
448
|
-
const report = await detectDrift(".chronicle", "src", llmProvider)
|
|
449
|
-
// report.flags (potentially stale), report.confirmed, report.skipped
|
|
450
|
-
```
|
|
451
|
-
|
|
452
|
-
### Vitest assertions
|
|
453
|
-
|
|
454
|
-
Drop into any Vitest suite to get coverage and drift as named tests.
|
|
455
|
-
|
|
456
|
-
```typescript
|
|
457
|
-
import { describe } from "vitest"
|
|
458
|
-
import { sentinelAssertions } from "./modules/sentinel"
|
|
459
|
-
|
|
460
|
-
const assertions = sentinelAssertions({
|
|
461
|
-
chronicleDir: ".chronicle",
|
|
462
|
-
codebasePath: "src", // defaults to "." — scan from project root
|
|
463
|
-
llm: myLLMProvider, // omit to skip drift tests
|
|
464
|
-
minCoveragePercent: 50, // default 0 = advisory only, never fails CI
|
|
465
|
-
})
|
|
466
|
-
|
|
467
|
-
describe("sentinel", () => { assertions.forEach(a => a()) })
|
|
468
|
-
```
|
|
469
|
-
|
|
470
|
-
`minCoveragePercent: 0` (the default) means the coverage test is purely advisory — it logs gaps to the console but never fails the build. Raise it as the project matures.
|
|
471
|
-
|
|
472
|
-
### PR coverage map
|
|
473
|
-
|
|
474
|
-
`sentinel/review.ts` exports `reviewContext(changedFiles, chronicleDir, codebasePath)` — used by the `sentinel-pr.yml` GitHub Actions workflow to post a PR comment showing the full-project coverage table and a colour-coded Mermaid heatmap. Test files are excluded from the scan.
|
|
475
|
-
|
|
476
|
-
---
|
|
477
|
-
|
|
478
|
-
## Running tests
|
|
479
|
-
|
|
480
|
-
Tests use [Vitest](https://vitest.dev/). Add to your project's test config or run directly:
|
|
481
|
-
|
|
482
|
-
```bash
|
|
483
|
-
# Module unit tests
|
|
484
|
-
npx vitest run modules/
|
|
485
|
-
|
|
486
|
-
# Eval suite (deterministic assertions — no LLM required)
|
|
487
|
-
npx vitest run evals/
|
|
488
|
-
|
|
489
|
-
# Eval suite with LLM-gated assertions (jury confidence + council recommendation)
|
|
490
|
-
EVAL_LLM=1 OPENAI_API_KEY=sk-... npx vitest run evals/
|
|
491
|
-
```
|
|
492
|
-
|
|
493
|
-
---
|
|
494
|
-
|
|
495
|
-
## What these modules do NOT include
|
|
496
|
-
|
|
497
|
-
The following are application-specific and must be built in the host project:
|
|
498
|
-
|
|
499
|
-
- **Detective** — investigation and task intake
|
|
500
|
-
- **Designer** — solution proposal
|
|
501
|
-
- **Executor** — task execution (existing tools/agents)
|
|
502
|
-
- **Validator** — satisfaction evaluator on implementation
|
|
503
|
-
- **Human gate UI** — approval interface for Chronicle proposals
|
|
504
|
-
- **Workflow orchestration** — LangGraph, Inngest, or equivalent
|
package/modules/advisor/ask.ts
DELETED
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
import { z } from "zod"
|
|
2
|
-
import type { AdvisorInput, AdvisorOutput, AdvisorAnswer, AdvisorDeps } from "./types"
|
|
3
|
-
import { SYSTEM_PROMPT, buildUserPrompt } from "./prompt"
|
|
4
|
-
|
|
5
|
-
/** Minimum confidence an answer must report to be accepted without another attempt. */
const SATISFACTION_THRESHOLD = 0.7

/** Number of additional LLM attempts allowed after the first one. */
const MAX_RETRIES = 2

/**
 * Runtime schema for the JSON object the LLM is expected to return.
 *
 * `confidence` is bounded to [0, 1]; the free-text fields must be non-empty;
 * `risks` and `blockers` are (possibly empty) lists of strings.
 */
const AdvisorAnswerSchema = z.object({
  confidence: z.number().gte(0).lte(1),
  what_we_know: z.string().min(1),
  risks: z.string().array(),
  blockers: z.string().array(),
  recommendation: z.string().min(1),
  next_step: z.string().min(1),
})
|
|
16
|
-
|
|
17
|
-
/**
 * Run a single Advisor attempt: build the prompt, call the LLM, and validate
 * the reply against `AdvisorAnswerSchema`.
 *
 * On a retry (`attempt > 0` with a non-null `previous`), a "Previous Answer"
 * section is appended to the user prompt reporting the earlier confidence and
 * any unresolved blockers.
 *
 * @param input - Supplies the `question` and `evidence` passed to `buildUserPrompt`.
 * @param deps - Supplies the `llm` callable and the `model` forwarded to it.
 * @param attempt - Zero-based attempt index; 0 is the first call.
 * @param previous - Answer from the preceding attempt, or `null` on the first call.
 * @returns The schema-validated answer.
 * @throws Error if the reply cannot be parsed as JSON or fails schema validation.
 */
async function callLLM(
  input: AdvisorInput,
  deps: AdvisorDeps,
  attempt: number,
  previous: AdvisorAnswer | null,
): Promise<AdvisorAnswer> {
  const { llm, model } = deps

  let userPrompt = buildUserPrompt(input.question, input.evidence)

  if (attempt > 0 && previous) {
    const retrySection = [
      `## Previous Answer (attempt ${attempt} — did not meet quality threshold)`,
      `Confidence: ${previous.confidence.toFixed(2)} (need ≥ ${SATISFACTION_THRESHOLD})`,
    ]
    if (previous.blockers.length > 0) {
      retrySection.push(`Unresolved blockers: ${previous.blockers.join("; ")}`)
    }
    retrySection.push("Please produce a more specific and concrete answer.")

    // Start the section on its own line. The earlier implementation put a
    // leading "" in the array for this purpose, but `.filter(Boolean)` removed
    // it, so the heading was appended directly to the prompt's last line.
    userPrompt += "\n" + retrySection.join("\n")
  }

  const raw = await llm(
    [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    model,
  )

  let parsed: unknown
  try {
    // Strip an optional Markdown code fence (``` or ```json) around the payload.
    const cleaned = raw.replace(/^```(?:json)?\s*/m, "").replace(/\s*```$/m, "").trim()
    parsed = JSON.parse(cleaned)
  } catch {
    throw new Error(`Advisor: LLM returned non-JSON. Raw (first 300 chars): ${raw.slice(0, 300)}`)
  }

  const result = AdvisorAnswerSchema.safeParse(parsed)
  if (!result.success) {
    throw new Error(`Advisor: LLM output failed validation. Issues: ${JSON.stringify(result.error.issues)}`)
  }

  return result.data
}
|
|
62
|
-
|
|
63
|
-
/**
 * Ask the Advisor a plain-language question.
 *
 * Calls the LLM and accepts the answer as soon as it is satisfactory, i.e. its
 * confidence is at least `SATISFACTION_THRESHOLD` and it lists no blockers.
 * Otherwise it retries, passing the previous answer as context, for up to
 * `MAX_RETRIES` additional attempts. If no attempt is satisfactory, the answer
 * from the final attempt is returned as-is.
 *
 * @param input - The question and its supporting evidence.
 * @param deps - LLM provider and model used for every attempt.
 * @returns The accepted (or final) answer, plus the original `question` and
 *   `retries`, the zero-based index of the attempt that produced it.
 * @throws Error if any attempt returns non-JSON or output that fails schema
 *   validation (errors from `callLLM` are not caught here).
 */
export async function ask(input: AdvisorInput, deps: AdvisorDeps): Promise<AdvisorOutput> {
  let previous: AdvisorAnswer | null = null

  // Every attempt except the last one: return early only when satisfied.
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    const answer = await callLLM(input, deps, attempt, previous)

    const satisfied = answer.confidence >= SATISFACTION_THRESHOLD && answer.blockers.length === 0
    if (satisfied) {
      return { ...answer, question: input.question, retries: attempt }
    }

    previous = answer
  }

  // Last attempt in the budget: its answer is returned whether or not it
  // meets the threshold.
  const finalAnswer = await callLLM(input, deps, MAX_RETRIES, previous)
  return { ...finalAnswer, question: input.question, retries: MAX_RETRIES }
}
|
package/modules/advisor/index.ts
DELETED