@balpal4495/quorum 3.0.4 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/advisor/ask.d.ts +13 -0
- package/dist/advisor/ask.d.ts.map +1 -0
- package/dist/advisor/ask.js +67 -0
- package/dist/advisor/ask.js.map +1 -0
- package/dist/advisor/index.d.ts +3 -0
- package/dist/advisor/index.d.ts.map +1 -0
- package/dist/advisor/index.js +2 -0
- package/dist/advisor/index.js.map +1 -0
- package/dist/advisor/prompt.d.ts +5 -0
- package/dist/advisor/prompt.d.ts.map +1 -0
- package/{modules/advisor/prompt.ts → dist/advisor/prompt.js} +22 -26
- package/dist/advisor/prompt.js.map +1 -0
- package/dist/advisor/types.d.ts +23 -0
- package/dist/advisor/types.d.ts.map +1 -0
- package/dist/advisor/types.js +2 -0
- package/dist/advisor/types.js.map +1 -0
- package/dist/compass/behavior.d.ts +4 -0
- package/dist/compass/behavior.d.ts.map +1 -0
- package/dist/compass/behavior.js +138 -0
- package/dist/compass/behavior.js.map +1 -0
- package/dist/compass/create.d.ts +3 -0
- package/dist/compass/create.d.ts.map +1 -0
- package/dist/compass/create.js +289 -0
- package/dist/compass/create.js.map +1 -0
- package/dist/compass/evidence/collect.d.ts +11 -0
- package/dist/compass/evidence/collect.d.ts.map +1 -0
- package/dist/compass/evidence/collect.js +86 -0
- package/dist/compass/evidence/collect.js.map +1 -0
- package/dist/compass/index.d.ts +8 -0
- package/dist/compass/index.d.ts.map +1 -0
- package/dist/compass/index.js +8 -0
- package/dist/compass/index.js.map +1 -0
- package/dist/compass/prompts/index.d.ts +28 -0
- package/dist/compass/prompts/index.d.ts.map +1 -0
- package/{modules/compass/prompts/index.ts → dist/compass/prompts/index.js} +13 -38
- package/dist/compass/prompts/index.js.map +1 -0
- package/dist/compass/prompts/system.d.ts +2 -0
- package/dist/compass/prompts/system.d.ts.map +1 -0
- package/{modules/compass/prompts/system.ts → dist/compass/prompts/system.js} +2 -1
- package/dist/compass/prompts/system.js.map +1 -0
- package/dist/compass/propose.d.ts +15 -0
- package/dist/compass/propose.d.ts.map +1 -0
- package/dist/compass/propose.js +128 -0
- package/dist/compass/propose.js.map +1 -0
- package/dist/compass/schemas.d.ts +1271 -0
- package/dist/compass/schemas.d.ts.map +1 -0
- package/dist/compass/schemas.js +113 -0
- package/dist/compass/schemas.js.map +1 -0
- package/dist/compass/score.d.ts +25 -0
- package/dist/compass/score.d.ts.map +1 -0
- package/dist/compass/score.js +89 -0
- package/dist/compass/score.js.map +1 -0
- package/dist/compass/sources/index.d.ts +9 -0
- package/dist/compass/sources/index.d.ts.map +1 -0
- package/dist/compass/sources/index.js +408 -0
- package/dist/compass/sources/index.js.map +1 -0
- package/dist/compass/types.d.ts +334 -0
- package/dist/compass/types.d.ts.map +1 -0
- package/dist/compass/types.js +2 -0
- package/dist/compass/types.js.map +1 -0
- package/dist/council/advisors.d.ts +15 -0
- package/dist/council/advisors.d.ts.map +1 -0
- package/dist/council/advisors.js +46 -0
- package/dist/council/advisors.js.map +1 -0
- package/dist/council/chairman.d.ts +13 -0
- package/dist/council/chairman.d.ts.map +1 -0
- package/dist/council/chairman.js +145 -0
- package/dist/council/chairman.js.map +1 -0
- package/dist/council/deliberate.d.ts +22 -0
- package/dist/council/deliberate.d.ts.map +1 -0
- package/dist/council/deliberate.js +99 -0
- package/dist/council/deliberate.js.map +1 -0
- package/dist/council/frame.d.ts +8 -0
- package/dist/council/frame.d.ts.map +1 -0
- package/dist/council/frame.js +40 -0
- package/dist/council/frame.js.map +1 -0
- package/dist/council/index.d.ts +6 -0
- package/dist/council/index.d.ts.map +1 -0
- package/dist/council/index.js +4 -0
- package/dist/council/index.js.map +1 -0
- package/dist/council/personas.d.ts +18 -0
- package/dist/council/personas.d.ts.map +1 -0
- package/dist/council/personas.js +44 -0
- package/dist/council/personas.js.map +1 -0
- package/dist/council/reviewers.d.ts +13 -0
- package/dist/council/reviewers.d.ts.map +1 -0
- package/dist/council/reviewers.js +59 -0
- package/dist/council/reviewers.js.map +1 -0
- package/dist/council/risk.d.ts +16 -0
- package/dist/council/risk.d.ts.map +1 -0
- package/dist/council/risk.js +74 -0
- package/dist/council/risk.js.map +1 -0
- package/dist/council/types.d.ts +95 -0
- package/dist/council/types.d.ts.map +1 -0
- package/dist/council/types.js +2 -0
- package/dist/council/types.js.map +1 -0
- package/dist/jury/evaluate.d.ts +13 -0
- package/dist/jury/evaluate.d.ts.map +1 -0
- package/{modules/jury/evaluate.ts → dist/jury/evaluate.js} +60 -84
- package/dist/jury/evaluate.js.map +1 -0
- package/dist/jury/index.d.ts +6 -0
- package/dist/jury/index.d.ts.map +1 -0
- package/dist/jury/index.js +4 -0
- package/dist/jury/index.js.map +1 -0
- package/dist/jury/preflight.d.ts +26 -0
- package/dist/jury/preflight.d.ts.map +1 -0
- package/dist/jury/preflight.js +71 -0
- package/dist/jury/preflight.js.map +1 -0
- package/dist/jury/schema.d.ts +57 -0
- package/dist/jury/schema.d.ts.map +1 -0
- package/dist/jury/schema.js +21 -0
- package/dist/jury/schema.js.map +1 -0
- package/dist/jury/types.d.ts +47 -0
- package/dist/jury/types.d.ts.map +1 -0
- package/dist/jury/types.js +2 -0
- package/dist/jury/types.js.map +1 -0
- package/dist/oracle/adapters/lance-db.d.ts +15 -0
- package/dist/oracle/adapters/lance-db.d.ts.map +1 -0
- package/dist/oracle/adapters/lance-db.js +68 -0
- package/dist/oracle/adapters/lance-db.js.map +1 -0
- package/dist/oracle/adapters/xenova-embedder.d.ts +21 -0
- package/dist/oracle/adapters/xenova-embedder.d.ts.map +1 -0
- package/dist/oracle/adapters/xenova-embedder.js +36 -0
- package/dist/oracle/adapters/xenova-embedder.js.map +1 -0
- package/dist/oracle/bm25.d.ts +20 -0
- package/dist/oracle/bm25.d.ts.map +1 -0
- package/dist/oracle/bm25.js +82 -0
- package/dist/oracle/bm25.js.map +1 -0
- package/dist/oracle/index.d.ts +21 -0
- package/dist/oracle/index.d.ts.map +1 -0
- package/dist/oracle/index.js +25 -0
- package/dist/oracle/index.js.map +1 -0
- package/dist/oracle/log.d.ts +6 -0
- package/dist/oracle/log.d.ts.map +1 -0
- package/dist/oracle/log.js +12 -0
- package/dist/oracle/log.js.map +1 -0
- package/dist/oracle/propose.d.ts +25 -0
- package/dist/oracle/propose.d.ts.map +1 -0
- package/dist/oracle/propose.js +133 -0
- package/dist/oracle/propose.js.map +1 -0
- package/dist/oracle/query.d.ts +17 -0
- package/dist/oracle/query.d.ts.map +1 -0
- package/dist/oracle/query.js +106 -0
- package/dist/oracle/query.js.map +1 -0
- package/dist/oracle/summary.d.ts +11 -0
- package/dist/oracle/summary.d.ts.map +1 -0
- package/dist/oracle/summary.js +102 -0
- package/dist/oracle/summary.js.map +1 -0
- package/dist/oracle/types.d.ts +31 -0
- package/dist/oracle/types.d.ts.map +1 -0
- package/dist/oracle/types.js +2 -0
- package/dist/oracle/types.js.map +1 -0
- package/dist/sentinel/assert.d.ts +28 -0
- package/dist/sentinel/assert.d.ts.map +1 -0
- package/dist/sentinel/assert.js +63 -0
- package/dist/sentinel/assert.js.map +1 -0
- package/dist/sentinel/coverage.d.ts +14 -0
- package/dist/sentinel/coverage.d.ts.map +1 -0
- package/dist/sentinel/coverage.js +96 -0
- package/dist/sentinel/coverage.js.map +1 -0
- package/dist/sentinel/drift.d.ts +12 -0
- package/dist/sentinel/drift.d.ts.map +1 -0
- package/dist/sentinel/drift.js +149 -0
- package/dist/sentinel/drift.js.map +1 -0
- package/dist/sentinel/index.d.ts +7 -0
- package/dist/sentinel/index.d.ts.map +1 -0
- package/dist/sentinel/index.js +5 -0
- package/dist/sentinel/index.js.map +1 -0
- package/dist/sentinel/review.d.ts +15 -0
- package/dist/sentinel/review.d.ts.map +1 -0
- package/dist/sentinel/review.js +177 -0
- package/dist/sentinel/review.js.map +1 -0
- package/dist/setup.d.ts +103 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +87 -0
- package/dist/setup.js.map +1 -0
- package/dist/shared/types.d.ts +173 -0
- package/dist/shared/types.d.ts.map +1 -0
- package/dist/shared/types.js +16 -0
- package/dist/shared/types.js.map +1 -0
- package/package.json +13 -8
- package/.github/copilot-instructions.md +0 -117
- package/CLAUDE.md +0 -146
- package/GEMINI.md +0 -73
- package/SETUP.md +0 -264
- package/evals/__tests__/eval.test.ts +0 -31
- package/evals/cases/auth_hs256_rejected.json +0 -46
- package/evals/cases/auth_rs256_valid.json +0 -30
- package/evals/cases/cache_missing_lock.json +0 -31
- package/evals/cases/db_naive_not_null.json +0 -32
- package/evals/cases/logging_pii_leak.json +0 -32
- package/evals/cases/migration_with_rollback.json +0 -43
- package/evals/cases/no_evidence_novel_design.json +0 -16
- package/evals/cases/payment_no_idempotency.json +0 -33
- package/evals/cases/redis_session_rejected.json +0 -32
- package/evals/cases/safe_refactor.json +0 -17
- package/evals/runner.ts +0 -226
- package/modules/AGENTS.md +0 -78
- package/modules/CLAUDE.md +0 -93
- package/modules/README.md +0 -504
- package/modules/advisor/ask.ts +0 -87
- package/modules/advisor/index.ts +0 -2
- package/modules/advisor/types.ts +0 -26
- package/modules/compass/behavior.ts +0 -161
- package/modules/compass/create.ts +0 -365
- package/modules/compass/evidence/collect.ts +0 -109
- package/modules/compass/index.ts +0 -7
- package/modules/compass/propose.ts +0 -152
- package/modules/compass/schemas.ts +0 -121
- package/modules/compass/score.ts +0 -77
- package/modules/compass/sources/index.ts +0 -413
- package/modules/compass/types.ts +0 -431
- package/modules/council/advisors.ts +0 -71
- package/modules/council/chairman.ts +0 -183
- package/modules/council/deliberate.ts +0 -141
- package/modules/council/frame.ts +0 -54
- package/modules/council/index.ts +0 -9
- package/modules/council/personas.ts +0 -57
- package/modules/council/reviewers.ts +0 -82
- package/modules/council/risk.ts +0 -89
- package/modules/council/types.ts +0 -107
- package/modules/jury/index.ts +0 -5
- package/modules/jury/preflight.ts +0 -101
- package/modules/jury/schema.ts +0 -24
- package/modules/jury/types.ts +0 -50
- package/modules/oracle/adapters/lance-db.ts +0 -81
- package/modules/oracle/adapters/xenova-embedder.ts +0 -43
- package/modules/oracle/bm25.ts +0 -92
- package/modules/oracle/index.ts +0 -36
- package/modules/oracle/log.ts +0 -15
- package/modules/oracle/propose.ts +0 -164
- package/modules/oracle/query.ts +0 -146
- package/modules/oracle/summary.ts +0 -116
- package/modules/oracle/types.ts +0 -32
- package/modules/sentinel/assert.ts +0 -95
- package/modules/sentinel/coverage.ts +0 -106
- package/modules/sentinel/drift.ts +0 -163
- package/modules/sentinel/index.ts +0 -6
- package/modules/sentinel/review.ts +0 -208
- package/modules/setup.ts +0 -202
- package/modules/shared/types.ts +0 -193
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import type { CouncilInput, CouncilOutput, CouncilDeps } from "./types"
|
|
2
|
-
import { DEFAULT_PERSONAS } from "./personas"
|
|
3
|
-
import { frameQuestion } from "./frame"
|
|
4
|
-
import { fanOutAdvisors } from "./advisors"
|
|
5
|
-
import { fanOutReviewers } from "./reviewers"
|
|
6
|
-
import { chairman } from "./chairman"
|
|
7
|
-
import { classifyRisk } from "./risk"
|
|
8
|
-
|
|
9
|
-
// Panel sizes used by deliberate() when the caller supplies no explicit counts.

// Applied whenever the council mode is anything other than "lite".
const DEFAULT_ADVISOR_COUNT = 5
const DEFAULT_REVIEWER_COUNT = 5

// Applied when the risk classifier selects the "lite" council mode.
const LITE_ADVISOR_COUNT = 1
const LITE_REVIEWER_COUNT = 2
|
|
13
|
-
|
|
14
|
-
/**
 * Orchestrate one Council deliberation for a proposed design.
 *
 * Steps, in order:
 *   0. classifyRisk     — picks the council mode. "jury-only" returns a canned
 *                         "proceed" output immediately, before any LLM or Oracle call.
 *   1. frameQuestion    — turns outcome + design + Jury output into a brief
 *   2. fanOutAdvisors   — one advisor per selected persona
 *   3. fanOutReviewers  — reviewers critique the advisor responses
 *   4. chairman         — produces the CouncilOutput verdict
 *   5. oracle.propose() — records the verdict as an "open" proposal
 *
 * Panel sizes: deps.advisorCount / deps.reviewerCount win when they are not
 * null/undefined; otherwise "lite" mode uses the LITE_* constants and every
 * other mode uses the DEFAULT_* constants.
 *
 * @param input outcome, design, evidence pack and Jury output under review
 * @param deps  llm, oracle, optional per-stage models, optional panel sizes
 * @returns the chairman's verdict, or the canned output in "jury-only" mode
 */
export async function deliberate(
  input: CouncilInput,
  deps: CouncilDeps,
): Promise<CouncilOutput> {
  const { llm, oracle, models = {} } = deps

  const risk = classifyRisk(input.outcome, input.design, input.evidence)
  const mode = risk.council_mode

  // Low-risk designs never reach the panel.
  if (mode === "jury-only") {
    return {
      satisfied: true,
      verdict: "Skipped — low-risk design passed by Jury without Council review.",
      blockers: [],
      warnings: [],
      challenges: [],
      evidence_cited: [],
      citation_validation: { valid_ids: [], hallucinated_ids: [] },
      advisor_split: { proceed: 0, redesign: 0, "investigate-more": 0 },
      recommendation: "proceed",
    }
  }

  const lite = mode === "lite"
  const advisorCount =
    deps.advisorCount ?? (lite ? LITE_ADVISOR_COUNT : DEFAULT_ADVISOR_COUNT)
  const reviewerCount =
    deps.reviewerCount ?? (lite ? LITE_REVIEWER_COUNT : DEFAULT_REVIEWER_COUNT)

  // Wrap around DEFAULT_PERSONAS when more advisors are requested than personas exist.
  const personaForSlot = (_: unknown, slot: number) =>
    DEFAULT_PERSONAS[slot % DEFAULT_PERSONAS.length]
  const personas = Array.from({ length: advisorCount }, personaForSlot)

  // 1. Brief for the panel
  const framedQuestion = await frameQuestion(input, llm, models.frame)

  // 2. Advisor fan-out
  const advisorResponses = await fanOutAdvisors(
    framedQuestion,
    input.evidence,
    personas,
    llm,
    models.advisors,
  )

  // 3. Reviewer fan-out (fanOutReviewers anonymises the advisor responses itself)
  const reviewerResponses = await fanOutReviewers(
    advisorResponses,
    input.evidence,
    reviewerCount,
    llm,
    models.reviewers,
  )

  // 4. Chairman synthesis
  const verdict = await chairman(
    advisorResponses,
    reviewerResponses,
    input.evidence,
    llm,
    models.chairman,
  )

  // 5. Summarise the verdict for the proposal: prefer the text before the first
  //    ".", "!" or "?" when it is at least 20 characters long, otherwise fall back
  //    to the whole verdict; either way cap at 200 characters.
  const firstSentence = (verdict.verdict.split(/[.!?]/)[0] ?? "").trim()
  const summarySource = firstSentence.length < 20 ? verdict.verdict : firstSentence
  const keyInsight = summarySource.slice(0, 200)

  await oracle.propose({
    schema_version: 2,
    topic: input.outcome.slice(0, 80),
    decision: keyInsight,
    key_insight: keyInsight,
    affected_areas: extractAffectedAreas(input.outcome, input.design),
    alternatives_considered: verdict.challenges,
    // Blocker issues are recorded only for an unsatisfied verdict (first three).
    rejected_reason: !verdict.satisfied
      ? verdict.blockers.map(({ issue }) => issue).slice(0, 3)
      : [],
    status: "open",
    confidence: input.jury_output.confidence,
    source_module: "council",
    evidence_cited: verdict.citation_validation.valid_ids,
    scope: risk.reasons.slice(0, 3),
  })

  return verdict
}
|
|
130
|
-
|
|
131
|
-
/**
 * Heuristically derive "affected area" labels from the outcome and design text.
 *
 * Every run of capitalised words counts as one candidate. A word is an A–Z letter
 * followed by at least one more ASCII letter; words in a run are separated by a
 * single whitespace character. Duplicates are dropped, first-seen order is kept,
 * and at most five candidates are returned.
 *
 * @returns up to five distinct phrases, or ["general"] when nothing matches
 */
function extractAffectedAreas(outcome: string, design: string): string[] {
  const capitalisedRun = /\b[A-Z][a-zA-Z]+(?:\s[A-Z][a-zA-Z]+)*\b/g
  const seen = new Set<string>()

  for (const [phrase] of `${outcome} ${design}`.matchAll(capitalisedRun)) {
    seen.add(phrase)
  }

  if (seen.size === 0) return ["general"]
  return Array.from(seen).slice(0, 5)
}
|
package/modules/council/frame.ts
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import type { LLMProvider } from "../shared/types"
|
|
2
|
-
import type { CouncilInput } from "./types"
|
|
3
|
-
|
|
4
|
-
/**
 * Ask the LLM to write the deliberation brief that the advisor panel works from.
 *
 * The user prompt lists the outcome, the design, and the Jury's assessment,
 * confidence (two decimals) and gaps, followed by a directive. A council_brief of
 * "challenge" yields the "find what is wrong" directive; any other value yields
 * the pressure-test directive.
 *
 * @param input Council input; outcome, design and jury_output are read
 * @param llm   provider, invoked once with [system, user] messages
 * @param model optional model identifier, forwarded to the provider unchanged
 * @returns whatever the provider resolves to
 */
export async function frameQuestion(
  input: CouncilInput,
  llm: LLMProvider,
  model?: string,
): Promise<string> {
  const { outcome, design, jury_output: jury } = input
  const score = jury.confidence.toFixed(2)

  let directive: string
  if (jury.council_brief === "challenge") {
    directive =
      `The Jury has LOW confidence (score: ${score}). ` +
      "Find what is WRONG with this design. Look for fundamental flaws, not just edge cases."
  } else {
    directive =
      `The Jury has HIGH confidence (score: ${score}). ` +
      "PRESSURE-TEST this design. Assume it is broadly correct — try to break it. " +
      "Find edge cases, scaling failures, and hidden assumptions."
  }

  const systemPrompt =
    "You are the Council Framer. You write the deliberation brief that a panel of expert advisors will work from.\n" +
    "\n" +
    "Write a clear, precise brief that:\n" +
    "1. States what needs to be achieved (the outcome)\n" +
    "2. States what is being proposed (the design)\n" +
    "3. States the Jury's assessment and the gaps it identified\n" +
    "4. Sets the council directive — challenge or pressure-test\n" +
    "\n" +
    "Keep it under 300 words. Be direct. Advisors must know exactly what to evaluate."

  // An empty gaps list joins to "", which falls through to the placeholder text.
  const gapsLine = jury.gaps.join("; ") || "none identified"

  const userPrompt =
    `Outcome: ${outcome}\n` +
    "\n" +
    `Design: ${design}\n` +
    "\n" +
    `Jury assessment: ${jury.assessment}\n` +
    `Jury confidence: ${score}\n` +
    `Jury gaps: ${gapsLine}\n` +
    "\n" +
    directive

  const messages = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ]
  return llm(messages, model)
}
|
package/modules/council/index.ts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
export { deliberate } from "./deliberate"
|
|
2
|
-
export type {
|
|
3
|
-
CouncilInput, CouncilOutput, CouncilDeps, CouncilModels,
|
|
4
|
-
BlockerItem, WarningItem, CitationValidation, AdvisorSplit,
|
|
5
|
-
RiskLevel, CouncilMode, RiskAssessment,
|
|
6
|
-
} from "./types"
|
|
7
|
-
export { DEFAULT_PERSONAS } from "./personas"
|
|
8
|
-
export type { AdvisorPersona } from "./personas"
|
|
9
|
-
export { classifyRisk } from "./risk"
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Default advisor personas for the Council.
|
|
3
|
-
*
|
|
4
|
-
* Personas are interpretive lenses, not knowledge sources.
|
|
5
|
-
* All advisors receive the same Oracle evidence pack — their persona
|
|
6
|
-
* determines which entries they weight and how they read them.
|
|
7
|
-
*
|
|
8
|
-
* Add or replace personas in CouncilDeps to specialise for your domain.
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
/** One advisor persona: a named way of reading the shared evidence pack. */
export interface AdvisorPersona {
  /** Display name of the persona (e.g. "Pragmatist"). */
  name: string
  /** Single-line summary of which evidence this persona concentrates on. */
  lens: string
  /** Text fragment that is injected into the advisor's system prompt. */
  systemFragment: string
}
|
|
18
|
-
|
|
19
|
-
/**
 * The five built-in advisor personas, in the order they are assigned to advisors.
 * Each systemFragment is written as a list of sentences joined by single spaces.
 */
export const DEFAULT_PERSONAS: readonly AdvisorPersona[] = [
  {
    name: "Pragmatist",
    lens: "Weights validated entries — what has worked in this codebase",
    systemFragment: [
      "Focus on `validated` Oracle entries.",
      "What has already worked in this codebase?",
      "Weight evidence that confirms the design will succeed based on prior outcomes.",
    ].join(" "),
  },
  {
    name: "Sceptic",
    lens: "Weights refuted entries — what has failed and why",
    systemFragment: [
      "Focus on `refuted` Oracle entries.",
      "What has already failed in this codebase and why?",
      "Look for signs this design repeats past mistakes.",
      "Surface failure modes explicitly.",
    ].join(" "),
  },
  {
    name: "Systems thinker",
    lens: "Looks for patterns across all entries — second-order effects",
    systemFragment: [
      "Read all Oracle entries as a system.",
      "Look for patterns, dependencies, and second-order effects.",
      "What does the design miss about how the system as a whole behaves?",
    ].join(" "),
  },
  {
    name: "Risk analyst",
    lens: "Weights open entries — unresolved questions and unknowns",
    systemFragment: [
      "Focus on `open` Oracle entries — unresolved questions and unknowns.",
      "What has not been confirmed?",
      "What uncertainty does this design carry?",
      "Flag every assumption that has not been validated by an outcome.",
    ].join(" "),
  },
  {
    name: "Evidence auditor",
    lens: "Focuses on gaps — what Oracle does NOT contain",
    systemFragment: [
      "Look for what is ABSENT from the Oracle evidence.",
      "What decisions is this design making without any codebase evidence to support them?",
      "Name every gap — a gap is not a reason to reject, but it must be surfaced.",
    ].join(" "),
  },
]
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import type { LLMProvider, OracleResult } from "../shared/types"
|
|
2
|
-
import { entryText } from "../shared/types"
|
|
3
|
-
import type { AdvisorResponse } from "./advisors"
|
|
4
|
-
|
|
5
|
-
/** One reviewer's critique of the advisor panel. */
export interface ReviewerResponse {
  /** Identifier assigned by fanOutReviewers, of the form "reviewer-<n>" (1-based). */
  reviewerId: string
  /** The review text exactly as returned by the LLM provider. */
  review: string
}
|
|
9
|
-
|
|
10
|
-
/**
 * Spreadsheet-style label for a zero-based position: A … Z, then AA, AB, …
 * Keeps headings alphabetic when more than 26 advisors are configured.
 */
function advisorLabel(index: number): string {
  let label = ""
  for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
    label = String.fromCharCode(65 + (n % 26)) + label
  }
  return label
}

/**
 * Render advisor responses as a single anonymised markdown document.
 *
 * The responses are placed in a uniformly random order and each one is emitted
 * under a "## Advisor <label>" heading, with sections separated by "---" rules.
 * Only `response` is written out, so nothing else about the advisor reaches the
 * reviewers. The input array is not modified.
 *
 * The order is produced by drawing without replacement. The previous
 * `sort(() => Math.random() - 0.5)` handed the sort an inconsistent comparator,
 * which yields a biased, engine-dependent order — the very position bias this
 * function exists to remove.
 *
 * @param responses advisor responses; only `response` is read
 * @returns the combined markdown, or "" for an empty list
 */
function anonymise(responses: AdvisorResponse[]): string {
  const pool = [...responses]
  const shuffled: AdvisorResponse[] = []

  // Each remaining response is equally likely to be drawn next.
  while (pool.length > 0) {
    const pick = Math.floor(Math.random() * pool.length)
    shuffled.push(...pool.splice(pick, 1))
  }

  return shuffled
    .map((r, i) => `## Advisor ${advisorLabel(i)}\n${r.response}`)
    .join("\n\n---\n\n")
}
|
|
20
|
-
|
|
21
|
-
/**
 * Render the evidence pack as text for the reviewer prompt: one line per entry
 * in the form "[id] (status) <entryText(entry)>", joined by newlines.
 *
 * @returns the joined lines, or a fixed placeholder sentence for an empty pack
 */
function formatEvidenceSummary(evidence: OracleResult[]): string {
  if (evidence.length === 0) {
    return "No Oracle evidence available."
  }

  const lines: string[] = []
  for (const entry of evidence) {
    lines.push(`[${entry.id}] (${entry.status}) ${entryText(entry)}`)
  }
  return lines.join("\n")
}
|
|
27
|
-
|
|
28
|
-
// System prompt shared by every reviewer. Written as paragraphs: lines inside a
// paragraph are joined by "\n", paragraphs are separated by one blank line.
const REVIEWER_SYSTEM_PROMPT = [
  ["You are a Council reviewer. You evaluate the quality of advisor responses."],
  [
    "You are NOT deciding whether the design is correct.",
    "You are assessing the reasoning quality of each advisor response:",
  ],
  [
    "1. Does the advisor actually use the Oracle evidence, or reason from general knowledge?",
    "2. Are Oracle entry IDs cited? Do those citations match the evidence provided?",
    "3. Is the response internally consistent?",
    "4. Which responses provide the strongest evidence-backed reasoning?",
    "5. Which responses make unsupported claims?",
  ],
  [
    "Be critical. Evidence quality matters more than conclusion confidence.",
    "Keep your review under 400 words.",
  ],
]
  .map(paragraph => paragraph.join("\n"))
  .join("\n\n")
|
|
43
|
-
|
|
44
|
-
/**
 * Launch `reviewerCount` reviewer calls concurrently and collect their reviews.
 *
 * The advisor responses are anonymised once and the evidence summary is built
 * once; every reviewer receives the same user prompt and the shared reviewer
 * system prompt. Results come back in reviewer order with ids "reviewer-1",
 * "reviewer-2", …. Because Promise.all is used, one rejected call rejects the
 * whole batch.
 *
 * @param advisorResponses responses to be critiqued
 * @param evidence         evidence pack, included for cross-referencing citations
 * @param reviewerCount    number of reviewer calls to make
 * @param llm              provider invoked once per reviewer
 * @param model            optional model identifier forwarded to the provider
 */
export async function fanOutReviewers(
  advisorResponses: AdvisorResponse[],
  evidence: OracleResult[],
  reviewerCount: number,
  llm: LLMProvider,
  model?: string,
): Promise<ReviewerResponse[]> {
  const anonymisedResponses = anonymise(advisorResponses)
  const evidenceSummary = formatEvidenceSummary(evidence)

  // Identical for every reviewer, so it is assembled a single time.
  const userPrompt =
    "## Advisor Responses (anonymised)\n" +
    `${anonymisedResponses}\n` +
    "\n" +
    "## Oracle Evidence (for cross-referencing citations)\n" +
    `${evidenceSummary}\n` +
    "\n" +
    "Review each advisor response for evidence quality."

  const runReviewer = async (_: unknown, index: number): Promise<ReviewerResponse> => {
    const review = await llm(
      [
        { role: "system", content: REVIEWER_SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
      ],
      model,
    )
    return { reviewerId: `reviewer-${index + 1}`, review }
  }

  return Promise.all(Array.from({ length: reviewerCount }, runReviewer))
}
|
package/modules/council/risk.ts
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
import type { OracleResult } from "../shared/types"
|
|
2
|
-
import type { RiskLevel, CouncilMode, RiskAssessment } from "./types"
|
|
3
|
-
|
|
4
|
-
/**
 * Keyword rules that escalate the risk level of a proposed change.
 *
 * Each rule carries a case-insensitive pattern, the minimum risk level a match
 * triggers, and a human-readable reason label. Rules are listed from most to
 * least severe.
 *
 * Keywords are anchored with \b on both sides, so every inflection that should
 * count has to be spelled out. Plurals and common verb forms ("passwords",
 * "tokens", "encryption", "migrations", "permissions", "webhooks", …) and the
 * British spelling "authorisation" are therefore listed explicitly; without
 * them such text matched no rule and was treated as low risk. Every pattern
 * still matches everything the previous, singular-only pattern matched.
 *
 * None of the patterns uses the `g` or `y` flag, so `.test()` is stateless.
 */
const RISK_RULES: Array<{ pattern: RegExp; level: RiskLevel; reason: string }> = [
  // Critical
  {
    level: "critical",
    reason: "authentication or authorisation logic",
    pattern: /\b(auth(?:entication|ori[sz]ation)?|jwts?|tokens?|sessions?|passwords?|oauth|credentials?|bearer)\b/i,
  },
  {
    level: "critical",
    reason: "payment or billing logic",
    pattern: /\b(payments?|stripe|charges?|billing|checkout|refunds?|subscriptions?)\b/i,
  },
  {
    level: "critical",
    reason: "cryptography or key management",
    pattern: /\b(encrypt(?:s|ed|ing|ion)?|decrypt(?:s|ed|ing|ion)?|private\s+keys?|certificates?|tls|ssl|hmac|ciphers?)\b/i,
  },
  {
    level: "critical",
    reason: "irreversible data deletion",
    pattern: /\b(delete\s+all|drop\s+table|truncat(?:e[sd]?|ing)|wip(?:e[sd]?|ing)|destroy.*data|hard\s+delet(?:e[sd]?|ing|ion))\b/i,
  },

  // High
  {
    level: "high",
    reason: "database schema migration",
    pattern: /\b(migrat(?:ions?|e[sd]?|ing)|alter\s+table|schema\s+changes?|not\s+null|backfill(?:s|ed|ing)?|pg_repack|shadow\s+columns?)\b/i,
  },
  {
    level: "high",
    reason: "permissions or access control",
    pattern: /\b(permissions?|roles?|acls?|rbac|access\s+control|entitlements?)\b/i,
  },
  {
    level: "high",
    reason: "PII or compliance-regulated data",
    pattern: /\b(pii|personal\s+data|gdpr|ccpa|emails?(?:\s+address(?:es)?)?|phones?(?:\s+numbers?)?|ssns?|passports?)\b/i,
  },
  {
    level: "high",
    reason: "secrets or credentials handling",
    pattern: /\b(api\s+keys?|secrets?|private\s+keys?|credentials?)\b/i,
  },

  // Medium
  {
    level: "medium",
    reason: "cache strategy",
    pattern: /\b(cach(?:e[sd]?|ing)|redis|memcached|invalidat(?:e[sd]?|ions?|ing))\b/i,
  },
  {
    level: "medium",
    reason: "rate limiting or throttling",
    pattern: /\b(rate\s*limit(?:s|ed|ing|er|ers)?|throttl(?:e[sd]?|ing)|quotas?)\b/i,
  },
  {
    level: "medium",
    reason: "async event or messaging",
    pattern: /\b(webhooks?|events?|queues?|pubsub|kafka|rabbitmq|sns|sqs)\b/i,
  },
  {
    level: "medium",
    reason: "deployment or infrastructure",
    pattern: /\b(deploy(?:s|ed|ing|ments?)?|ci(?:\/cd)?|docker|kubernetes|infra(?:structure)?)\b/i,
  },
]
|
|
27
|
-
|
|
28
|
-
/** Risk levels from least to most severe; an entry's index is its severity rank. */
const RISK_ORDER: RiskLevel[] = ["low", "medium", "high", "critical"]

/**
 * Pick the more severe of two risk levels according to RISK_ORDER.
 * When both have the same rank, `a` is returned.
 */
function maxLevel(a: RiskLevel, b: RiskLevel): RiskLevel {
  const rankOfA = RISK_ORDER.indexOf(a)
  const rankOfB = RISK_ORDER.indexOf(b)
  if (rankOfA < rankOfB) {
    return b
  }
  return a
}
|
|
33
|
-
|
|
34
|
-
/**
 * Map a risk level to the council mode it selects:
 * "low" → "jury-only", "medium" → "lite", "high" and "critical" → "full".
 */
function councilModeForLevel(level: RiskLevel): CouncilMode {
  switch (level) {
    case "low":
      return "jury-only"
    case "medium":
      return "lite"
    case "high":
    case "critical":
      return "full"
  }
}
|
|
42
|
-
|
|
43
|
-
/**
 * Classify how risky a proposed change is, from its text and its evidence pack.
 *
 * Two signals are combined:
 *   - Keyword rules: every RISK_RULES pattern that matches "<outcome> <design>"
 *     contributes its reason and raises the level to at least the rule's level.
 *   - Refuted evidence: one refuted entry raises the level to at least "medium",
 *     two or more to at least "high"; a reason line is added in either case.
 *
 * The resulting level selects the council mode via councilModeForLevel:
 *   low → "jury-only", medium → "lite", high / critical → "full".
 *
 * @param outcome  what needs to be achieved
 * @param design   the proposed approach
 * @param evidence evidence pack; only each entry's `status` is read
 * @returns level, reasons (a single placeholder reason when nothing triggered)
 *          and the council mode for that level
 */
export function classifyRisk(
  outcome: string,
  design: string,
  evidence: OracleResult[],
): RiskAssessment {
  const text = `${outcome} ${design}`
  const reasons: string[] = []
  let level: RiskLevel = "low"

  for (const { pattern, level: ruleLevel, reason } of RISK_RULES) {
    if (!pattern.test(text)) continue

    const escalated = maxLevel(level, ruleLevel)
    // Skip only when the rule neither raises the level nor adds a new reason.
    if (escalated === level && reasons.includes(reason)) continue

    level = escalated
    reasons.push(reason)
  }

  // Refuted entries in the evidence pack count as a risk signal of their own.
  let refutedCount = 0
  for (const entry of evidence) {
    if (entry.status === "refuted") refutedCount += 1
  }
  if (refutedCount > 0) {
    level = maxLevel(level, refutedCount >= 2 ? "high" : "medium")
    const noun = refutedCount === 1 ? "entry" : "entries"
    reasons.push(`${refutedCount} refuted Chronicle ${noun} in evidence pack`)
  }

  return {
    level,
    reasons: reasons.length > 0 ? reasons : ["no sensitive patterns detected"],
    council_mode: councilModeForLevel(level),
  }
}
|
package/modules/council/types.ts
DELETED
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
import type { OracleResult, LLMProvider, OracleClient } from "../shared/types"
|
|
2
|
-
import type { JuryOutput } from "../jury/types"
|
|
3
|
-
|
|
4
|
-
/** Everything the Council needs to deliberate on one proposed design. */
export interface CouncilInput {
  /** The goal: what has to be achieved. */
  outcome: string
  /** The approach proposed by the Designer. */
  design: string
  /** Evidence pack — the same one that was given to the Jury. */
  evidence: OracleResult[]
  /** The Jury's output; supplies the council brief and the confidence score. */
  jury_output: JuryOutput
}
|
|
14
|
-
|
|
15
|
-
/** A blocking finding: it has to be resolved before the design may proceed. */
export interface BlockerItem {
  /** Description of the problem. */
  issue: string
  /** IDs of the Oracle entries that support this blocker. */
  evidence: string[]
  /** The change to the design that would resolve the blocker. */
  required_fix: string
}
|
|
23
|
-
|
|
24
|
-
/**
 * A non-blocking finding: worth addressing, but it does not stop the design
 * from proceeding.
 */
export interface WarningItem {
  /** Description of the problem. */
  issue: string
  /** Optional suggestion for addressing the issue. */
  suggested_fix?: string
}
|
|
29
|
-
|
|
30
|
-
/**
 * Result of checking cited Oracle IDs against the evidence pack.
 */
export interface CitationValidation {
  /** Cited IDs that are present in the evidence pack. */
  valid_ids: string[]
  /** Cited IDs that are absent from the evidence pack — likely hallucinated. */
  hallucinated_ids: string[]
}
|
|
37
|
-
|
|
38
|
-
/**
 * Tally of advisor recommendations, one counter per possible recommendation.
 * The spread across counters signals how much the advisors disagreed.
 */
export interface AdvisorSplit {
  proceed: number
  redesign: number
  "investigate-more": number
}
|
|
44
|
-
|
|
45
|
-
/** The Council's full result for one design review. */
export interface CouncilOutput {
  satisfied: boolean
  /** The Chairman's synthesis; each material conclusion cites Oracle entry IDs. */
  verdict: string
  /**
   * Issues that MUST be fixed before the design goes ahead. Every blocker
   * carries the issue, its supporting Oracle evidence, and the required fix.
   */
  blockers: BlockerItem[]
  /**
   * Issues that SHOULD be fixed but do not block execution.
   */
  warnings: WarningItem[]
  /** Flat list of every issue raised — kept for backwards compatibility with existing consumers. */
  challenges: string[]
  /** IDs of the Oracle entries the verdict refers to. */
  evidence_cited: string[]
  /** Whether the cited IDs actually exist in the evidence pack. */
  citation_validation: CitationValidation
  /** Advisor recommendation tally — high disagreement means escalate. */
  advisor_split: AdvisorSplit
  recommendation: "proceed" | "redesign" | "investigate-more"
}
|
|
68
|
-
|
|
69
|
-
/** Optional per-stage model overrides for the Council. */
export interface CouncilModels {
  /** Model used by the framer step. */
  frame?: string
  /** Model used by advisors. High call volume, so a cheaper model is appropriate. */
  advisors?: string
  /** Model used by reviewers. Critical analysis, so a stronger model is recommended. */
  reviewers?: string
  /** Model used by the chairman. Synthesis, so the best available model is recommended. */
  chairman?: string
}
|
|
79
|
-
|
|
80
|
-
/** Dependencies and tuning knobs supplied to the Council. */
export interface CouncilDeps {
  llm: LLMProvider
  oracle: OracleClient
  /** How many advisors run in parallel. Default: 5. */
  advisorCount?: number
  /** How many reviewers run in parallel. Default: 5. */
  reviewerCount?: number
  models?: CouncilModels
}
|
|
89
|
-
|
|
90
|
-
// ── Risk classifier types ─────────────────────────────────────────────────────

/** Risk level label attached to a risk assessment. */
export type RiskLevel = "low" | "medium" | "high" | "critical"
|
|
93
|
-
|
|
94
|
-
/**
 * Selects how much of the Council pipeline runs.
 *
 *   skip      → Oracle query only, no LLM validation
 *   jury-only → Jury scores, Council skipped entirely (low-risk fast path)
 *   lite      → Jury + 1–2 reviewers (no full advisor fan-out)
 *   full      → Full Council (default 5 advisors + 5 reviewers + Chairman)
 */
export type CouncilMode = "skip" | "jury-only" | "lite" | "full"
|
|
102
|
-
|
|
103
|
-
/** Outcome of risk classification for a proposed design. */
export interface RiskAssessment {
  /** The assessed risk level. */
  level: RiskLevel
  /** Human-readable reasons behind the assessed level. */
  reasons: string[]
  /** The Council mode chosen for this risk level. */
  council_mode: CouncilMode
}
|
package/modules/jury/index.ts
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
export { evaluate } from "./evaluate"
|
|
2
|
-
export type { JuryInput, JuryOutput, JuryDeps, ConfidenceBreakdown } from "./types"
|
|
3
|
-
export { JuryOutputSchema } from "./schema"
|
|
4
|
-
export { runPreflight, formatPreflight } from "./preflight"
|
|
5
|
-
export type { PreflightResult } from "./preflight"
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
import type { OracleResult } from "../shared/types"
|
|
2
|
-
import { entryText } from "../shared/types"
|
|
3
|
-
|
|
4
|
-
/**
 * Areas that warrant elevated scrutiny.
 *
 * Maps a sensitive-area name to a case-insensitive pattern that is tested
 * against the combined outcome + design text. None of the patterns use the
 * `g`/`y` flags, so `.test()` is stateless and the regexes can be reused.
 */
const SENSITIVE_PATTERNS: Record<string, RegExp> = {
  auth: /\b(auth(?:entication|orization)?|jwt|token|session|password|oauth|login|logout|credential|bearer)\b/i,
  database: /\b(migrat(?:ion|e)|alter\s+table|schema\s+change|postgres|mysql|sqlite|prisma|drizzle|knex|sequelize)\b/i,
  // "encryption"/"decryption" and friends are the usual way these words show
  // up in a design; the bare stems alone would miss them because of the
  // trailing word boundary.
  crypto: /\b(encrypt(?:ion|ed|ing)?|decrypt(?:ion|ed|ing)?|cipher|hash(?:ing)?|hmac|sign(?:ing)?|verify|private\s+key|certificate|tls|ssl)\b/i,
  payments: /\b(payment|stripe|charge|billing|invoice|subscription|price|checkout|refund)\b/i,
  permissions: /\b(permission|role(?:s)?|acl|access\s+control|rbac|authorization|entitlement)\b/i,
  pii: /\b(pii|personal\s+data|gdpr|ccpa|email(?:\s+address)?|phone(?:\s+number)?|postal\s+address|ssn|passport)\b/i,
  data_deletion: /\b(delete(?:\s+all)?|drop\s+table|truncate|purge|wipe|destroy.*data|hard\s+delete)\b/i,
  // `.env` starts with a non-word character, so it must sit outside the
  // leading `\b`: a word boundary before "." only exists when a word
  // character precedes it, which means a standalone ".env" never matched.
  secrets: /(?:\b(?:api\s+key|secret(?:s)?|env(?:ironment)?\s+var(?:iable)?|private\s+key|credentials?)|\.env)\b/i,
}
|
|
15
|
-
|
|
16
|
-
// Matches mentions of a rollback or recovery strategy. The compatibility
// alternative accepts "backward-compat", "backward-compatible",
// "backwards compatibility", etc.; with a bare `compat` stem the trailing
// `\b` rejected the full words.
const ROLLBACK_PATTERNS = /\b(rollback|roll\s+back|revert|undo|restore|recovery|fallback|backwards?[- ]compat(?:ible|ibility)?)\b/i
// Matches mentions of testing, specs, coverage, or a known test runner.
const TEST_PATTERNS = /\b(test(?:ing|s)?|spec(?:ification)?|unit\s+test|integration\s+test|coverage|vitest|jest|mocha)\b/i
|
|
18
|
-
|
|
19
|
-
/** Signals produced by the static preflight checks. */
export interface PreflightResult {
  /** True when at least one sensitive area was detected. */
  touches_sensitive_area: boolean
  /** Names of the sensitive-area categories that were detected. */
  sensitive_areas: string[]
  /** True when the text mentions a rollback or recovery strategy. */
  rollback_mentioned: boolean
  /** True when the text mentions testing. */
  test_strategy_mentioned: boolean
  /**
   * IDs of refuted Chronicle entries whose text overlaps with the design text.
   * They are potential conflicts that the Jury should surface.
   */
  chronicle_conflicts: string[]
}
|
|
33
|
-
|
|
34
|
-
/**
 * Deterministic preflight analysis; makes no LLM call.
 *
 * Checks the combined outcome + design text against the sensitive-area,
 * rollback, and test patterns, and flags refuted evidence entries that share
 * vocabulary with that text. The result is meant to be injected into the Jury
 * prompt so the LLM reasons over concrete signals.
 *
 * @param outcome  What needs to be achieved.
 * @param design   The proposed approach.
 * @param evidence Evidence pack to scan for refuted entries.
 * @returns The collected preflight signals.
 */
export function runPreflight(
  outcome: string,
  design: string,
  evidence: OracleResult[],
): PreflightResult {
  const text = `${outcome} ${design}`

  // Lower-case, split on non-word characters, keep words longer than four characters.
  const significantWords = (source: string): string[] =>
    source
      .toLowerCase()
      .split(/\W+/)
      .filter(word => word.length > 4)

  const sensitive_areas: string[] = []
  for (const [area, pattern] of Object.entries(SENSITIVE_PATTERNS)) {
    if (pattern.test(text)) {
      sensitive_areas.push(area)
    }
  }

  const designWords = new Set(significantWords(text))

  // A refuted entry conflicts when it shares at least one significant word with the design text.
  const chronicle_conflicts: string[] = []
  for (const entry of evidence) {
    if (entry.status !== "refuted") continue
    const sharesWord = significantWords(entryText(entry)).some(word => designWords.has(word))
    if (sharesWord) {
      chronicle_conflicts.push(entry.id)
    }
  }

  const rollback_mentioned = ROLLBACK_PATTERNS.test(text)
  const test_strategy_mentioned = TEST_PATTERNS.test(text)

  return {
    touches_sensitive_area: sensitive_areas.length > 0,
    sensitive_areas,
    rollback_mentioned,
    test_strategy_mentioned,
    chronicle_conflicts,
  }
}
|
|
79
|
-
|
|
80
|
-
/**
 * Render a preflight result as newline-separated text for injection into the
 * Jury prompt.
 *
 * @param preflight The preflight signals to render.
 * @returns A header line followed by one status line per check.
 */
export function formatPreflight(preflight: PreflightResult): string {
  const hasConflicts = preflight.chronicle_conflicts.length > 0

  const conflictLines = hasConflicts
    ? [
        `⚠ Refuted Chronicle entries potentially conflicting: ${preflight.chronicle_conflicts.join(", ")}`,
        " These entries were previously tried and failed — verify the design addresses the documented failure reason.",
      ]
    : ["✓ No conflicting refuted Chronicle entries"]

  const report: string[] = [
    "## Deterministic Preflight (machine-checked, not LLM-inferred)",
    preflight.touches_sensitive_area
      ? `⚠ Sensitive areas detected: ${preflight.sensitive_areas.join(", ")}`
      : "✓ No sensitive areas detected",
    preflight.rollback_mentioned ? "✓ Rollback strategy mentioned" : "✗ No rollback strategy mentioned",
    preflight.test_strategy_mentioned ? "✓ Test strategy mentioned" : "✗ No test strategy mentioned",
    ...conflictLines,
  ]

  return report.join("\n")
}
|
package/modules/jury/schema.ts
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import { z } from "zod"
|
|
2
|
-
|
|
3
|
-
// Per-dimension confidence scores; every field is a number constrained to [0, 1].
const ConfidenceBreakdownSchema = z.object({
  evidence_support: z.number().min(0).max(1),
  feasibility: z.number().min(0).max(1),
  risk: z.number().min(0).max(1),
  completeness: z.number().min(0).max(1),
})
|
|
9
|
-
|
|
10
|
-
/**
 * Zod schema describing the structured output the Jury's LLM must produce.
 * evaluate() validates every LLM response against it before returning.
 */
export const JuryOutputSchema = z.object({
  // Overall confidence, constrained to [0, 1].
  confidence: z.number().min(0).max(1),
  confidence_breakdown: ConfidenceBreakdownSchema,
  // Must be a non-empty string.
  assessment: z.string().min(1),
  gaps: z.array(z.string()),
  blocking_gaps: z.array(z.string()),
  council_brief: z.enum(["challenge", "pressure-test"]),
  recommendation: z.enum(["proceed", "investigate-more", "redesign"]),
})

/** Static type inferred from {@link JuryOutputSchema}. */
export type JuryOutputParsed = z.infer<typeof JuryOutputSchema>
|