npm - @balpal4495/quorum - Versions diffs - 0.1.0 - Mend

@balpal4495/quorum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/.github/copilot-instructions.md +94 -0
package/CLAUDE.md +86 -0
package/GEMINI.md +73 -0
package/LICENSE +21 -0
package/README.md +202 -0
package/SETUP.md +256 -0
package/bin/init.js +366 -0
package/modules/AGENTS.md +66 -0
package/modules/CLAUDE.md +64 -0
package/modules/README.md +251 -0
package/modules/council/advisors.ts +68 -0
package/modules/council/chairman.ts +112 -0
package/modules/council/deliberate.ts +106 -0
package/modules/council/frame.ts +54 -0
package/modules/council/index.ts +4 -0
package/modules/council/personas.ts +57 -0
package/modules/council/reviewers.ts +81 -0
package/modules/council/types.ts +45 -0
package/modules/jury/evaluate.ts +112 -0
package/modules/jury/index.ts +3 -0
package/modules/jury/schema.ts +15 -0
package/modules/jury/types.ts +31 -0
package/modules/oracle/adapters/lance-db.ts +81 -0
package/modules/oracle/adapters/xenova-embedder.ts +43 -0
package/modules/oracle/bm25.ts +92 -0
package/modules/oracle/index.ts +36 -0
package/modules/oracle/log.ts +15 -0
package/modules/oracle/propose.ts +148 -0
package/modules/oracle/query.ts +145 -0
package/modules/oracle/summary.ts +115 -0
package/modules/oracle/types.ts +32 -0
package/modules/sentinel/assert.ts +95 -0
package/modules/sentinel/coverage.ts +106 -0
package/modules/sentinel/drift.ts +159 -0
package/modules/sentinel/index.ts +6 -0
package/modules/sentinel/review.ts +207 -0
package/modules/setup.ts +153 -0
package/modules/shared/types.ts +148 -0
package/package.json +47 -0

package/modules/council/personas.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Default advisor personas for the Council.
+ *
+ * Personas are interpretive lenses, not knowledge sources.
+ * All advisors receive the same Oracle evidence pack — their persona
+ * determines which entries they weight and how they read them.
+ *
+ * Add or replace personas in CouncilDeps to specialise for your domain.
+ */
+/** One advisor persona: a named lens plus the prompt text that applies it. */
+export interface AdvisorPersona {
+  /** Display name of the persona (e.g. "Pragmatist"). */
+  name: string
+  /** One-line description of this persona's evidence focus. */
+  lens: string
+  /** System prompt fragment injected into the advisor's prompt. */
+  systemFragment: string
+}
+/**
+ * Built-in persona set. Each entry steers an advisor towards a different slice
+ * of the Oracle evidence: validated entries, refuted entries, cross-entry
+ * patterns, open entries, and gaps (what the evidence does not contain).
+ * Declared `readonly` so the array type does not expose mutating methods.
+ */
+export const DEFAULT_PERSONAS: readonly AdvisorPersona[] = [
+  {
+    name: "Pragmatist",
+    lens: "Weights validated entries — what has worked in this codebase",
+    systemFragment:
+      "Focus on `validated` Oracle entries. What has already worked in this codebase? " +
+      "Weight evidence that confirms the design will succeed based on prior outcomes.",
+  },
+  {
+    name: "Sceptic",
+    lens: "Weights refuted entries — what has failed and why",
+    systemFragment:
+      "Focus on `refuted` Oracle entries. What has already failed in this codebase and why? " +
+      "Look for signs this design repeats past mistakes. Surface failure modes explicitly.",
+  },
+  {
+    name: "Systems thinker",
+    lens: "Looks for patterns across all entries — second-order effects",
+    systemFragment:
+      "Read all Oracle entries as a system. Look for patterns, dependencies, and second-order " +
+      "effects. What does the design miss about how the system as a whole behaves?",
+  },
+  {
+    name: "Risk analyst",
+    lens: "Weights open entries — unresolved questions and unknowns",
+    systemFragment:
+      "Focus on `open` Oracle entries — unresolved questions and unknowns. " +
+      "What has not been confirmed? What uncertainty does this design carry? " +
+      "Flag every assumption that has not been validated by an outcome.",
+  },
+  {
+    name: "Evidence auditor",
+    lens: "Focuses on gaps — what Oracle does NOT contain",
+    systemFragment:
+      "Look for what is ABSENT from the Oracle evidence. What decisions is this design making " +
+      "without any codebase evidence to support them? " +
+      "Name every gap — a gap is not a reason to reject, but it must be surfaced.",
+  },
+]

package/modules/council/reviewers.ts ADDED Viewed

@@ -0,0 +1,81 @@
+import type { LLMProvider, OracleResult } from "../shared/types"
+import type { AdvisorResponse } from "./advisors"
+/** Output of a single reviewer run, as produced by fanOutReviewers(). */
+export interface ReviewerResponse {
+  /** Identifier of the form `reviewer-<n>`, where n starts at 1. */
+  reviewerId: string
+  /** The text returned by the LLM for this reviewer. */
+  review: string
+}
+/**
+ * Label for the advisor at a zero-based position: A…Z, then AA, AB, …
+ * (spreadsheet-column style), so labels stay alphabetic past 26 advisors.
+ */
+function advisorLabel(index: number): string {
+  let label = ""
+  for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
+    label = String.fromCharCode(65 + (n % 26)) + label
+  }
+  return label
+}
+/**
+ * Shuffle advisor responses and label them A, B, C, … in their shuffled order.
+ * Prevents reviewers deferring to confident responses by position or persona name.
+ *
+ * The input array is not mutated. Only each response's `response` text is
+ * emitted, so no persona information reaches the reviewers.
+ *
+ * @param responses Advisor responses to anonymise.
+ * @returns Markdown sections ("## Advisor X" + response text) separated by
+ *          horizontal rules; the empty string when there are no responses.
+ */
+function anonymise(responses: AdvisorResponse[]): string {
+  // Fisher–Yates gives every permutation equal probability. Sorting with a
+  // random comparator is biased and depends on the engine's sort algorithm.
+  const shuffled = [...responses]
+  for (let i = shuffled.length - 1; i > 0; i--) {
+    const j = Math.floor(Math.random() * (i + 1))
+    const swapped = shuffled[i]
+    shuffled[i] = shuffled[j]
+    shuffled[j] = swapped
+  }
+  return shuffled
+    .map((r, i) => `## Advisor ${advisorLabel(i)}\n${r.response}`)
+    .join("\n\n---\n\n")
+}
+/**
+ * Render the evidence pack as one `[id] (status) key_insight` line per entry.
+ * Returns a fixed placeholder sentence when the pack is empty.
+ */
+function formatEvidenceSummary(evidence: OracleResult[]): string {
+  if (evidence.length === 0) {
+    return "No Oracle evidence available."
+  }
+  const lines: string[] = []
+  for (const entry of evidence) {
+    lines.push(`[${entry.id}] (${entry.status}) ${entry.key_insight}`)
+  }
+  return lines.join("\n")
+}
+/**
+ * System prompt shared by every reviewer call in fanOutReviewers().
+ * Asks the reviewer to judge the reasoning quality of the advisor responses
+ * (evidence use, citation accuracy, internal consistency), not the design itself.
+ * Written as an array of lines joined with "\n" for readability.
+ */
+const REVIEWER_SYSTEM_PROMPT = [
+  "You are a Council reviewer. You evaluate the quality of advisor responses.",
+  "",
+  "You are NOT deciding whether the design is correct.",
+  "You are assessing the reasoning quality of each advisor response:",
+  "",
+  "1. Does the advisor actually use the Oracle evidence, or reason from general knowledge?",
+  "2. Are Oracle entry IDs cited? Do those citations match the evidence provided?",
+  "3. Is the response internally consistent?",
+  "4. Which responses provide the strongest evidence-backed reasoning?",
+  "5. Which responses make unsupported claims?",
+  "",
+  "Be critical. Evidence quality matters more than conclusion confidence.",
+  "Keep your review under 400 words.",
+].join("\n")
+/**
+ * Fan the review step out to `reviewerCount` parallel LLM calls.
+ *
+ * The advisor responses are anonymised once and the evidence summarised once,
+ * so every reviewer receives the identical user prompt (same shuffled order,
+ * same labels). Anonymisation prevents position bias and persona deference.
+ *
+ * @param advisorResponses Responses to be reviewed.
+ * @param evidence Evidence pack, included so reviewers can cross-check citations.
+ * @param reviewerCount Number of reviewer calls to issue.
+ * @param llm Provider invoked with a system + user message pair.
+ * @param model Optional model name forwarded to `llm` unchanged.
+ * @returns One ReviewerResponse per reviewer, ordered reviewer-1 … reviewer-N.
+ *          Rejects if any single reviewer call rejects (Promise.all semantics).
+ */
+export async function fanOutReviewers(
+  advisorResponses: AdvisorResponse[],
+  evidence: OracleResult[],
+  reviewerCount: number,
+  llm: LLMProvider,
+  model?: string,
+): Promise<ReviewerResponse[]> {
+  const anonymisedResponses = anonymise(advisorResponses)
+  const evidenceSummary = formatEvidenceSummary(evidence)
+  // The prompt does not depend on the reviewer index, so build it a single time.
+  const sharedUserPrompt = [
+    "## Advisor Responses (anonymised)",
+    anonymisedResponses,
+    "",
+    "## Oracle Evidence (for cross-referencing citations)",
+    evidenceSummary,
+    "",
+    "Review each advisor response for evidence quality.",
+  ].join("\n")
+  const runReviewer = async (index: number): Promise<ReviewerResponse> => {
+    // A fresh message array per call, so providers never share one instance.
+    const review = await llm(
+      [
+        { role: "system", content: REVIEWER_SYSTEM_PROMPT },
+        { role: "user", content: sharedUserPrompt },
+      ],
+      model,
+    )
+    return { reviewerId: `reviewer-${index + 1}`, review }
+  }
+  const pending = Array.from({ length: reviewerCount }, (_, index) => runReviewer(index))
+  return Promise.all(pending)
+}

package/modules/council/types.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import type { OracleResult, LLMProvider, OracleClient } from "../shared/types"
+import type { JuryOutput } from "../jury/types"
+/** Everything the Council receives for one deliberation. */
+export interface CouncilInput {
+  /** What needs to be achieved. */
+  outcome: string
+  /** Proposed approach from the Designer. */
+  design: string
+  /** Same evidence pack the Jury received. */
+  evidence: OracleResult[]
+  /** Jury output — drives the council brief and confidence. */
+  jury_output: JuryOutput
+}
+/** Result of a Council deliberation. */
+export interface CouncilOutput {
+  /** NOTE(review): presumably true when the Council accepts the design — confirm against the chairman step. */
+  satisfied: boolean
+  /** Chairman synthesis — every material conclusion cites Oracle entry IDs. */
+  verdict: string
+  /** What was challenged or could not be validated. */
+  challenges: string[]
+  /** Oracle entry IDs referenced in the verdict. */
+  evidence_cited: string[]
+  /** Suggested next step; one of three fixed values. */
+  recommendation: "proceed" | "redesign" | "investigate-more"
+}
+/** Optional per-step model names. Every field may be omitted. */
+export interface CouncilModels {
+  /** Model for the framer step. */
+  frame?: string
+  /** Model for advisors. High volume — cheaper model appropriate here. */
+  advisors?: string
+  /** Model for reviewers. Critical analysis — stronger model recommended. */
+  reviewers?: string
+  /** Model for the chairman. Synthesis — best available model recommended. */
+  chairman?: string
+}
+/** Dependencies and tuning knobs injected into the Council. */
+export interface CouncilDeps {
+  /** LLM provider used for the Council's model calls. */
+  llm: LLMProvider
+  /** Oracle client made available to the Council. */
+  oracle: OracleClient
+  /** Number of advisors to run in parallel. Default: 5. */
+  advisorCount?: number
+  /** Number of reviewers to run in parallel. Default: 5. */
+  reviewerCount?: number
+  /** Optional per-step model names. */
+  models?: CouncilModels
+}

package/modules/jury/evaluate.ts ADDED Viewed

@@ -0,0 +1,112 @@
+import type { JuryInput, JuryOutput, JuryDeps } from "./types"
+import type { OracleResult } from "../shared/types"
+import { JuryOutputSchema } from "./schema"
+/**
+ * Routing cut-off applied by evaluate(): confidence below this value yields
+ * council_brief "challenge", otherwise "pressure-test".
+ * The same 0.6 figure is written out literally in SYSTEM_PROMPT — keep them in sync.
+ */
+const CONFIDENCE_THRESHOLD = 0.6
+/**
+ * Render the evidence pack for the Jury prompt.
+ *
+ * Each entry becomes a small text block: a header line (id, status, confidence
+ * to 2 decimals, score to 3 decimals), the insight, the affected areas, and —
+ * only when `outcome` is truthy — an outcome line. Blocks are separated by a
+ * blank line. An empty pack yields a fixed "no evidence" sentence.
+ */
+function formatEvidence(evidence: OracleResult[]): string {
+  if (evidence.length === 0) {
+    return "No Oracle entries found. There is no prior evidence for this codebase on this topic."
+  }
+  const blocks = evidence.map(entry => {
+    const lines = [
+      `[${entry.id}] status=${entry.status}  confidence=${entry.confidence.toFixed(2)}  score=${entry.score.toFixed(3)}`,
+      `Insight: ${entry.key_insight}`,
+      `Areas: ${entry.affected_areas.join(", ")}`,
+    ]
+    if (entry.outcome) {
+      lines.push(`Outcome: ${entry.outcome}`)
+    }
+    return lines.join("\n")
+  })
+  return blocks.join("\n\n")
+}
+/**
+ * System prompt for the Jury LLM call made by evaluate().
+ * Describes the four scoring dimensions, the confidence → council_brief rule,
+ * and the exact JSON shape the model must return (validated afterwards against
+ * JuryOutputSchema). The 0.6 cut-off is written literally here and must match
+ * CONFIDENCE_THRESHOLD.
+ */
+const SYSTEM_PROMPT = `You are the Jury — an evidence-based evaluator for agentic development workflows.
+Your job is to evaluate a proposed design against Oracle evidence and produce a structured confidence score.
+You do NOT make decisions. You assess and score. Your output determines the Council's brief.
+Score the design across these four dimensions (equally weighted to produce a final confidence in [0, 1]):
+1. Evidence support   — do validated Oracle entries confirm this approach works in this codebase?
+2. Feasibility        — do Oracle entries (or their absence) suggest this is achievable?
+3. Risk               — what do refuted entries reveal about failure modes? Has this been tried and failed?
+4. Completeness       — does the design address the full outcome, or only part of it?
+council_brief is determined by confidence only (do not invent a value):
+  confidence < 0.6  → council_brief = "challenge"
+  confidence ≥ 0.6  → council_brief = "pressure-test"
+Return ONLY valid JSON that matches this schema exactly — no markdown fences, no explanation:
+{
+  "confidence": <number 0–1>,
+  "assessment": <string — what the evidence supports or contradicts>,
+  "gaps": [<string — each missing piece of evidence from Oracle>],
+  "council_brief": "challenge" | "pressure-test",
+  "recommendation": "proceed" | "investigate-more" | "redesign"
+}`
+/**
+ * Try to recover a JSON document from an LLM reply.
+ *
+ * Candidates are tried in order and the first that parses wins:
+ *   1. the trimmed reply as-is (model followed the "no fences" instruction);
+ *   2. the contents of the first Markdown code fence, which also covers
+ *      replies that put prose before or after the fence;
+ *   3. the reply with one leading and one trailing fence marker stripped
+ *      (the original cleaning rule, kept so earlier behaviour is preserved).
+ *
+ * A wrapper object is returned because `null` is itself a valid JSON value.
+ */
+function parseJsonReply(raw: string): { ok: true; value: unknown } | { ok: false } {
+  const candidates: string[] = [raw.trim()]
+  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(raw)
+  const fencedBody = fenced?.[1]
+  if (fencedBody !== undefined) {
+    candidates.push(fencedBody.trim())
+  }
+  candidates.push(
+    raw
+      .replace(/^```(?:json)?\s*/m, "")
+      .replace(/\s*```$/m, "")
+      .trim(),
+  )
+  for (const candidate of candidates) {
+    try {
+      return { ok: true, value: JSON.parse(candidate) }
+    } catch {
+      // Not valid JSON — fall through to the next candidate.
+    }
+  }
+  return { ok: false }
+}
+/**
+ * Evaluate a proposed design against Oracle evidence.
+ *
+ * Scores across four dimensions (evidence support, feasibility, risk, completeness)
+ * and returns a structured JuryOutput. The council_brief is always derived from the
+ * confidence score — the LLM value is overridden to ensure deterministic routing.
+ *
+ * @param input Outcome, proposed design and the evidence pack to judge against.
+ * @param deps  LLM provider and optional model name.
+ * @returns The schema-validated Jury output with council_brief recomputed.
+ * @throws Error if no JSON document can be recovered from the LLM reply, or if
+ *         the parsed value fails JuryOutputSchema validation.
+ *         Never silently defaults to a passing score.
+ */
+export async function evaluate(
+  input: JuryInput,
+  deps: JuryDeps,
+): Promise<JuryOutput> {
+  const { llm, model } = deps
+  const evidenceText = formatEvidence(input.evidence)
+  const userPrompt = [
+    "## Outcome",
+    input.outcome,
+    "",
+    "## Proposed Design",
+    input.design,
+    "",
+    "## Oracle Evidence",
+    evidenceText,
+  ].join("\n")
+  const raw = await llm(
+    [
+      { role: "system", content: SYSTEM_PROMPT },
+      { role: "user", content: userPrompt },
+    ],
+    model,
+  )
+  const parsed = parseJsonReply(raw)
+  if (!parsed.ok) {
+    throw new Error(
+      `Jury: LLM returned non-JSON response. Raw (first 300 chars): ${raw.slice(0, 300)}`,
+    )
+  }
+  const result = JuryOutputSchema.safeParse(parsed.value)
+  if (!result.success) {
+    throw new Error(
+      `Jury: LLM output failed schema validation. Issues: ${JSON.stringify(result.error.issues)}`,
+    )
+  }
+  const output = result.data
+  // Enforce council_brief from confidence — do not trust the LLM to compute this correctly
+  output.council_brief =
+    output.confidence < CONFIDENCE_THRESHOLD ? "challenge" : "pressure-test"
+  return output
+}

package/modules/jury/index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export { evaluate } from "./evaluate"
+export type { JuryInput, JuryOutput, JuryDeps } from "./types"
+export { JuryOutputSchema } from "./schema"

package/modules/jury/schema.ts ADDED Viewed

@@ -0,0 +1,15 @@
+import { z } from "zod"
+/**
+ * Zod schema for the Jury's structured LLM output.
+ * evaluate() validates all LLM responses against this before returning.
+ */
+export const JuryOutputSchema = z.object({
+  // Must be a number within [0, 1]; anything outside fails validation.
+  confidence: z.number().min(0).max(1),
+  // Must be a non-empty string.
+  assessment: z.string().min(1),
+  // Array of strings; an empty array is accepted.
+  gaps: z.array(z.string()),
+  // Validated here, but evaluate() overwrites it from `confidence` afterwards.
+  council_brief: z.enum(["challenge", "pressure-test"]),
+  recommendation: z.enum(["proceed", "investigate-more", "redesign"]),
+})
+/** Static type inferred from JuryOutputSchema. */
+export type JuryOutputParsed = z.infer<typeof JuryOutputSchema>

package/modules/jury/types.ts ADDED Viewed

@@ -0,0 +1,31 @@
+import type { OracleResult, LLMProvider } from "../shared/types"
+/** Input to the Jury: the goal, the proposed design, and the evidence to judge it against. */
+export interface JuryInput {
+  /** What needs to be achieved. */
+  outcome: string
+  /** Proposed approach from the Designer. */
+  design: string
+  /** Evidence retrieved from Oracle. */
+  evidence: OracleResult[]
+}
+/** Structured result of a Jury evaluation. */
+export interface JuryOutput {
+  /** 0–1 confidence score. Drives the Council brief. */
+  confidence: number
+  /** What the evidence supports or contradicts. */
+  assessment: string
+  /** Evidence missing from Oracle that would improve confidence. */
+  gaps: string[]
+  /**
+   * Council brief derived from confidence:
+   *   < 0.6  → "challenge"      (find what is wrong — broader scope)
+   *   ≥ 0.6  → "pressure-test"  (assume correct, try to break it)
+   */
+  council_brief: "challenge" | "pressure-test"
+  /** Suggested next step; one of three fixed values. */
+  recommendation: "proceed" | "investigate-more" | "redesign"
+}
+/** Dependencies injected into the Jury's evaluate(). */
+export interface JuryDeps {
+  /** LLM provider used for the evaluation call. */
+  llm: LLMProvider
+  /** Optional model name, passed through to `llm` unchanged. */
+  model?: string
+}

package/modules/oracle/adapters/lance-db.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * LanceDB vector store adapter.
+ *
+ * Required package: npm install vectordb
+ *
+ * Chronicle entries are stored in .chronicle/entries/ (LanceDB table directory).
+ * Vectors are indexed with cosine metric — no need to pre-normalise embeddings.
+ *
+ * Note: this adapter targets the `vectordb` package (LanceDB v0.x).
+ * If your project uses `@lancedb/lancedb` (v0.4+), the connect/createTable API
+ * is nearly identical but table.query() replaces table.search() for non-vector queries.
+ */
+import type { VectorStore } from "../types"
+import type { ChronicleEntry } from "../../shared/types"
+import path from "path"
+// eslint-disable-next-line @typescript-eslint/no-require-imports
+const lancedb = require("vectordb")
+/** Row shape stored in the "entries" table. */
+interface LanceRow {
+  /** Entry identifier; used as the key for delete-then-insert upserts. */
+  id: string
+  /** Embedding vector for the entry. */
+  vector: number[]
+  /** ChronicleEntry serialised as JSON string. */
+  payload: string
+  /** Distance reported by a vector search; absent on rows that did not come from a search. */
+  _distance?: number
+}
+/**
+ * Create a VectorStore backed by a LanceDB table named "entries", stored under
+ * `<chronicleDir>/entries`.
+ *
+ * The table is created lazily on the first upsert(), because it is created
+ * from its first row. Until then search() and getAll() return empty arrays.
+ *
+ * @param chronicleDir Directory that holds the Chronicle data.
+ * @returns A VectorStore exposing upsert, search and getAll.
+ */
+export async function createLanceDBStore(chronicleDir: string): Promise<VectorStore> {
+  const tableDir = path.join(chronicleDir, "entries")
+  const db = await lancedb.connect(tableDir)
+  let table: any = null
+  /** Return the cached or freshly opened table, or null when it does not exist yet. */
+  async function openExistingTable(): Promise<any> {
+    if (table) return table
+    const names: string[] = await db.tableNames()
+    if (names.includes("entries")) {
+      table = await db.openTable("entries")
+    }
+    return table
+  }
+  return {
+    async upsert(id, vector, metadata) {
+      const row: LanceRow = { id, vector, payload: JSON.stringify(metadata) }
+      const existing = await openExistingTable()
+      if (!existing) {
+        // First write: the table is created from this row, so the row is
+        // already stored and no delete/add round-trip is needed.
+        // NOTE(review): confirm that createTable honours `metric` in the
+        // installed `vectordb` version; if not, the metric must be set on the
+        // search query for the score conversion in search() to hold.
+        table = await db.createTable("entries", [row], { metric: "cosine" })
+        return
+      }
+      // LanceDB does not have native upsert — delete existing then insert
+      await existing.delete(`id = '${sanitiseId(id)}'`)
+      await existing.add([row])
+    },
+    async search(vector, limit) {
+      const t = await openExistingTable()
+      if (!t) return []
+      const rows: LanceRow[] = await t.search(vector).limit(limit).execute()
+      return rows.map(row => ({
+        entry: JSON.parse(row.payload) as ChronicleEntry,
+        // With the cosine metric, distance = 1 - cosine similarity, so this
+        // turns it back into a similarity. Rows without a distance score 0.
+        score: row._distance !== undefined ? 1 - row._distance : 0,
+      }))
+    },
+    async getAll() {
+      const t = await openExistingTable()
+      if (!t) return []
+      // NOTE(review): the file header says table.query() belongs to
+      // `@lancedb/lancedb`; verify it exists on `vectordb` tables.
+      const rows: LanceRow[] = await t.query().execute()
+      return rows.map(row => JSON.parse(row.payload) as ChronicleEntry)
+    },
+  }
+}
+/**
+ * Escape an id for use inside a single-quoted SQL-like string literal by
+ * doubling every single quote. Used for the LanceDB delete filter.
+ */
+function sanitiseId(id: string): string {
+  const pieces = id.split("'")
+  return pieces.join("''")
+}

package/modules/oracle/adapters/xenova-embedder.ts ADDED Viewed

@@ -0,0 +1,43 @@
+/**
+ * Local ONNX embedder using @xenova/transformers (all-MiniLM-L6-v2).
+ *
+ * Required package: npm install @xenova/transformers
+ *
+ * Runs entirely locally — no API key, no network dependency after first use.
+ * First call downloads and caches the model (~25 MB).
+ * Produces 384-dimensional unit vectors (mean pooling + L2 normalisation).
+ *
+ * For production use, pre-warm the embedder on startup:
+ *   import { warmEmbedder } from "./adapters/xenova-embedder"
+ *   await warmEmbedder()
+ */
+// eslint-disable-next-line @typescript-eslint/no-require-imports
+const { pipeline } = require("@xenova/transformers")
+/**
+ * In-flight or completed pipeline load. The promise is cached, not the
+ * resolved value, so concurrent first calls share one model load.
+ */
+let embedderPipeline: Promise<any> | null = null
+/**
+ * Return the shared feature-extraction pipeline, starting the load on first use.
+ *
+ * If the load fails, the cache is cleared so a later call can retry; the
+ * failure still reaches every caller that was waiting on that attempt.
+ */
+async function getPipeline(): Promise<any> {
+  if (!embedderPipeline) {
+    const loading: Promise<any> = Promise.resolve(
+      pipeline(
+        "feature-extraction",
+        "Xenova/all-MiniLM-L6-v2",
+      ),
+    )
+    embedderPipeline = loading
+    loading.catch(() => {
+      // Only clear the cache if it still points at this failed attempt.
+      if (embedderPipeline === loading) embedderPipeline = null
+    })
+  }
+  return embedderPipeline
+}
+/**
+ * Embed a piece of text with the shared all-MiniLM-L6-v2 pipeline.
+ * The pipeline is asked for mean pooling and normalisation; its `data`
+ * buffer is copied into a plain number array.
+ * Returns a 384-dimensional unit vector.
+ */
+export async function xenovaEmbed(text: string): Promise<number[]> {
+  const extract = await getPipeline()
+  const pooled = await extract(text, { pooling: "mean", normalize: true })
+  const values = Array.from(pooled.data) as number[]
+  return values
+}
+/**
+ * Load the embedding pipeline ahead of time so the first real query does not
+ * pay the model-loading cost. Resolves once the pipeline is available.
+ */
+export async function warmEmbedder(): Promise<void> {
+  // The pipeline itself is discarded — only the cached load matters here.
+  void (await getPipeline())
+}

package/modules/oracle/bm25.ts ADDED Viewed

@@ -0,0 +1,92 @@
+/**
+ * Lightweight BM25 implementation for Pass 2 re-ranking.
+ *
+ * k1 = 1.5  (term frequency saturation)
+ * b  = 0.75 (length normalization)
+ *
+ * Formula: score(q, d) = Σ IDF(qi) * f(qi, d) * (k1 + 1) / (f(qi, d) + k1 * (1 − b + b * |d| / avgdl))
+ */
+/** BM25 term-frequency saturation parameter (k1). */
+const K1 = 1.5
+/** BM25 document-length normalisation parameter (b). */
+const B = 0.75
+/**
+ * Split text into lower-cased word tokens.
+ *
+ * A token is a maximal run of letters, combining marks, digits or underscores
+ * in any script. For ASCII input this matches the earlier `\w+` behaviour;
+ * non-ASCII words such as "café" are kept whole rather than cut at the first
+ * non-ASCII character.
+ *
+ * @returns The tokens in order of appearance; an empty array if there are none.
+ */
+function tokenize(text: string): string[] {
+  return text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []
+}
+/**
+ * Smoothed Robertson–Sparck Jones inverse document frequency:
+ * ln(1 + (N − df + 0.5) / (df + 0.5)).
+ *
+ * @param N  Total number of documents.
+ * @param df Number of documents containing the term.
+ */
+function computeIdf(N: number, df: number): number {
+  const withoutTerm = N - df + 0.5
+  const withTerm = df + 0.5
+  return Math.log(1 + withoutTerm / withTerm)
+}
+/**
+ * Score each document string against the query using BM25.
+ *
+ * Every occurrence of a term in the query contributes, so a repeated query
+ * term is counted once per repetition. Documents that share no term with the
+ * query score 0.
+ *
+ * @param query     Free-text query.
+ * @param documents Document texts to score.
+ * @returns A score array parallel to `documents`; empty when `documents` is empty.
+ */
+export function bm25Score(query: string, documents: string[]): number[] {
+  if (documents.length === 0) return []
+  const queryTokens = tokenize(query)
+  // One term-frequency map per document, built once. It answers both
+  // "does this document contain the term?" (for df) and "how often?" (for tf),
+  // instead of rescanning every token array for every query term.
+  const docStats = documents.map(doc => {
+    const tokens = tokenize(doc)
+    const tf = new Map<string, number>()
+    for (const token of tokens) {
+      tf.set(token, (tf.get(token) ?? 0) + 1)
+    }
+    return { length: tokens.length, tf }
+  })
+  const totalLength = docStats.reduce((sum, d) => sum + d.length, 0)
+  const avgdl = totalLength / docStats.length
+  const N = documents.length
+  // Document frequency for each unique query token
+  const df = new Map<string, number>()
+  for (const token of new Set(queryTokens)) {
+    let containing = 0
+    for (const stats of docStats) {
+      if (stats.tf.has(token)) containing++
+    }
+    df.set(token, containing)
+  }
+  return docStats.map(({ length: docLength, tf }) => {
+    let score = 0
+    for (const token of queryTokens) {
+      const termFreq = tf.get(token) ?? 0
+      // Skipping absent terms also means avgdl is never used when every
+      // document is empty (avgdl would be 0 there).
+      if (termFreq === 0) continue
+      const idfScore = computeIdf(N, df.get(token) ?? 0)
+      const normTf =
+        (termFreq * (K1 + 1)) /
+        (termFreq + K1 * (1 - B + B * (docLength / avgdl)))
+      score += idfScore * normTf
+    }
+    return score
+  })
+}
+/**
+ * Lower-case English function words ignored by extractDomainTerms().
+ * Held in a Set for constant-time membership checks.
+ */
+const BM25_STOP_WORDS = new Set([
+  "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+  "have", "has", "had", "do", "does", "did", "will", "would", "should",
+  "could", "may", "might", "shall", "can", "to", "of", "in", "for", "on",
+  "with", "at", "by", "from", "as", "into", "through", "and", "or", "but",
+  "if", "then", "this", "that", "these", "those", "it", "its", "we", "they",
+  "their", "there", "when", "where", "what", "which", "who", "how", "not", "no",
+])
+/**
+ * Pick the most frequent distinctive tokens from Chronicle key insights, for
+ * Pass 2 query enrichment. This bridges the vocabulary gap between natural
+ * language queries and technical identifiers.
+ *
+ * Stop words and tokens of one or two characters are ignored. At most ten
+ * terms are returned, most frequent first; terms with equal counts keep the
+ * order in which they first appeared.
+ */
+export function extractDomainTerms(insights: string[]): string[] {
+  const counts = new Map<string, number>()
+  for (const insight of insights) {
+    for (const token of tokenize(insight)) {
+      if (token.length <= 2 || BM25_STOP_WORDS.has(token)) continue
+      counts.set(token, (counts.get(token) ?? 0) + 1)
+    }
+  }
+  const ranked = Array.from(counts).sort(([, countA], [, countB]) => countB - countA)
+  return ranked.slice(0, 10).map(([term]) => term)
+}

package/modules/oracle/index.ts ADDED Viewed

@@ -0,0 +1,36 @@
+export { query } from "./query"
+export { propose, commit } from "./propose"
+export type { OracleDeps, VectorStore } from "./types"
+export type {
+  OracleResult,
+  QueryOptions,
+  ChronicleEntry,
+  OracleClient,
+} from "../shared/types"
+export { createLanceDBStore } from "./adapters/lance-db"
+export { xenovaEmbed, warmEmbedder } from "./adapters/xenova-embedder"
+import type { OracleClient } from "../shared/types"
+import type { OracleDeps } from "./types"
+import { query } from "./query"
+import { propose, commit } from "./propose"
+/**
+ * Bind the Oracle functions to a set of injected deps and expose them as an
+ * OracleClient. Jury and Council depend only on that interface, so hand them
+ * the returned client rather than the raw Oracle functions.
+ *
+ * `query` falls back to an empty options object when none is supplied.
+ *
+ * @example
+ * const oracle = createOracleClient({
+ *   embedder: xenovaEmbed,
+ *   vectorStore: await createLanceDBStore(".chronicle"),
+ * })
+ */
+export function createOracleClient(deps: OracleDeps): OracleClient {
+  const client: OracleClient = {
+    query(text, options) {
+      return query(text, options ?? {}, deps)
+    },
+    propose(entry) {
+      return propose(entry, deps)
+    },
+    commit(proposalId) {
+      return commit(proposalId, deps)
+    },
+  }
+  return client
+}

package/modules/oracle/log.ts ADDED Viewed

@@ -0,0 +1,15 @@
+import { promises as fs } from "fs"
+import path from "path"
+/**
+ * Append one JSON line to `<chronicleDir>/query-log.jsonl`, creating the
+ * directory first if it does not exist.
+ * Logging is best-effort: this function lets errors propagate, and callers
+ * are expected to swallow them.
+ */
+export async function appendQueryLog(
+  entry: Record<string, unknown>,
+  chronicleDir: string,
+): Promise<void> {
+  await fs.mkdir(chronicleDir, { recursive: true })
+  const target = path.join(chronicleDir, "query-log.jsonl")
+  const line = `${JSON.stringify(entry)}\n`
+  await fs.appendFile(target, line, "utf8")
+}