useathena 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +258 -0
  2. package/apps/chrome-extension/README.md +35 -0
  3. package/apps/chrome-extension/background.js +97 -0
  4. package/apps/chrome-extension/gmail.js +107 -0
  5. package/apps/chrome-extension/linkedin.js +123 -0
  6. package/apps/chrome-extension/manifest.json +27 -0
  7. package/apps/chrome-extension/options.html +60 -0
  8. package/apps/chrome-extension/options.js +36 -0
  9. package/apps/chrome-extension/popup.html +37 -0
  10. package/apps/chrome-extension/popup.js +22 -0
  11. package/bin/athena +28 -0
  12. package/dist/api/server.js +145 -0
  13. package/dist/capture/ingest.js +85 -0
  14. package/dist/cli/commands.js +201 -0
  15. package/dist/cli/format.js +76 -0
  16. package/dist/cli/setup.js +316 -0
  17. package/dist/cli.js +291 -0
  18. package/dist/config.js +26 -0
  19. package/dist/core/fixtures.js +65 -0
  20. package/dist/core/ids.js +34 -0
  21. package/dist/core/refs.js +25 -0
  22. package/dist/core/types.js +10 -0
  23. package/dist/engine/engine.js +136 -0
  24. package/dist/engine/parse.js +76 -0
  25. package/dist/engine/prompts.js +64 -0
  26. package/dist/eval/harness.js +123 -0
  27. package/dist/eval/judge.js +75 -0
  28. package/dist/eval/run-eval.js +46 -0
  29. package/dist/eval/scenarios.js +470 -0
  30. package/dist/mcp/server.js +107 -0
  31. package/dist/mcp-server.js +7 -0
  32. package/dist/model/api-model-client.js +99 -0
  33. package/dist/model/cli-model-client.js +111 -0
  34. package/dist/model/model-client.js +28 -0
  35. package/dist/model/registry.js +67 -0
  36. package/dist/sensors/claude-code-hook.js +131 -0
  37. package/dist/serve/brief.js +95 -0
  38. package/dist/serve/outcome.js +56 -0
  39. package/dist/store/open.js +19 -0
  40. package/dist/store/store.js +269 -0
  41. package/docs/schema.md +368 -0
  42. package/package.json +43 -0
  43. package/scripts/prepare.mjs +20 -0
@@ -0,0 +1,99 @@
1
+ import { extractJson } from "./cli-model-client.js";
2
+ /**
3
+ * API providers over plain fetch — no provider SDKs (design rule: dependencies
4
+ * are a product decision; the surface we need is one POST per inference).
5
+ *
6
+ * Two wire formats cover the landscape:
7
+ * - Anthropic Messages API (api.anthropic.com)
8
+ * - OpenAI-compatible chat completions (OpenAI itself, Ollama, LM Studio,
9
+ * vLLM, llama.cpp — anything exposing /chat/completions)
10
+ */
11
+ const DEFAULT_TIMEOUT_MS = 240_000;
12
+ const DEFAULT_MAX_TOKENS = 16_000;
13
/**
 * Model client for the Anthropic Messages API.
 *
 * `options` supplies `model`, and optionally `apiKey`, `baseUrl` and
 * `timeoutMs`. The base URL defaults to https://api.anthropic.com and has a
 * single trailing slash removed so paths can be appended directly.
 */
export class AnthropicApiModelClient {
    options;
    id;
    baseUrl;
    constructor(options) {
        const root = options.baseUrl ?? "https://api.anthropic.com";
        this.options = options;
        this.id = `anthropic:${options.model}`;
        this.baseUrl = root.replace(/\/$/, "");
    }
    /**
     * Send one system + user prompt pair and return the JSON value found in
     * the first text block of the reply.
     *
     * Rejects when no API key is configured, when the API reports a refusal,
     * or when the reply carries no text block.
     */
    async generateJson(request) {
        const { apiKey, model, timeoutMs } = this.options;
        if (!apiKey) {
            throw new Error("anthropic provider needs an API key — set ANTHROPIC_API_KEY or run: athena setup");
        }
        const endpoint = `${this.baseUrl}/v1/messages`;
        const headers = {
            "x-api-key": apiKey,
            "anthropic-version": "2023-06-01",
        };
        const payload = {
            model,
            max_tokens: DEFAULT_MAX_TOKENS,
            system: request.system,
            messages: [{ role: "user", content: request.prompt }],
        };
        const body = await postJson(endpoint, headers, payload, timeoutMs);
        if (body.stop_reason === "refusal") {
            throw new Error("anthropic API declined the request (stop_reason: refusal)");
        }
        const firstTextBlock = body.content?.find((block) => block.type === "text");
        const text = firstTextBlock?.text;
        if (typeof text === "string") {
            return extractJson(text);
        }
        throw new Error(`anthropic API returned no text content: ${JSON.stringify(body).slice(0, 300)}`);
    }
}
46
/**
 * Model client for any server exposing an OpenAI-style `/chat/completions`
 * endpoint.
 *
 * `providerLabel` only affects the client `id`. The base URL defaults to
 * https://api.openai.com/v1 and has a single trailing slash removed.
 */
export class OpenAiCompatibleModelClient {
    options;
    id;
    baseUrl;
    constructor(options, providerLabel = "openai") {
        const root = options.baseUrl ?? "https://api.openai.com/v1";
        this.options = options;
        this.id = `${providerLabel}:${options.model}`;
        this.baseUrl = root.replace(/\/$/, "");
    }
    /**
     * Send one system + user message pair and return the JSON value found in
     * the first choice's message content. The authorization header is only
     * sent when an API key is configured.
     */
    async generateJson(request) {
        const { apiKey, model, timeoutMs } = this.options;
        const headers = {};
        if (apiKey) {
            headers.authorization = `Bearer ${apiKey}`;
        }
        const messages = [
            { role: "system", content: request.system },
            { role: "user", content: request.prompt },
        ];
        const body = await postJson(`${this.baseUrl}/chat/completions`, headers, { model, messages }, timeoutMs);
        const text = body.choices?.[0]?.message?.content;
        if (typeof text === "string") {
            return extractJson(text);
        }
        throw new Error(`${this.id} returned no message content: ${JSON.stringify(body).slice(0, 300)}`);
    }
}
74
/**
 * POST `body` as JSON to `url` and return the parsed JSON response.
 *
 * @param {string} url - Full endpoint URL.
 * @param {object} headers - Extra headers, merged over the JSON content type.
 * @param {unknown} body - Value serialized with JSON.stringify.
 * @param {number} [timeoutMs] - Abort the request after this many milliseconds.
 * @returns {Promise<unknown>} The parsed response body.
 * @throws {Error} On a non-2xx status (message includes status and the first
 *   300 characters of the body) or when the timeout elapses.
 * @throws {SyntaxError} When a successful response body is not valid JSON.
 */
async function postJson(url, headers, body, timeoutMs = DEFAULT_TIMEOUT_MS) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const response = await fetch(url, {
            method: "POST",
            headers: { "content-type": "application/json", ...headers },
            body: JSON.stringify(body),
            signal: controller.signal,
        });
        const text = await response.text();
        if (!response.ok) {
            throw new Error(`${url} returned ${response.status}: ${text.slice(0, 300)}`);
        }
        try {
            return JSON.parse(text);
        }
        catch (cause) {
            // A bare "Unexpected token <" is useless when a proxy or misconfigured
            // base URL answers with HTML; say which endpoint and show the body.
            throw new SyntaxError(`${url} returned ${response.status} with a non-JSON body: ${text.slice(0, 300)}`, { cause });
        }
    }
    catch (error) {
        if (error instanceof Error && error.name === "AbortError") {
            throw new Error(`${url} timed out after ${timeoutMs}ms`, { cause: error });
        }
        throw error;
    }
    finally {
        clearTimeout(timer);
    }
}
@@ -0,0 +1,111 @@
1
+ import { spawn } from "node:child_process";
2
+ import { randomUUID } from "node:crypto";
3
+ import { readFileSync, rmSync } from "node:fs";
4
+ import { tmpdir } from "node:os";
5
+ import { join } from "node:path";
6
+ /**
7
+ * CLI-runner providers: inference through a locally authenticated agent CLI,
8
+ * so a Claude or ChatGPT subscription powers the engine with no API key.
9
+ *
10
+ * Verified against real installs: `claude -p --output-format json` and
11
+ * `codex exec` (prompt on stdin, final message via --output-last-message).
12
+ */
13
+ const DEFAULT_TIMEOUT_MS = 240_000;
14
/**
 * Model client that runs inference through the `claude` CLI in print mode
 * with JSON output, limited to a single turn.
 */
export class ClaudeCliModelClient {
    options;
    id;
    constructor(options = {}) {
        this.options = options;
        const modelSuffix = options.model ? `:${options.model}` : "";
        this.id = `cli:claude${modelSuffix}`;
    }
    /**
     * Pipe the system text and prompt (separated by a blank line) to the CLI
     * on stdin, then extract the JSON value from the envelope's `result`.
     */
    async generateJson(request) {
        const { model, timeoutMs } = this.options;
        const modelArgs = model ? ["--model", model] : [];
        const args = ["-p", "--output-format", "json", "--max-turns", "1", ...modelArgs];
        const prompt = `${request.system}\n\n${request.prompt}`;
        const stdout = await run("claude", args, prompt, timeoutMs);
        const envelope = JSON.parse(stdout);
        const usable = !envelope.is_error && typeof envelope.result === "string";
        if (!usable) {
            throw new Error(`claude CLI returned an error envelope: ${stdout.slice(0, 300)}`);
        }
        return extractJson(envelope.result);
    }
}
33
/**
 * Model client that runs inference through `codex exec`, reading the final
 * agent message from a temporary file written via --output-last-message.
 */
export class CodexCliModelClient {
    options;
    id;
    constructor(options = {}) {
        this.options = options;
        const modelSuffix = options.model ? `:${options.model}` : "";
        this.id = `cli:codex${modelSuffix}`;
    }
    /**
     * Pipe the system text and prompt (separated by a blank line) to codex on
     * stdin and extract the JSON value from its last message. The temporary
     * output file is removed whether or not the call succeeds.
     */
    async generateJson(request) {
        // codex exec has no single-shot JSON mode, so the last agent message is
        // captured through a uniquely named file in the OS temp directory.
        // The ephemeral and read-only sandbox flags are intended to keep the
        // call from touching the workspace or leaving session files behind.
        const outFile = join(tmpdir(), `athena-codex-${randomUUID()}.txt`);
        const { model, timeoutMs } = this.options;
        const modelArgs = model ? ["--model", model] : [];
        const args = [
            "exec",
            "--ephemeral",
            "--skip-git-repo-check",
            "--sandbox", "read-only",
            "--color", "never",
            "--output-last-message", outFile,
            ...modelArgs,
            "-", // read the prompt from stdin
        ];
        const prompt = `${request.system}\n\n${request.prompt}`;
        try {
            await run("codex", args, prompt, timeoutMs);
            return extractJson(readFileSync(outFile, "utf8"));
        }
        finally {
            rmSync(outFile, { force: true });
        }
    }
}
66
/**
 * Run a command, feed it `stdin`, and resolve with everything it wrote to
 * stdout once it exits with code 0.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} stdin - Text written to the child's stdin before closing it.
 * @param {number} [timeoutMs] - Kill the child (SIGKILL) after this long.
 * @returns {Promise<string>} The child's stdout decoded as UTF-8.
 * @throws {Error} Rejects on spawn failure, timeout, or a non-zero exit (the
 *   message carries the first 300 characters of stderr).
 */
function run(command, args, stdin, timeoutMs = DEFAULT_TIMEOUT_MS) {
    return new Promise((resolve, reject) => {
        const child = spawn(command, args, { stdio: ["pipe", "pipe", "pipe"] });
        const timer = setTimeout(() => {
            child.kill("SIGKILL");
            reject(new Error(`${command} timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        let stdout = "";
        let stderr = "";
        // Decode at the stream level so a multi-byte UTF-8 character split
        // across two chunks is not turned into replacement characters.
        child.stdout.setEncoding("utf8");
        child.stderr.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        child.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        // A child that fails to start or exits before reading its input makes
        // the stdin write fail (EPIPE). Without a listener that stream error is
        // unhandled; the real failure is reported by "error"/"close" below.
        child.stdin.on("error", () => { });
        child.on("error", (error) => {
            clearTimeout(timer);
            reject(error);
        });
        child.on("close", (code) => {
            clearTimeout(timer);
            if (code === 0)
                resolve(stdout);
            else
                reject(new Error(`${command} exited ${code}: ${stderr.slice(0, 300)}`));
        });
        child.stdin.end(stdin);
    });
}
92
/**
 * Return the index of the bracket that closes the `{` or `[` at `start`, or
 * -1 when the text ends first. Brackets inside double-quoted strings are
 * ignored, honouring backslash escapes.
 */
function findJsonValueEnd(body, start) {
    let depth = 0;
    let inString = false;
    for (let i = start; i < body.length; i++) {
        const ch = body[i];
        if (inString) {
            if (ch === "\\")
                i++; // skip the escaped character
            else if (ch === '"')
                inString = false;
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === "{" || ch === "[") {
            depth++;
        }
        else if (ch === "}" || ch === "]") {
            depth--;
            if (depth === 0)
                return i;
        }
    }
    return -1;
}
/**
 * Models wrap JSON in prose or fences more often than they should. Extract
 * the first JSON object or array.
 *
 * The first fenced block is searched first, then the whole text. Within each,
 * every top-level bracketed span is tried in order, so bracketed prose ahead
 * of the JSON ("Result [draft]: {...}") no longer hides it. Spans nested
 * inside a span that failed to parse are never tried, so a fragment of broken
 * JSON is not returned as if it were the answer.
 *
 * @param {string} text - Raw model output.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} "no JSON found…" when the text has no `{` or `[`;
 *   "unparseable JSON…" when no bracketed span parses.
 */
export function extractJson(text) {
    const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(text);
    const candidates = fenced ? [fenced[1], text] : [text];
    let sawOpener = false;
    for (const body of candidates) {
        const opener = /[{[]/g;
        let match;
        while ((match = opener.exec(body)) !== null) {
            sawOpener = true;
            const end = findJsonValueEnd(body, match.index);
            if (end === -1)
                break; // unbalanced (likely truncated) — do not mine its fragments
            try {
                return JSON.parse(body.slice(match.index, end + 1));
            }
            catch {
                // not JSON (bracketed prose, or malformed) — resume after this span
                opener.lastIndex = end + 1;
            }
        }
    }
    if (!sawOpener)
        throw new Error(`no JSON found in model output: ${text.slice(0, 200)}`);
    throw new Error(`unparseable JSON in model output: ${text.slice(0, 200)}`);
}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Model-agnostic LLM access. The core never assumes a provider.
3
+ *
4
+ * Planned providers (MVP requirement — subscriptions must work, no API key needed):
5
+ * - API: Anthropic, OpenAI-compatible endpoints (incl. local: Ollama, LM Studio)
6
+ * - CLI runners: `claude -p --output-format json`, `codex exec --output-last-message <file>` — uses the
7
+ * user's existing Claude/ChatGPT subscription via the locally authenticated tool.
8
+ *
9
+ * The interface is deliberately the lowest common denominator of all of those:
10
+ * one-shot, non-streaming, JSON out. CLI runners can't stream; nothing here may
11
+ * ever require streaming, token counts, or provider-specific tool calling.
12
+ */
13
/**
 * Scripted client for tests. Hands back the queued responses one per call,
 * in order, and records every request it receives in `requests`.
 */
export class MockModelClient {
    id = "mock:scripted";
    requests = [];
    responses;
    constructor(responses) {
        // Copy so the caller's array is not consumed as responses are served.
        this.responses = [...responses];
    }
    /** Record the request, then resolve with the next response or reject when the queue is empty. */
    generateJson(request) {
        this.requests.push(request);
        const queueIsEmpty = this.responses.length === 0;
        return queueIsEmpty
            ? Promise.reject(new Error("MockModelClient: no scripted responses left"))
            : Promise.resolve(this.responses.shift());
    }
}
@@ -0,0 +1,67 @@
1
+ import { ClaudeCliModelClient, CodexCliModelClient } from "./cli-model-client.js";
2
+ import { AnthropicApiModelClient, OpenAiCompatibleModelClient } from "./api-model-client.js";
3
+ /**
4
+ * Model specs are provider:model strings, with an optional @baseUrl suffix for
5
+ * self-hosted OpenAI-compatible servers:
6
+ *
7
+ * cli:claude[:model] claude CLI — Claude subscription, no key
8
+ * cli:codex[:model] codex CLI — ChatGPT subscription, no key
9
+ * anthropic[:model] Anthropic API (ANTHROPIC_API_KEY)
10
+ * openai:<model> OpenAI API (OPENAI_API_KEY)
11
+ * openai:<model>@<baseUrl> any OpenAI-compatible server (LM Studio, vLLM, …)
12
+ * ollama:<model> local Ollama (http://127.0.0.1:11434/v1)
13
+ *
14
+ * Resolution order for the active spec: ATHENA_MODEL env, then config.json
15
+ * (written by `athena setup`), then the cli:claude default.
16
+ */
17
+ export const DEFAULT_MODEL_SPEC = "cli:claude";
18
+ export const DEFAULT_ANTHROPIC_MODEL = "claude-opus-4-8";
19
+ export const OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1";
20
+ export const SUPPORTED_SPECS = "cli:claude[:model], cli:codex[:model], anthropic[:model], openai:<model>[@baseUrl], ollama:<model>";
21
/**
 * Pick the active model spec: the ATHENA_MODEL environment variable, then
 * `config.model`, then DEFAULT_MODEL_SPEC.
 *
 * A source that is unset, not a string, or blank (empty / whitespace only) is
 * skipped, so `ATHENA_MODEL=` falls through to the next source. The returned
 * spec is trimmed.
 *
 * @param {{ model?: string }} [config] - Parsed config; only `model` is read.
 * @returns {string} The spec string to resolve into a client.
 */
export function resolveModelSpec(config = {}) {
    for (const candidate of [process.env.ATHENA_MODEL, config.model]) {
        if (typeof candidate !== "string")
            continue;
        const spec = candidate.trim();
        if (spec !== "")
            return spec;
    }
    return DEFAULT_MODEL_SPEC;
}
24
/**
 * Build a model client from a `provider:model[@baseUrl]` spec string.
 *
 * Everything after the first "@" is the base URL; the part before it is split
 * on ":" into the provider and the model. API keys come from the matching
 * environment variable first, then from `config.keys`.
 *
 * Throws when the provider or CLI runner is unknown, or when an `openai` /
 * `ollama` spec names no model.
 */
export function modelClientFromSpec(spec, config = {}) {
    const atIndex = spec.indexOf("@");
    const hasBaseUrl = atIndex !== -1;
    const head = hasBaseUrl ? spec.slice(0, atIndex) : spec;
    const baseUrl = hasBaseUrl ? spec.slice(atIndex + 1) : undefined;
    const segments = head.split(":");
    const provider = segments[0];
    const rest = segments.slice(1);
    const model = rest.join(":") || undefined;
    if (provider === "cli") {
        // cli:<runner>[:model] — the runner is the first segment after "cli".
        const runner = rest[0];
        const cliModel = rest.slice(1).join(":") || undefined;
        const cliOptions = cliModel !== undefined ? { model: cliModel } : {};
        if (runner === "claude")
            return new ClaudeCliModelClient(cliOptions);
        if (runner === "codex")
            return new CodexCliModelClient(cliOptions);
        throw new Error(`unknown CLI runner "${runner}" — supported: ${SUPPORTED_SPECS}`);
    }
    if (provider === "anthropic") {
        const apiKey = process.env.ANTHROPIC_API_KEY ?? config.keys?.anthropic;
        const options = { model: model ?? DEFAULT_ANTHROPIC_MODEL };
        if (apiKey !== undefined)
            options.apiKey = apiKey;
        if (baseUrl !== undefined)
            options.baseUrl = baseUrl;
        return new AnthropicApiModelClient(options);
    }
    if (provider === "openai") {
        if (!model) {
            throw new Error(`openai spec needs a model, e.g. "openai:gpt-5.2" or "openai:<model>@http://localhost:1234/v1"`);
        }
        const apiKey = process.env.OPENAI_API_KEY ?? config.keys?.openai;
        const options = { model };
        if (apiKey !== undefined)
            options.apiKey = apiKey;
        if (baseUrl !== undefined)
            options.baseUrl = baseUrl;
        return new OpenAiCompatibleModelClient(options);
    }
    if (provider === "ollama") {
        if (!model) {
            throw new Error(`ollama spec needs a model, e.g. "ollama:llama3.3" (see: ollama list)`);
        }
        return new OpenAiCompatibleModelClient({ model, baseUrl: baseUrl ?? OLLAMA_BASE_URL }, "ollama");
    }
    throw new Error(`unknown model spec "${spec}" — supported: ${SUPPORTED_SPECS}`);
}
@@ -0,0 +1,131 @@
1
+ import { closeSync, openSync, readSync, statSync } from "node:fs";
2
+ import { basename } from "node:path";
3
+ import { ingestSensorEvent } from "../capture/ingest.js";
4
+ const EXPLICIT_MARKER = /^(remember|athena)\s*[:,]\s*(.+)$/is;
5
+ /** Redirects at the start of a prompt: the user is stopping or reversing the agent. */
6
+ const REDIRECT_START = /^(no|nope|wrong|wait|stop|hold on|don't|do not|undo|revert|not like that)\b/i;
7
+ /** Soft openers that look like redirects but aren't. */
8
+ const REDIRECT_START_EXCEPTIONS = /^no (worries|problem|rush|need)\b/i;
9
+ /** Correction phrases anywhere in the prompt. */
10
+ const REDIRECT_INLINE = /\b(that's (wrong|not right|not what)|not what i (asked|meant|wanted)|why did you|you should(n't| not) have|i didn't ask (for|you)|never do that|don't do that again|you keep|again with)\b/i;
11
/**
 * Classify a user prompt as something worth capturing.
 *
 * Checks, in order: an explicit marker (yields a manual note carrying the
 * marked text), a redirect opener that is not one of the soft-opener
 * exceptions, then an inline correction phrase. Returns undefined for blank
 * prompts and prompts that match none of these.
 */
export function detectSignal(prompt) {
    const text = prompt.trim();
    if (text.length === 0) {
        return undefined;
    }
    const marker = EXPLICIT_MARKER.exec(text);
    if (marker) {
        return { kind: "manual_note", note: marker[2].trim(), reason: "explicit_marker" };
    }
    const opensWithRedirect = REDIRECT_START.test(text) && !REDIRECT_START_EXCEPTIONS.test(text);
    if (opensWithRedirect) {
        return { kind: "override", reason: "redirect_opener" };
    }
    return REDIRECT_INLINE.test(text)
        ? { kind: "override", reason: "correction_phrase" }
        : undefined;
}
27
/**
 * Turn a user prompt into a captured sensor event when it carries a signal.
 *
 * Prompts with no signal return `{ captured: null, reason: "no signal" }`.
 * Otherwise a manual-note or override event is built and ingested, and the
 * resulting instance's id and kind are returned. The domain is `code.` plus
 * the base name of `input.cwd`, or `code.general` when no cwd is given.
 */
export function handleUserPrompt(store, input) {
    const signal = detectSignal(input.prompt);
    if (!signal) {
        return { captured: null, reason: "no signal" };
    }
    const projectName = input.cwd ? basename(input.cwd) : "general";
    const emittedAt = new Date().toISOString();
    const situationFor = (summary) => ({
        summary,
        domain: `code.${projectName}`,
        app: "claude-code",
    });
    const buildNoteEvent = () => ({
        sensorId: "sen_claude_code",
        emittedAt,
        kind: "manual_note",
        situation: situationFor(`explicit remember: ${truncate(signal.note, 100)}`),
        after: { mediaType: "text/plain", content: signal.note },
    });
    const buildOverrideEvent = () => {
        // The assistant turn being corrected, when a transcript is available.
        const previousTurn = input.transcript_path ? lastAssistantTurn(input.transcript_path) : undefined;
        const overrideEvent = {
            sensorId: "sen_claude_code",
            emittedAt,
            kind: "override",
            situation: situationFor(`user redirected the agent: ${truncate(input.prompt, 100)}`),
        };
        if (previousTurn !== undefined) {
            overrideEvent.before = { mediaType: "text/markdown", content: previousTurn };
        }
        overrideEvent.after = { mediaType: "text/plain", content: input.prompt };
        overrideEvent.raw = { reason: signal.reason, sessionId: input.session_id };
        return overrideEvent;
    };
    const event = signal.kind === "manual_note" ? buildNoteEvent() : buildOverrideEvent();
    const instance = ingestSensorEvent(store, event);
    return { captured: instance.id, kind: instance.kind };
}
67
+ const TAIL_BYTES = 64_000;
68
+ const TURN_CAP = 2_000;
69
/**
 * Read the last assistant message (text + tool summary) from a Claude Code
 * transcript.
 *
 * Only the tail of the file is read. Lines are scanned from the end, and the
 * first one that parses as JSON and renders as an assistant entry wins.
 *
 * @param {string} transcriptPath - Path to the transcript file (one JSON
 *   entry per line).
 * @returns {string | undefined} The rendered turn, or undefined when the file
 *   cannot be read or the tail holds no assistant entry.
 */
export function lastAssistantTurn(transcriptPath) {
    let raw;
    try {
        raw = readTail(transcriptPath, TAIL_BYTES);
    }
    catch {
        return undefined;
    }
    // The first line may be cut mid-entry when the tail starts inside it. It
    // is not dropped up front: a truncated line fails JSON.parse below and is
    // skipped, while a complete first line (short transcript, or a tail that
    // starts on a line boundary) must stay eligible.
    const lines = raw.split("\n");
    for (let i = lines.length - 1; i >= 0; i--) {
        const line = lines[i];
        if (line.trim().length === 0)
            continue;
        let entry;
        try {
            entry = JSON.parse(line);
        }
        catch {
            continue;
        }
        const rendered = renderAssistantEntry(entry);
        if (rendered !== undefined)
            return rendered;
    }
    return undefined;
}
96
/**
 * Render one transcript entry as text when it is an assistant message.
 *
 * Non-empty text blocks are kept (trimmed) and tool-use blocks become a
 * `[tool: name]` marker; the pieces are joined with newlines and capped at
 * TURN_CAP characters. Returns undefined for anything that is not an
 * assistant entry with a content array, or that yields no pieces.
 */
function renderAssistantEntry(entry) {
    if (entry === null || typeof entry !== "object") {
        return undefined;
    }
    if (entry.type !== "assistant" || !Array.isArray(entry.message?.content)) {
        return undefined;
    }
    const blocks = entry.message.content;
    const pieces = [];
    for (let index = 0; index < blocks.length; index++) {
        const block = blocks[index];
        switch (block.type) {
            case "text":
                if (typeof block.text === "string" && block.text.trim().length > 0) {
                    pieces.push(block.text.trim());
                }
                break;
            case "tool_use":
                if (typeof block.name === "string") {
                    pieces.push(`[tool: ${block.name}]`);
                }
                break;
            default:
                break;
        }
    }
    return pieces.length === 0 ? undefined : truncate(pieces.join("\n"), TURN_CAP);
}
115
/**
 * Read up to the last `bytes` bytes of a file and decode them as UTF-8.
 *
 * @param {string} path - File to read.
 * @param {number} bytes - Maximum number of trailing bytes to return.
 * @returns {string} The decoded tail (the whole file when it is shorter).
 * @throws Propagates errors from stat/open/read (e.g. a missing file).
 */
function readTail(path, bytes) {
    const size = statSync(path).size;
    const start = Math.max(0, size - bytes);
    const length = size - start;
    const buffer = Buffer.alloc(length);
    const fd = openSync(path, "r");
    let filled = 0;
    try {
        // readSync may return fewer bytes than requested; keep reading until the
        // window is full or the file ends early (it shrank after the stat).
        while (filled < length) {
            const bytesRead = readSync(fd, buffer, filled, length - filled, start + filled);
            if (bytesRead === 0)
                break;
            filled += bytesRead;
        }
    }
    finally {
        closeSync(fd);
    }
    // Decode only what was read so unread zero-filled bytes never leak out.
    return buffer.toString("utf8", 0, filled);
}
129
/**
 * Cap `text` at `max` UTF-16 code units, appending "…" when it was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the dangling high surrogate is dropped as well so the result stays
 * well-formed Unicode.
 *
 * @param {string} text - Text to shorten.
 * @param {number} max - Maximum number of code units kept before the ellipsis.
 * @returns {string} `text` unchanged when it fits, otherwise the shortened text plus "…".
 */
function truncate(text, max) {
    if (text.length <= max)
        return text;
    let end = max;
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair)
        end -= 1;
    return `${text.slice(0, end)}…`;
}
@@ -0,0 +1,95 @@
1
+ import { newId } from "../core/ids.js";
2
+ import { parseRef, refTo } from "../core/refs.js";
3
+ /**
4
+ * Brief compilation: deterministic select-and-assemble over the store.
5
+ * No LLM call — the intelligence already happened (engine) or will happen
6
+ * (the agent reading the brief). Compilation must be fast, cheap, auditable.
7
+ *
8
+ * v1 readiness policy (documented heuristic, revisit with outcome data):
9
+ * act servable rule at high confidence, nothing contradicted
10
+ * act_with_caveats servable rules, but caveats or modest confidence
11
+ * inspect_first no rules, but facts/unvalidated patterns worth opening
12
+ * ask_human athena knows nothing useful for this task
13
+ */
14
+ const MAX_RULES = 5;
15
+ const MAX_FACTS = 3;
16
+ const MAX_OPEN_QUESTIONS = 3;
17
+ const HIGH_CONFIDENCE = 0.7;
18
/**
 * Compile, persist and return a brief for `request.task`.
 *
 * In-scope hypotheses are bucketed by status: validated/active ones become
 * rules (highest confidence first, at most MAX_RULES), stale ones become
 * do-not-assume warnings, and candidates become open questions (at most
 * MAX_OPEN_QUESTIONS). Facts come from a source search on the task text (at
 * most MAX_FACTS). Every served rule has its fire count and last-fired time
 * updated before the brief itself is saved.
 */
export function compileBrief(store, request, now = () => new Date()) {
    const servable = [];
    const stale = [];
    const unvalidated = [];
    for (const hypothesis of collectHypotheses(store, request)) {
        switch (hypothesis.status) {
            case "validated":
            case "active":
                servable.push(hypothesis);
                break;
            case "stale":
                stale.push(hypothesis);
                break;
            case "candidate":
                unvalidated.push(hypothesis);
                break;
            default:
                break;
        }
    }
    servable.sort((left, right) => right.confidence - left.confidence);
    const rules = servable.slice(0, MAX_RULES).map((hypothesis) => {
        const sameDomain = request.domain && hypothesis.domain === request.domain;
        return {
            hypothesisId: hypothesis.id,
            rule: hypothesis.rule,
            confidence: hypothesis.confidence,
            appliesBecause: sameDomain
                ? `domain ${hypothesis.domain} matches the task`
                : `matched task terms in ${hypothesis.domain}`,
            boundaries: hypothesis.doesNotApplyWhen,
            ref: refTo(hypothesis.id),
        };
    });
    const facts = [];
    for (const hit of store.search(request.task, "source", MAX_FACTS)) {
        const source = store.getSource(parseRef(hit.ref).id);
        if (!source)
            continue; // the search hit points at a source the store no longer returns
        facts.push({
            statement: `${source.title}: ${source.content.slice(0, 160)}`,
            ref: refTo(source.id),
        });
    }
    const doNotAssume = stale.map((hypothesis) => `A previously learned rule is stale — do not assume it still holds: "${hypothesis.rule}"`);
    const surfaced = unvalidated.slice(0, MAX_OPEN_QUESTIONS);
    const openQuestions = surfaced.map((hypothesis) => `Unvalidated pattern (needs more evidence before acting on it): "${hypothesis.rule}"`);
    const id = newId("brf", now().getTime());
    const compiledAt = now().toISOString();
    const readiness = readinessFor(rules, facts, doNotAssume, openQuestions);
    const refs = [
        ...rules.map((rule) => rule.ref),
        ...facts.map((fact) => fact.ref),
        ...surfaced.map((hypothesis) => refTo(hypothesis.id)),
    ];
    const brief = {
        id,
        task: request.task,
        compiledAt,
        rules,
        facts,
        doNotAssume,
        openQuestions,
        readiness,
        refs,
    };
    // Serving is an event: fires feed the outcome loop.
    for (const { hypothesisId } of rules) {
        const hypothesis = store.getHypothesis(hypothesisId);
        if (hypothesis) {
            hypothesis.validity.fires += 1;
            hypothesis.validity.lastFiredAt = compiledAt;
            store.saveHypothesis(hypothesis);
        }
    }
    store.saveBrief(brief);
    return brief;
}
72
/**
 * Gather the hypotheses relevant to a request, de-duplicated by id.
 *
 * Hypotheses listed for `request.domain` (when given) come first, followed
 * by hypothesis search hits for the task text; a later entry with the same
 * id replaces an earlier one. Retired hypotheses are left out of the result.
 */
function collectHypotheses(store, request) {
    const found = new Map();
    const remember = (hypothesis) => {
        found.set(hypothesis.id, hypothesis);
    };
    if (request.domain) {
        for (const hypothesis of store.listHypotheses({ domain: request.domain })) {
            remember(hypothesis);
        }
    }
    for (const hit of store.search(request.task, "hypothesis")) {
        const hypothesis = store.getHypothesis(parseRef(hit.ref).id);
        if (hypothesis) {
            remember(hypothesis);
        }
    }
    const inScope = [];
    for (const hypothesis of found.values()) {
        if (hypothesis.status !== "retired") {
            inScope.push(hypothesis);
        }
    }
    return inScope;
}
87
/**
 * Map the compiled brief contents to a readiness level.
 *
 * With rules: "act" when nothing is flagged as do-not-assume and the first
 * rule's confidence reaches HIGH_CONFIDENCE, otherwise "act_with_caveats".
 * Without rules: "inspect_first" when there are facts or open questions,
 * otherwise "ask_human".
 */
function readinessFor(rules, facts, doNotAssume, openQuestions) {
    if (rules.length === 0) {
        const somethingToOpen = facts.length > 0 || openQuestions.length > 0;
        return somethingToOpen ? "inspect_first" : "ask_human";
    }
    const nothingStale = doNotAssume.length === 0;
    const confident = nothingStale && (rules[0]?.confidence ?? 0) >= HIGH_CONFIDENCE;
    return confident ? "act" : "act_with_caveats";
}
@@ -0,0 +1,56 @@
1
+ import { newId } from "../core/ids.js";
2
+ /**
3
+ * Closes the loop: an outcome judges every rule that was served in the brief.
4
+ * Uncorrected output upholds them; a correction overrides them and links the
5
+ * new instance as a counterexample. Repeatedly overridden rules go stale —
6
+ * they leave the servable pool until revalidation.
7
+ *
8
+ * Promotion is autonomous: a validated rule that keeps being upheld with no
9
+ * overrides graduates to active on its own (schema: "review approve | N upheld
10
+ * outcomes --> active"). Review approval is the fast path, not a gate.
11
+ */
12
+ const STALE_OVERRIDE_THRESHOLD = 2;
13
+ const ACTIVE_UPHELD_THRESHOLD = 3;
14
/**
 * Record the outcome of a served brief and update every rule it served.
 *
 * "uncorrected" counts as upheld for each rule and may promote a validated
 * rule to active; "corrected" counts as overridden, links the correction
 * instance as a counterexample, and may mark the rule stale. Any other result
 * leaves the hypotheses untouched. The outcome is saved and returned.
 *
 * Throws when `input.briefId` does not match a stored brief.
 */
export function recordOutcome(store, input, now = () => new Date()) {
    const brief = store.getBrief(input.briefId);
    if (!brief) {
        throw new Error(`outcome references unknown brief ${input.briefId}`);
    }
    const upheld = input.result === "uncorrected";
    const overridden = input.result === "corrected";
    const servedRules = upheld || overridden ? brief.rules : [];
    for (const { hypothesisId } of servedRules) {
        const hypothesis = store.getHypothesis(hypothesisId);
        if (!hypothesis) {
            continue;
        }
        const validity = hypothesis.validity;
        if (upheld) {
            validity.upheld += 1;
            hypothesis.lastConfirmedAt = now().toISOString();
            // Autonomous promotion: enough upheld outcomes and never overridden.
            const earnedPromotion = hypothesis.status === "validated" &&
                validity.upheld >= ACTIVE_UPHELD_THRESHOLD &&
                validity.overridden === 0;
            if (earnedPromotion) {
                hypothesis.status = "active";
            }
        }
        else {
            validity.overridden += 1;
            const correctionId = input.correctionInstanceId;
            if (correctionId && !hypothesis.counterexampleInstanceIds.includes(correctionId)) {
                hypothesis.counterexampleInstanceIds.push(correctionId);
            }
            // Only servable rules can go stale, and only once overrides both
            // reach the threshold and outnumber upheld outcomes.
            const isServable = hypothesis.status === "validated" || hypothesis.status === "active";
            const wornOut = validity.overridden >= STALE_OVERRIDE_THRESHOLD &&
                validity.overridden > validity.upheld;
            if (wornOut && isServable) {
                hypothesis.status = "stale";
            }
        }
        store.saveHypothesis(hypothesis);
    }
    const outcome = {
        id: newId("out", now().getTime()),
        briefId: input.briefId,
        result: input.result,
    };
    if (input.correctionInstanceId) {
        outcome.correctionInstanceId = input.correctionInstanceId;
    }
    outcome.recordedAt = now().toISOString();
    store.saveOutcome(outcome);
    return outcome;
}
@@ -0,0 +1,19 @@
1
+ import { parseRef } from "../core/refs.js";
2
/**
 * Dereference an athena:// ref to its entity. Shared by MCP and CLI.
 *
 * The ref is parsed into a kind and an id, and the id is looked up through
 * the store getter for that kind. A kind with no getter yields undefined.
 */
export function openRef(store, ref) {
    const { kind, id } = parseRef(ref);
    const getters = new Map([
        ["instance", () => store.getInstance(id)],
        ["hypothesis", () => store.getHypothesis(id)],
        ["source", () => store.getSource(id)],
        ["object", () => store.getObject(id)],
        ["brief", () => store.getBrief(id)],
        ["outcome", () => store.getOutcome(id)],
    ]);
    const lookUp = getters.get(kind);
    return lookUp === undefined ? undefined : lookUp();
}