useathena 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -17
- package/dist/api/server.js +30 -4
- package/dist/cli/commands.js +81 -5
- package/dist/cli/export.js +35 -0
- package/dist/cli/service.js +99 -0
- package/dist/cli/setup.js +30 -2
- package/dist/cli.js +57 -24
- package/dist/core/fixtures.js +26 -0
- package/dist/core/refs.js +2 -0
- package/dist/engine/engine.js +15 -6
- package/dist/engine/facts.js +80 -0
- package/dist/engine/parse.js +31 -6
- package/dist/engine/prompts.js +10 -1
- package/dist/eval/run-eval.js +3 -3
- package/dist/mcp/server.js +38 -10
- package/dist/sensors/claude-code-hook.js +31 -2
- package/dist/serve/auto-learn.js +56 -0
- package/dist/serve/auto-outcome.js +63 -0
- package/dist/serve/brief.js +64 -11
- package/dist/store/open.js +4 -0
- package/dist/store/store.js +109 -0
- package/docs/schema.md +65 -6
- package/package.json +1 -1
package/dist/core/fixtures.js
CHANGED
|
@@ -41,6 +41,32 @@ export function makeInstance(overrides = {}) {
|
|
|
41
41
|
};
|
|
42
42
|
return { ...base, ...overrides };
|
|
43
43
|
}
|
|
44
|
+
/**
 * Test fixture: builds an object record with canned defaults.
 *
 * @param {object} [overrides] - Properties that replace the defaults key by key.
 * @returns {object} A new record; `id` comes from `newId("obj")` and
 *   `validFrom` is the Unix epoch as an ISO string unless overridden.
 */
export function makeObject(overrides = {}) {
    // Defaults are listed first so that any key present in `overrides` wins.
    return {
        id: newId("obj"),
        kind: "org",
        name: "Acme Corp",
        aliases: ["acme", "acme.com"],
        properties: {},
        validFrom: new Date(0).toISOString(),
        ...overrides,
    };
}
|
|
55
|
+
/**
 * Test fixture: builds a fact record with canned defaults.
 *
 * @param {object} [overrides] - Properties that replace the defaults key by key.
 * @returns {object} A new record. By default it points at freshly generated
 *   object and instance ids, is first seen and last confirmed at the Unix
 *   epoch, and goes stale 180 days after the epoch.
 */
export function makeFact(overrides = {}) {
    // Defaults are listed first so that any key present in `overrides` wins.
    return {
        id: newId("fct"),
        objectId: newId("obj"),
        statement: "Acme's contract renewal is in September 2026.",
        domain: "email.outreach",
        supportingInstanceIds: [newId("ins")],
        confidence: 0.7,
        firstSeenAt: new Date(0).toISOString(),
        lastConfirmedAt: new Date(0).toISOString(),
        staleAfter: new Date(180 * 24 * 3600 * 1000).toISOString(),
        visibility: "user_private",
        ...overrides,
    };
}
|
|
44
70
|
export function makeHypothesis(overrides = {}) {
|
|
45
71
|
const base = {
|
|
46
72
|
id: newId("hyp"),
|
package/dist/core/refs.js
CHANGED
package/dist/engine/engine.js
CHANGED
|
@@ -28,13 +28,15 @@ export class LlmHypothesisEngine {
|
|
|
28
28
|
this.replaySamples = Math.max(1, options.replaySamples ?? DEFAULT_REPLAY_SAMPLES);
|
|
29
29
|
}
|
|
30
30
|
async infer(instances) {
|
|
31
|
-
const
|
|
31
|
+
const output = { hypotheses: [], facts: [] };
|
|
32
32
|
for (const [domain, cluster] of clusterByDomain(instances)) {
|
|
33
33
|
if (cluster.length < MIN_CLUSTER_SIZE)
|
|
34
34
|
continue;
|
|
35
|
-
|
|
35
|
+
const clusterOutput = await this.inferCluster(domain, cluster);
|
|
36
|
+
output.hypotheses.push(...clusterOutput.hypotheses);
|
|
37
|
+
output.facts.push(...clusterOutput.facts);
|
|
36
38
|
}
|
|
37
|
-
return
|
|
39
|
+
return output;
|
|
38
40
|
}
|
|
39
41
|
async inferCluster(domain, cluster) {
|
|
40
42
|
const { train, holdout } = splitHoldout(cluster);
|
|
@@ -42,9 +44,9 @@ export class LlmHypothesisEngine {
|
|
|
42
44
|
system: INFERENCE_SYSTEM,
|
|
43
45
|
prompt: inferencePrompt(domain, train),
|
|
44
46
|
});
|
|
45
|
-
const
|
|
47
|
+
const parsed = parseInferenceResponse(raw, train.length);
|
|
46
48
|
const results = [];
|
|
47
|
-
for (const candidate of
|
|
49
|
+
for (const candidate of parsed.hypotheses) {
|
|
48
50
|
const supportingInstanceIds = candidate.supporting.map((index) => train[index - 1].id);
|
|
49
51
|
if (supportingInstanceIds.length === 0)
|
|
50
52
|
continue; // everything cites — uncited rules are dropped
|
|
@@ -66,7 +68,14 @@ export class LlmHypothesisEngine {
|
|
|
66
68
|
}
|
|
67
69
|
results.push(hypothesis);
|
|
68
70
|
}
|
|
69
|
-
|
|
71
|
+
const facts = parsed.facts.map((fact) => ({
|
|
72
|
+
entity: fact.entity,
|
|
73
|
+
statement: fact.statement,
|
|
74
|
+
domain,
|
|
75
|
+
supportingInstanceIds: fact.supporting.map((index) => train[index - 1].id),
|
|
76
|
+
confidence: fact.confidence,
|
|
77
|
+
}));
|
|
78
|
+
return { hypotheses: results, facts };
|
|
70
79
|
}
|
|
71
80
|
/** Majority vote across samples; stops early once a side is unreachable. */
|
|
72
81
|
async replayVerdict(hypothesis, heldOut) {
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { newId } from "../core/ids.js";
|
|
2
|
+
import { changedRatio } from "../capture/ingest.js";
|
|
3
|
+
/**
|
|
4
|
+
* Turn engine fact drafts into stored records: resolve or create the entity,
|
|
5
|
+
* then dedupe against that entity's existing facts — a restated fact confirms
|
|
6
|
+
* (bumps lastConfirmedAt, merges evidence) instead of duplicating. Facts are
|
|
7
|
+
* never entered manually and agents cannot write them directly; this is the
|
|
8
|
+
* only path in.
|
|
9
|
+
*/
|
|
10
|
+
const FACT_STALE_DAYS = 180;
|
|
11
|
+
/** Statements this close (normalized edit distance) are the same fact. */
|
|
12
|
+
const SAME_FACT_RATIO = 0.25;
|
|
13
|
+
/**
 * Persist engine fact drafts, confirming an existing fact when the same
 * statement is already stored for the resolved entity.
 *
 * @param {object} store - Must provide `listFacts({ objectId })` and
 *   `saveFact(fact)`, plus whatever `resolveOrCreateObject` needs.
 * @param {Iterable<object>} drafts - Drafts with `entity`, `statement`,
 *   `domain`, `supportingInstanceIds` and `confidence`.
 * @param {() => Date} [now] - Clock; defaults to the wall clock.
 * @returns {{created: object[], confirmed: object[]}} Facts written by this
 *   call, split by whether they were new or a restatement.
 */
export function materializeFacts(store, drafts, now = () => new Date()) {
    const created = [];
    const confirmed = [];
    for (const draft of drafts) {
        const { id: objectId } = resolveOrCreateObject(store, draft, now);
        const match = findSameFact(store.listFacts({ objectId }), draft.statement);
        if (!match) {
            // First sighting of this statement for the entity: store a new fact.
            const fact = {
                id: newId("fct", now().getTime()),
                objectId,
                statement: draft.statement,
                domain: draft.domain,
                supportingInstanceIds: draft.supportingInstanceIds,
                confidence: draft.confidence,
                firstSeenAt: now().toISOString(),
                lastConfirmedAt: now().toISOString(),
                staleAfter: staleAfter(now),
                visibility: "user_private",
            };
            store.saveFact(fact);
            created.push(fact);
            continue;
        }
        // Restatement: refresh the timestamps, keep the higher confidence, and
        // union the evidence ids (a Set drops repeats, first-seen order is kept).
        match.lastConfirmedAt = now().toISOString();
        match.staleAfter = staleAfter(now);
        match.confidence = Math.max(match.confidence, draft.confidence);
        const evidence = new Set([...match.supportingInstanceIds, ...draft.supportingInstanceIds]);
        match.supportingInstanceIds = Array.from(evidence);
        store.saveFact(match);
        confirmed.push(match);
    }
    return { created, confirmed };
}
|
|
46
|
+
/**
 * Find the stored object a draft's entity refers to, or create one.
 *
 * The entity's name and then each alias are tried against
 * `store.resolveObject`; the first lookup that returns a hit wins. Aliases on
 * the draft that the stored object does not already know (compared
 * case-insensitively against its name and aliases) are appended and saved.
 *
 * @param {object} store - Must provide `resolveObject(alias)` (returning an
 *   array of matches) and `saveObject(object)`.
 * @param {object} draft - Draft whose `entity` has `name`, `kind`, `aliases`.
 * @param {() => Date} now - Clock used for the new object's id and `validFrom`.
 * @returns {object} The matched (possibly updated) or newly created object.
 */
function resolveOrCreateObject(store, draft, now) {
    const { entity } = draft;
    for (const alias of [entity.name, ...entity.aliases]) {
        const object = store.resolveObject(alias)[0];
        if (!object) {
            continue;
        }
        const known = new Set([object.name, ...object.aliases].map((a) => a.toLowerCase()));
        const fresh = [];
        for (const candidate of entity.aliases) {
            const key = candidate.toLowerCase();
            if (known.has(key)) {
                continue;
            }
            // Remember the key so a second spelling of the same alias in this
            // draft ("ACME.com" / "acme.com") is not appended twice.
            known.add(key);
            fresh.push(candidate);
        }
        if (fresh.length > 0) {
            object.aliases = [...object.aliases, ...fresh];
            store.saveObject(object);
        }
        return object;
    }
    const object = {
        id: newId("obj", now().getTime()),
        kind: entity.kind,
        name: entity.name,
        // Copy so the stored object does not share an array with the draft.
        aliases: [...entity.aliases],
        properties: {},
        validFrom: now().toISOString(),
    };
    store.saveObject(object);
    return object;
}
|
|
71
|
+
/**
 * Return the first non-superseded fact whose normalized statement is within
 * SAME_FACT_RATIO of the given statement (as measured by `changedRatio`),
 * or `undefined` when there is none.
 *
 * @param {object[]} existing - Candidate facts, each with `statement` and an
 *   optional `supersededById`.
 * @param {string} statement - The statement to look for.
 * @returns {object|undefined}
 */
function findSameFact(existing, statement) {
    const target = normalize(statement);
    for (const fact of existing) {
        if (fact.supersededById) {
            continue; // superseded facts never count as a match
        }
        if (changedRatio(normalize(fact.statement), target) <= SAME_FACT_RATIO) {
            return fact;
        }
    }
    return undefined;
}
|
|
75
|
+
/**
 * Canonicalize a statement for comparison: lower-case it, drop everything
 * that is not a letter, a number or whitespace, collapse whitespace runs to
 * a single space, and trim.
 *
 * @param {string} text
 * @returns {string}
 */
function normalize(text) {
    return text
        .toLowerCase()
        // Keep all whitespace (\s), not just " ": otherwise tabs and newlines
        // are deleted here and the words on either side fuse together.
        .replace(/[^\p{L}\p{N}\s]/gu, "")
        .replace(/\s+/g, " ")
        .trim();
}
|
|
78
|
+
/**
 * ISO timestamp FACT_STALE_DAYS days after the current time.
 *
 * @param {() => Date} now - Clock.
 * @returns {string}
 */
function staleAfter(now) {
    const msPerDay = 24 * 3600 * 1000;
    const deadline = new Date(now().getTime() + FACT_STALE_DAYS * msPerDay);
    return deadline.toISOString();
}
|
package/dist/engine/parse.js
CHANGED
|
@@ -1,13 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
* Strict parsing of the inference response. The model is untrusted input;
|
|
3
|
-
* everything is validated, indices are checked against the instance count,
|
|
4
|
-
* and a malformed response throws with a message naming what was wrong.
|
|
5
|
-
*/
|
|
1
|
+
const OBJECT_KINDS = ["person", "org", "project", "repo", "process", "custom"];
|
|
6
2
|
/**
 * Validate a raw inference response and split it into hypotheses and facts.
 *
 * @param {unknown} raw - The response object.
 * @param {number} instanceCount - Passed through to the per-entry parsers.
 * @returns {{hypotheses: object[], facts: object[]}}
 * @throws {Error} When `raw` is not an object with a "hypotheses" array.
 *   Whatever `parseHypothesis` throws for a bad entry also propagates.
 */
export function parseInferenceResponse(raw, instanceCount) {
    const hasHypotheses = typeof raw === "object" && raw !== null && Array.isArray(raw.hypotheses);
    if (!hasHypotheses) {
        throw new Error(`inference response missing "hypotheses" array: ${preview(raw)}`);
    }
    const hypotheses = raw.hypotheses.map((entry, i) => parseHypothesis(entry, i, instanceCount));
    // "facts" is optional in the response — older prompts and terse models omit it.
    let facts = [];
    if (Array.isArray(raw.facts)) {
        // parseFact returns [] or a one-element array, so flatMap drops rejects.
        facts = raw.facts.flatMap((entry, i) => parseFact(entry, i, instanceCount));
    }
    return { hypotheses, facts };
}
|
|
14
|
+
/**
 * Parse one entry of the response's "facts" array.
 *
 * Malformed facts are dropped, not fatal — rules are the primary output.
 * An entry is dropped when it is not an object, when the entity name or the
 * statement is missing or blank, when `requireIndexArray` rejects its
 * "supporting" list, or when that list is empty.
 *
 * @param {unknown} entry - The raw fact entry.
 * @param {number} position - Index of the entry, forwarded to `requireIndexArray`.
 * @param {number} instanceCount - Forwarded to `requireIndexArray`.
 * @returns {object[]} `[]` when dropped, otherwise a single-element array so
 *   the caller can `flatMap`.
 */
function parseFact(entry, position, instanceCount) {
    if (entry === null || typeof entry !== "object") {
        return [];
    }
    const entity = entry.entity;
    const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
    const name = trimmedOrEmpty(entity?.name);
    const statement = trimmedOrEmpty(entry.statement);
    if (name === "" || statement === "") {
        return [];
    }
    let supporting;
    try {
        supporting = requireIndexArray(entry, "supporting", position, instanceCount);
    }
    catch {
        // An invalid citation list only costs this one fact.
        return [];
    }
    if (supporting.length === 0) {
        return [];
    }
    // Unrecognized kinds fall back to "custom"; a non-numeric confidence to 0.5.
    const kind = OBJECT_KINDS.includes(entity?.kind) ? entity?.kind : "custom";
    let confidence = 0.5;
    if (typeof entry.confidence === "number") {
        confidence = clamp(entry.confidence, 0.05, 0.95);
    }
    const aliases = stringArray(entity?.aliases);
    return [{ entity: { name, kind, aliases }, statement, supporting, confidence }];
}
|
|
12
37
|
function parseHypothesis(entry, position, instanceCount) {
|
|
13
38
|
if (typeof entry !== "object" || entry === null) {
|
package/dist/engine/prompts.js
CHANGED
|
@@ -18,8 +18,17 @@ Extract the transferable judgment rules behind these edits. Requirements:
|
|
|
18
18
|
- Cite supporting instances by their number. Cite counterexamples (instances that cut against the rule) by number too.
|
|
19
19
|
- confidence is 0..1: your honest estimate that this rule reflects a stable preference rather than coincidence.
|
|
20
20
|
|
|
21
|
+
Separately, extract FACTS: durable, declarative, entity-grounded statements an agent would need before acting in this domain ("Acme's renewal is in September", "Dana is the CFO and prefers bullet summaries", "the team tracks work in Linear"). Requirements for facts:
|
|
22
|
+
|
|
23
|
+
- Each fact is about ONE named entity (a person, org, project, repo, process, or tool) and must be evident in the instances — never world knowledge, never speculation.
|
|
24
|
+
- Facts are stable context, not judgments and not events ("sent an email Tuesday" is not a fact; "Priya is the GTM lead at Vanta" is).
|
|
25
|
+
- No platitudes. If it would be true of any company, it is not a fact worth storing.
|
|
26
|
+
- Cite supporting instances by number. Give each entity its kind and any aliases visible in the evidence (email addresses, handles, short names).
|
|
27
|
+
- It is normal to extract zero facts.
|
|
28
|
+
|
|
21
29
|
Respond with STRICT JSON only, no prose, matching:
|
|
22
|
-
{"hypotheses": [{"rule": string, "cues": string[], "expectancies": string[], "goal": string?, "appliesWhen": string[], "doesNotApplyWhen": string[], "supporting": number[], "counterexamples": number[], "rationale": string, "confidence": number}]
|
|
30
|
+
{"hypotheses": [{"rule": string, "cues": string[], "expectancies": string[], "goal": string?, "appliesWhen": string[], "doesNotApplyWhen": string[], "supporting": number[], "counterexamples": number[], "rationale": string, "confidence": number}],
|
|
31
|
+
"facts": [{"entity": {"name": string, "kind": "person"|"org"|"project"|"repo"|"process"|"custom", "aliases": string[]}, "statement": string, "supporting": number[], "confidence": number}]}`;
|
|
23
32
|
export function inferencePrompt(domain, instances) {
|
|
24
33
|
const blocks = instances.map((instance, i) => renderInstance(instance, i + 1));
|
|
25
34
|
return `Domain: ${domain}\n\n${blocks.join("\n\n")}`;
|
package/dist/eval/run-eval.js
CHANGED
|
@@ -21,9 +21,9 @@ const judge = useLlmJudge ? new LlmJudge(model) : new RubricJudge();
|
|
|
21
21
|
const inferred = new Map();
|
|
22
22
|
const engine = {
|
|
23
23
|
infer: async (instances) => {
|
|
24
|
-
const
|
|
25
|
-
inferred.set(instances[0]?.situation.domain ?? "unknown", hypotheses);
|
|
26
|
-
return hypotheses;
|
|
24
|
+
const output = await inner.infer(instances);
|
|
25
|
+
inferred.set(instances[0]?.situation.domain ?? "unknown", output.hypotheses);
|
|
26
|
+
return output.hypotheses;
|
|
27
27
|
},
|
|
28
28
|
};
|
|
29
29
|
console.log(`engine model: ${model.id} judge: ${useLlmJudge ? `llm (${model.id})` : "rubric"}`);
|
package/dist/mcp/server.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
1
2
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
3
|
import { z } from "zod";
|
|
3
|
-
import {
|
|
4
|
+
import { newId } from "../core/ids.js";
|
|
5
|
+
import { ingestAndMatch } from "../serve/auto-outcome.js";
|
|
6
|
+
import { maybeAutoLearn } from "../serve/auto-learn.js";
|
|
4
7
|
import { compileBrief } from "../serve/brief.js";
|
|
5
8
|
import { recordOutcome } from "../serve/outcome.js";
|
|
6
9
|
import { openRef } from "../store/open.js";
|
|
@@ -22,7 +25,7 @@ const OUTCOME_RESULTS = ["uncorrected", "corrected", "abandoned", "unknown"];
|
|
|
22
25
|
export function buildMcpServer(store) {
|
|
23
26
|
const server = new McpServer({ name: "athena", version: "0.1.0" });
|
|
24
27
|
server.registerTool("athena_brief", {
|
|
25
|
-
description: "Call this BEFORE acting on a task. Returns the tacit judgment rules that apply (with confidence and boundary conditions), relevant facts with citations, things you must not assume, open questions,
|
|
28
|
+
description: "Call this BEFORE acting on a task. Returns the tacit judgment rules that apply (with confidence and boundary conditions), relevant facts with citations, things you must not assume, open questions, a readiness verdict (act / act_with_caveats / inspect_first / ask_human), and a map of everything else athena knows (use athena_search/athena_open to pull what you need — you are not flooded by default). Honor the boundaries and do-not-assume list. After drafting, register your artifact with athena_record type=output so the outcome can be detected automatically; or report it explicitly with type=outcome.",
|
|
26
29
|
inputSchema: {
|
|
27
30
|
task: z.string().describe("The task you are about to perform, in one sentence"),
|
|
28
31
|
domain: z
|
|
@@ -44,18 +47,19 @@ export function buildMcpServer(store) {
|
|
|
44
47
|
return asJson(entity);
|
|
45
48
|
});
|
|
46
49
|
server.registerTool("athena_record", {
|
|
47
|
-
description: "Report back to athena. type=
|
|
50
|
+
description: "Report back to athena. type=output: register the artifact you produced from a brief (an email draft, a message) — when a sensor later observes what the human actually sent, athena matches it and records the outcome automatically. type=outcome: explicitly report whether your briefed output was accepted unchanged (uncorrected) or edited (corrected) — this is how rules earn or lose trust. type=event: capture a judgment moment you observed (a correction of your output, a human decision, a failed approach, an explicit 'remember this'). Events become evidence for new rules and facts; nothing you record changes durable rules directly.",
|
|
48
51
|
inputSchema: {
|
|
49
|
-
type: z.enum(["outcome", "event"]),
|
|
50
|
-
briefId: z.string().optional().describe("outcome: the brief this
|
|
52
|
+
type: z.enum(["outcome", "event", "output"]),
|
|
53
|
+
briefId: z.string().optional().describe("outcome/output: the brief this belongs to"),
|
|
51
54
|
result: z.enum(OUTCOME_RESULTS).optional().describe("outcome: what happened"),
|
|
52
55
|
correctionInstanceId: z
|
|
53
56
|
.string()
|
|
54
57
|
.optional()
|
|
55
58
|
.describe("outcome: if corrected, the instance id of the captured correction (record the event first)"),
|
|
59
|
+
content: z.string().optional().describe("output: the artifact you produced, verbatim"),
|
|
56
60
|
kind: z.enum(INSTANCE_KINDS).optional().describe("event: what kind of judgment moment"),
|
|
57
61
|
summary: z.string().optional().describe("event: one-line situation summary"),
|
|
58
|
-
domain: z.string().optional().describe('event: dot-path domain, e.g. "email.outreach"'),
|
|
62
|
+
domain: z.string().optional().describe('event/output: dot-path domain, e.g. "email.outreach"'),
|
|
59
63
|
task: z.string().optional().describe("event: what was being attempted"),
|
|
60
64
|
before: z.string().optional().describe("event: the draft/output before the human acted"),
|
|
61
65
|
after: z.string().optional().describe("event: the human's version (omit for approvals)"),
|
|
@@ -74,6 +78,24 @@ export function buildMcpServer(store) {
|
|
|
74
78
|
});
|
|
75
79
|
return asJson(outcome);
|
|
76
80
|
}
|
|
81
|
+
if (args.type === "output") {
|
|
82
|
+
if (!args.briefId || !args.content)
|
|
83
|
+
throw new Error("output requires briefId and content");
|
|
84
|
+
const brief = store.getBrief(args.briefId);
|
|
85
|
+
if (!brief)
|
|
86
|
+
throw new Error(`unknown brief ${args.briefId}`);
|
|
87
|
+
const draft = {
|
|
88
|
+
id: newId("drf"),
|
|
89
|
+
briefId: brief.id,
|
|
90
|
+
content: args.content,
|
|
91
|
+
contentHash: createHash("sha256").update(args.content).digest("hex").slice(0, 16),
|
|
92
|
+
mediaType: "text/plain",
|
|
93
|
+
domain: args.domain ?? brief.domain ?? "general",
|
|
94
|
+
recordedAt: new Date().toISOString(),
|
|
95
|
+
};
|
|
96
|
+
store.saveDraft(draft);
|
|
97
|
+
return asJson({ registered: draft.id, watchingDomain: draft.domain });
|
|
98
|
+
}
|
|
77
99
|
if (!args.kind || !args.summary)
|
|
78
100
|
throw new Error("event requires kind and summary");
|
|
79
101
|
const event = {
|
|
@@ -89,14 +111,20 @@ export function buildMcpServer(store) {
|
|
|
89
111
|
...(args.before !== undefined ? { before: { mediaType: "text/plain", content: args.before } } : {}),
|
|
90
112
|
...(args.after !== undefined ? { after: { mediaType: "text/plain", content: args.after } } : {}),
|
|
91
113
|
};
|
|
92
|
-
const instance =
|
|
93
|
-
|
|
114
|
+
const { instance, autoOutcome } = ingestAndMatch(store, event);
|
|
115
|
+
maybeAutoLearn(store);
|
|
116
|
+
return asJson({
|
|
117
|
+
recorded: instance.id,
|
|
118
|
+
kind: instance.kind,
|
|
119
|
+
domain: instance.situation.domain,
|
|
120
|
+
...(autoOutcome ? { autoOutcome: { id: autoOutcome.id, result: autoOutcome.result } } : {}),
|
|
121
|
+
});
|
|
94
122
|
});
|
|
95
123
|
server.registerTool("athena_search", {
|
|
96
|
-
description: "Lexical search across captured judgment instances, learned rules, and sources. Returns athena:// refs ranked by relevance — open them with athena_open.",
|
|
124
|
+
description: "Lexical search across captured judgment instances, learned rules, extracted facts, and sources. Returns athena:// refs ranked by relevance — open them with athena_open. The brief's map tells you what is worth searching for.",
|
|
97
125
|
inputSchema: {
|
|
98
126
|
query: z.string(),
|
|
99
|
-
lane: z.enum(["instance", "hypothesis", "source"]).optional().describe("Restrict to one lane"),
|
|
127
|
+
lane: z.enum(["instance", "hypothesis", "source", "fact"]).optional().describe("Restrict to one lane"),
|
|
100
128
|
limit: z.number().int().min(1).max(50).optional(),
|
|
101
129
|
},
|
|
102
130
|
}, ({ query, lane, limit }) => asJson(store.search(query, lane, limit ?? 20)));
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { closeSync, openSync, readSync, statSync } from "node:fs";
|
|
2
2
|
import { basename } from "node:path";
|
|
3
|
-
import {
|
|
3
|
+
import { ingestAndMatch } from "../serve/auto-outcome.js";
|
|
4
4
|
const EXPLICIT_MARKER = /^(remember|athena)\s*[:,]\s*(.+)$/is;
|
|
5
5
|
/** Redirects at the start of a prompt: the user is stopping or reversing the agent. */
|
|
6
6
|
const REDIRECT_START = /^(no|nope|wrong|wait|stop|hold on|don't|do not|undo|revert|not like that)\b/i;
|
|
@@ -8,6 +8,13 @@ const REDIRECT_START = /^(no|nope|wrong|wait|stop|hold on|don't|do not|undo|reve
|
|
|
8
8
|
const REDIRECT_START_EXCEPTIONS = /^no (worries|problem|rush|need)\b/i;
|
|
9
9
|
/** Correction phrases anywhere in the prompt. */
|
|
10
10
|
const REDIRECT_INLINE = /\b(that's (wrong|not right|not what)|not what i (asked|meant|wanted)|why did you|you should(n't| not) have|i didn't ask (for|you)|never do that|don't do that again|you keep|again with)\b/i;
|
|
11
|
+
/** Approvals at the start of a short prompt: the user is accepting the agent's output. */
|
|
12
|
+
const APPROVAL_START = /^(lgtm|looks good|looks great|looks right|love it|ship it|approved|that works|works for me)\b/i;
|
|
13
|
+
/** Bare one-word sign-offs count too — but only on their own. */
|
|
14
|
+
const APPROVAL_EXACT = /^(perfect|great|nice|done|exactly)[.!\s]*$/i;
|
|
15
|
+
/** "looks good, but…" is a correction wearing an approval's hat. */
|
|
16
|
+
const APPROVAL_REVERSAL = /\b(but|except|however|though|almost|one (thing|issue|nit)|small (thing|nit)|can you|change|fix)\b/i;
|
|
17
|
+
const APPROVAL_MAX_LENGTH = 100;
|
|
11
18
|
export function detectSignal(prompt) {
|
|
12
19
|
const trimmed = prompt.trim();
|
|
13
20
|
if (trimmed.length === 0)
|
|
@@ -22,6 +29,11 @@ export function detectSignal(prompt) {
|
|
|
22
29
|
if (REDIRECT_INLINE.test(trimmed)) {
|
|
23
30
|
return { kind: "override", reason: "correction_phrase" };
|
|
24
31
|
}
|
|
32
|
+
if (trimmed.length <= APPROVAL_MAX_LENGTH &&
|
|
33
|
+
(APPROVAL_START.test(trimmed) || APPROVAL_EXACT.test(trimmed)) &&
|
|
34
|
+
!APPROVAL_REVERSAL.test(trimmed)) {
|
|
35
|
+
return { kind: "approval", reason: "approval_phrase" };
|
|
36
|
+
}
|
|
25
37
|
return undefined;
|
|
26
38
|
}
|
|
27
39
|
export function handleUserPrompt(store, input) {
|
|
@@ -45,6 +57,23 @@ export function handleUserPrompt(store, input) {
|
|
|
45
57
|
after: { mediaType: "text/plain", content: signal.note },
|
|
46
58
|
};
|
|
47
59
|
}
|
|
60
|
+
else if (signal.kind === "approval") {
|
|
61
|
+
// What was approved (the transcript tail) is the evidence — it can also
|
|
62
|
+
// resolve a registered agent draft into an uncorrected outcome.
|
|
63
|
+
const lastTurn = input.transcript_path ? lastAssistantTurn(input.transcript_path) : undefined;
|
|
64
|
+
event = {
|
|
65
|
+
sensorId: "sen_claude_code",
|
|
66
|
+
emittedAt,
|
|
67
|
+
kind: "approval",
|
|
68
|
+
situation: {
|
|
69
|
+
summary: `user approved the agent's output: ${truncate(input.prompt, 100)}`,
|
|
70
|
+
domain,
|
|
71
|
+
app: "claude-code",
|
|
72
|
+
},
|
|
73
|
+
...(lastTurn !== undefined ? { before: { mediaType: "text/markdown", content: lastTurn } } : {}),
|
|
74
|
+
raw: { reason: signal.reason, sessionId: input.session_id },
|
|
75
|
+
};
|
|
76
|
+
}
|
|
48
77
|
else {
|
|
49
78
|
const lastTurn = input.transcript_path ? lastAssistantTurn(input.transcript_path) : undefined;
|
|
50
79
|
event = {
|
|
@@ -61,7 +90,7 @@ export function handleUserPrompt(store, input) {
|
|
|
61
90
|
raw: { reason: signal.reason, sessionId: input.session_id },
|
|
62
91
|
};
|
|
63
92
|
}
|
|
64
|
-
const instance =
|
|
93
|
+
const { instance } = ingestAndMatch(store, event);
|
|
65
94
|
return { captured: instance.id, kind: instance.kind };
|
|
66
95
|
}
|
|
67
96
|
const TAIL_BYTES = 64_000;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { closeSync, mkdirSync, openSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { dbPath } from "../config.js";
|
|
6
|
+
/**
|
|
7
|
+
* Keeps the loop from stalling on a forgotten `athena learn`: every sensor
|
|
8
|
+
* entry point calls maybeAutoLearn after ingesting, and once enough new
|
|
9
|
+
* evidence has accumulated a detached `athena learn` runs in the background.
|
|
10
|
+
*
|
|
11
|
+
* Guards, in order: a kill switch (ATHENA_AUTO_LEARN=off), an evidence
|
|
12
|
+
* threshold (don't spend inference on every capture), and a cooldown (don't
|
|
13
|
+
* stack runs while one is still thinking). Never throws — a broken trigger
|
|
14
|
+
* must never break capture.
|
|
15
|
+
*/
|
|
16
|
+
/** Stamped by cmdLearn on every completed run (manual or automatic). */
|
|
17
|
+
export const LAST_LEARN_AT = "learn.lastRunAt";
|
|
18
|
+
const LAST_ATTEMPT_AT = "autoLearn.lastAttemptAt";
|
|
19
|
+
export const AUTO_LEARN_THRESHOLD = 5;
|
|
20
|
+
const COOLDOWN_MS = 30 * 60 * 1000;
|
|
21
|
+
/**
 * Whether automatic learning is switched on.
 *
 * Auto-learn is on unless ATHENA_AUTO_LEARN is set to "0", "false", "off" or
 * "no". The comparison ignores case and surrounding whitespace, so a stray
 * space from a .env file or a shell `set` does not defeat the kill switch.
 *
 * @param {Record<string, string | undefined>} [env] - Environment to read;
 *   defaults to `process.env`.
 * @returns {boolean}
 */
export function autoLearnEnabled(env = process.env) {
    const value = (env.ATHENA_AUTO_LEARN ?? "").trim().toLowerCase();
    return !["0", "false", "off", "no"].includes(value);
}
|
|
25
|
+
/**
 * Launch a background learn run when enough new evidence has accumulated.
 *
 * Checks run in this order: the kill switch, the evidence threshold, then
 * the cooldown since the last attempt. The attempt timestamp is written
 * before the launcher is invoked. Any exception is swallowed and reported
 * as a non-launch.
 *
 * @param {object} store - Must provide `getMeta`, `setMeta` and
 *   `countInstancesSince`.
 * @param {object} [options]
 * @param {() => Date} [options.now] - Clock; defaults to the wall clock.
 * @param {object} [options.env] - Environment passed to `autoLearnEnabled`.
 * @param {() => void} [options.launch] - Launcher; defaults to
 *   `launchDetachedLearn`.
 * @returns {{launched: boolean, reason: string, pending: number}}
 */
export function maybeAutoLearn(store, options = {}) {
    const clock = options.now ?? (() => new Date());
    const declined = (reason, pending) => ({ launched: false, reason, pending });
    try {
        const pending = store.countInstancesSince(store.getMeta(LAST_LEARN_AT));
        if (!autoLearnEnabled(options.env)) {
            return declined("disabled via ATHENA_AUTO_LEARN", pending);
        }
        if (pending < AUTO_LEARN_THRESHOLD) {
            return declined(`below threshold (${pending}/${AUTO_LEARN_THRESHOLD})`, pending);
        }
        const lastAttempt = store.getMeta(LAST_ATTEMPT_AT);
        if (lastAttempt) {
            const elapsedMs = clock().getTime() - new Date(lastAttempt).getTime();
            if (elapsedMs < COOLDOWN_MS) {
                return declined("cooling down", pending);
            }
        }
        // Stamp first, so the cooldown applies even if the launcher fails.
        store.setMeta(LAST_ATTEMPT_AT, clock().toISOString());
        const launch = options.launch ?? launchDetachedLearn;
        launch();
        return { launched: true, reason: "threshold reached", pending };
    }
    catch {
        // Deliberate best-effort: a broken trigger must never break capture.
        return declined("trigger error (ignored)", 0);
    }
}
|
|
47
|
+
/**
 * Spawn `athena learn` detached so the calling sensor returns immediately.
 *
 * The child runs `<package root>/bin/athena learn` under the current Node
 * executable, with stdout and stderr appended to `auto-learn.log` in the
 * directory that holds the database. Creating the log directory or opening
 * the log file can throw synchronously; a failure to start the child is
 * reported asynchronously and is ignored.
 */
function launchDetachedLearn() {
    const packageRoot = dirname(dirname(dirname(fileURLToPath(import.meta.url))));
    const bin = join(packageRoot, "bin", "athena");
    const logDir = dirname(dbPath());
    mkdirSync(logDir, { recursive: true });
    const log = openSync(join(logDir, "auto-learn.log"), "a");
    try {
        const child = spawn(process.execPath, [bin, "learn"], { detached: true, stdio: ["ignore", log, log] });
        // Spawn failures arrive as an 'error' event; with no listener Node
        // rethrows them as an uncaught exception and takes the sensor down.
        child.on("error", () => { });
        child.unref();
    }
    finally {
        // The child has its own copy of the descriptor once spawn() returns;
        // close ours so a long-lived process does not leak one per launch.
        closeSync(log);
    }
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { changedRatio, ingestSensorEvent } from "../capture/ingest.js";
|
|
2
|
+
import { recordOutcome } from "./outcome.js";
|
|
3
|
+
/**
|
|
4
|
+
* Closes the loop without anyone reporting: an agent registers the draft it
|
|
5
|
+
* produced from a brief (athena_record type=output); when a sensor later
|
|
6
|
+
* captures what the human actually did with it, the instance is matched back
|
|
7
|
+
* to the draft and the outcome records itself — approval → uncorrected,
|
|
8
|
+
* correction → corrected with the instance as the counterexample.
|
|
9
|
+
*
|
|
10
|
+
* Matching is conservative: same domain, recent, and the captured "before"
|
|
11
|
+
* must be essentially the registered draft. A miss costs nothing (outcomes
|
|
12
|
+
* can still be recorded explicitly); a false match would corrupt trust.
|
|
13
|
+
*/
|
|
14
|
+
const MATCH_WINDOW_HOURS = 72;
|
|
15
|
+
/** Captured before-text may differ from the draft by at most this much. */
|
|
16
|
+
const MATCH_RATIO = 0.15;
|
|
17
|
+
/**
 * What every sensor should call: ingest the event, then see whether the new
 * evidence resolves a registered draft into an outcome. Drafts older than
 * the match window are expired before the event is ingested.
 *
 * @param {object} store - Store handed to the ingest and matching steps.
 * @param {object} event - Sensor event to ingest.
 * @param {object} [options] - Forwarded to `ingestSensorEvent`.
 * @param {() => Date} [now] - Clock; defaults to the wall clock.
 * @returns {{instance: object, autoOutcome?: object}} The ingested instance,
 *   plus the automatically recorded outcome when a draft matched.
 */
export function ingestAndMatch(store, event, options = {}, now = () => new Date()) {
    expireStaleDrafts(store, now);
    const instance = ingestSensorEvent(store, event, options);
    const result = { instance };
    const autoOutcome = matchDraftToInstance(store, instance, now);
    if (autoOutcome) {
        // The key is present only when an outcome was actually recorded.
        result.autoOutcome = autoOutcome;
    }
    return result;
}
|
|
27
|
+
/**
 * Drafts that outlive the match window resolve to result "unknown": the loop
 * closes honestly instead of leaving them "awaiting" forever. Each expired
 * draft gets an "unknown" outcome recorded for its brief and is saved with
 * that outcome's id. Swept opportunistically on every ingest.
 *
 * @param {object} store - Must provide `listUnmatchedDrafts` and `saveDraft`.
 * @param {() => Date} [now] - Clock; defaults to the wall clock.
 * @returns {object[]} The outcomes recorded by this sweep.
 */
export function expireStaleDrafts(store, now = () => new Date()) {
    const windowMs = MATCH_WINDOW_HOURS * 3600 * 1000;
    const cutoff = new Date(now().getTime() - windowMs).toISOString();
    // recordedAt and cutoff are compared as strings.
    const expired = store.listUnmatchedDrafts({}).filter((draft) => draft.recordedAt < cutoff);
    const outcomes = [];
    for (const draft of expired) {
        const outcome = recordOutcome(store, { briefId: draft.briefId, result: "unknown" }, now);
        draft.matchedOutcomeId = outcome.id;
        store.saveDraft(draft);
        outcomes.push(outcome);
    }
    return outcomes;
}
|
|
45
|
+
/**
 * Try to resolve a registered draft into an outcome from a captured instance.
 *
 * Only correction, override and approval instances that carry a `before`
 * text are considered. Candidates are the unmatched drafts in the instance's
 * domain recorded within the match window. Of those whose content is within
 * MATCH_RATIO of the captured text, the closest one is chosen, with ties
 * going to the earliest listed. An approval records "uncorrected"; anything
 * else records "corrected" with the instance as the correction.
 *
 * @param {object} store - Must provide `listUnmatchedDrafts` and `saveDraft`.
 * @param {object} instance - The ingested instance.
 * @param {() => Date} [now] - Clock; defaults to the wall clock.
 * @returns {object|undefined} The recorded outcome, or `undefined` when the
 *   instance does not resolve any draft.
 */
export function matchDraftToInstance(store, instance, now = () => new Date()) {
    if (!["correction", "override", "approval"].includes(instance.kind)) {
        return undefined;
    }
    const captured = instance.before?.content;
    if (!captured) {
        return undefined;
    }
    const since = new Date(now().getTime() - MATCH_WINDOW_HOURS * 3600 * 1000).toISOString();
    const candidates = store.listUnmatchedDrafts({ domain: instance.situation.domain, since });
    // Pick the closest draft, not the first one under the threshold: two
    // near-identical drafts for different briefs would otherwise resolve the
    // outcome against whichever happens to be listed first.
    let draft;
    let bestRatio = Infinity;
    for (const candidate of candidates) {
        const ratio = changedRatio(candidate.content, captured);
        if (ratio <= MATCH_RATIO && ratio < bestRatio) {
            draft = candidate;
            bestRatio = ratio;
        }
    }
    if (!draft) {
        return undefined;
    }
    const report = instance.kind === "approval"
        ? { briefId: draft.briefId, result: "uncorrected" }
        : { briefId: draft.briefId, result: "corrected", correctionInstanceId: instance.id };
    const outcome = recordOutcome(store, report, now);
    draft.matchedOutcomeId = outcome.id;
    store.saveDraft(draft);
    return outcome;
}
|
package/dist/serve/brief.js
CHANGED
|
@@ -12,8 +12,9 @@ import { parseRef, refTo } from "../core/refs.js";
|
|
|
12
12
|
* ask_human athena knows nothing useful for this task
|
|
13
13
|
*/
|
|
14
14
|
const MAX_RULES = 5;
|
|
15
|
-
const MAX_FACTS =
|
|
15
|
+
const MAX_FACTS = 5;
|
|
16
16
|
const MAX_OPEN_QUESTIONS = 3;
|
|
17
|
+
const MAX_MAP_ENTRIES = 8;
|
|
17
18
|
const HIGH_CONFIDENCE = 0.7;
|
|
18
19
|
export function compileBrief(store, request, now = () => new Date()) {
|
|
19
20
|
const inScope = collectHypotheses(store, request);
|
|
@@ -32,16 +33,7 @@ export function compileBrief(store, request, now = () => new Date()) {
|
|
|
32
33
|
boundaries: h.doesNotApplyWhen,
|
|
33
34
|
ref: refTo(h.id),
|
|
34
35
|
}));
|
|
35
|
-
const facts = store
|
|
36
|
-
.search(request.task, "source", MAX_FACTS)
|
|
37
|
-
.flatMap((hit) => {
|
|
38
|
-
const source = store.getSource(parseRef(hit.ref).id);
|
|
39
|
-
return source ? [source] : [];
|
|
40
|
-
})
|
|
41
|
-
.map((source) => ({
|
|
42
|
-
statement: `${source.title}: ${source.content.slice(0, 160)}`,
|
|
43
|
-
ref: refTo(source.id),
|
|
44
|
-
}));
|
|
36
|
+
const facts = collectFacts(store, request, now);
|
|
45
37
|
const doNotAssume = stale.map((h) => `A previously learned rule is stale — do not assume it still holds: "${h.rule}"`);
|
|
46
38
|
const openQuestions = unvalidated
|
|
47
39
|
.slice(0, MAX_OPEN_QUESTIONS)
|
|
@@ -49,6 +41,7 @@ export function compileBrief(store, request, now = () => new Date()) {
|
|
|
49
41
|
const brief = {
|
|
50
42
|
id: newId("brf", now().getTime()),
|
|
51
43
|
task: request.task,
|
|
44
|
+
...(request.domain !== undefined ? { domain: request.domain } : {}),
|
|
52
45
|
compiledAt: now().toISOString(),
|
|
53
46
|
rules,
|
|
54
47
|
facts,
|
|
@@ -56,6 +49,7 @@ export function compileBrief(store, request, now = () => new Date()) {
|
|
|
56
49
|
openQuestions,
|
|
57
50
|
readiness: readinessFor(rules, facts, doNotAssume, openQuestions),
|
|
58
51
|
refs: [...rules.map((r) => r.ref), ...facts.map((f) => f.ref), ...unvalidated.slice(0, MAX_OPEN_QUESTIONS).map((h) => refTo(h.id))],
|
|
52
|
+
map: knowledgeMap(store),
|
|
59
53
|
};
|
|
60
54
|
// Serving is an event: fires feed the outcome loop.
|
|
61
55
|
for (const rule of rules) {
|
|
@@ -69,6 +63,65 @@ export function compileBrief(store, request, now = () => new Date()) {
|
|
|
69
63
|
store.saveBrief(brief);
|
|
70
64
|
return brief;
|
|
71
65
|
}
|
|
66
|
+
/**
 * Facts reach a brief three ways: the task names a known entity (alias match),
 * the task's domain produced facts, or lexical search hits. Stale and
 * superseded facts never serve.
 *
 * Alias matching is whole-word: an alias only counts when it is not flanked by
 * a letter, digit or underscore, so a short alias such as "ion" does not fire
 * inside "mention". Names and aliases shorter than three characters are ignored.
 *
 * Live facts are ordered by most recent `lastConfirmedAt` and capped at
 * MAX_FACTS; any remaining slots are filled with raw source excerpts.
 *
 * @param store   Store providing listObjects, listFacts, search, getFact and getSource.
 * @param request Brief request; reads `task` and the optional `domain`.
 * @param now     Clock function used to decide which facts are stale.
 * @returns Array of `{ statement, ref }` entries, facts first, then sources.
 */
function collectFacts(store, request, now) {
    const byId = new Map();
    // Padding guarantees every alias occurrence has a character on both sides to inspect.
    const task = ` ${request.task.toLowerCase()} `;
    for (const object of store.listObjects()) {
        const aliases = [object.name, ...object.aliases].filter((alias) => alias.length >= 3);
        if (!aliases.some((alias) => taskMentionsAlias(task, alias.toLowerCase())))
            continue;
        for (const fact of store.listFacts({ objectId: object.id }))
            byId.set(fact.id, fact);
    }
    if (request.domain) {
        for (const fact of store.listFacts({ domain: request.domain }))
            byId.set(fact.id, fact);
    }
    for (const hit of store.search(request.task, "fact", MAX_FACTS)) {
        const fact = store.getFact(parseRef(hit.ref).id);
        if (fact)
            byId.set(fact.id, fact);
    }
    const nowIso = now().toISOString();
    const live = [...byId.values()]
        .filter((fact) => !fact.supersededById && fact.staleAfter > nowIso)
        .sort((a, b) => b.lastConfirmedAt.localeCompare(a.lastConfirmedAt))
        .slice(0, MAX_FACTS)
        .map((fact) => ({ statement: fact.statement, ref: refTo(fact.id) }));
    if (live.length >= MAX_FACTS)
        return live;
    // Raw sources fill remaining slots — weaker than extracted facts, still cited.
    const sources = store
        .search(request.task, "source", MAX_FACTS - live.length)
        .flatMap((hit) => {
        const source = store.getSource(parseRef(hit.ref).id);
        return source ? [source] : [];
    })
        .map((source) => ({
        statement: `${source.title}: ${source.content.slice(0, 160)}`,
        ref: refTo(source.id),
    }));
    return [...live, ...sources];
}
/**
 * True when `needle` occurs in `haystack` as a whole token, i.e. with no
 * letter, digit or underscore directly before or after the occurrence.
 * Both arguments are expected to be lower-cased already.
 */
function taskMentionsAlias(haystack, needle) {
    if (needle.length === 0)
        return false;
    const isWordChar = (ch) => ch !== undefined && /[\p{L}\p{N}_]/u.test(ch);
    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
        if (!isWordChar(haystack[at - 1]) && !isWordChar(haystack[at + needle.length]))
            return true;
    }
    return false;
}
|
|
111
|
+
/**
 * The coordinates: a ranked summary of what athena holds and the query that
 * retrieves each slice. One entry per domain with rules and one per object
 * with facts (objects the store cannot resolve are skipped), largest counts
 * first, capped at MAX_MAP_ENTRIES.
 */
function knowledgeMap(store) {
    const pluralize = (count, noun) => `${count} ${noun}${count === 1 ? "" : "s"}`;
    const byCountDescending = (left, right) => right.count - left.count;
    const ruleEntries = Array.from(store.hypothesisCountsByDomain(), ({ domain, count }) => ({
        label: `${pluralize(count, "rule")} in ${domain}`,
        count,
        query: domain,
    }));
    const factEntries = Array.from(store.factCountsByObject()).flatMap(({ objectId, count }) => {
        const subject = store.getObject(objectId);
        return subject
            ? [{ label: `${pluralize(count, "fact")} about ${subject.name}`, count, query: subject.name }]
            : [];
    });
    return [...ruleEntries, ...factEntries].sort(byCountDescending).slice(0, MAX_MAP_ENTRIES);
}
|
|
72
125
|
function collectHypotheses(store, request) {
|
|
73
126
|
const byId = new Map();
|
|
74
127
|
if (request.domain) {
|
package/dist/store/open.js
CHANGED