useathena 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +258 -0
- package/apps/chrome-extension/README.md +35 -0
- package/apps/chrome-extension/background.js +97 -0
- package/apps/chrome-extension/gmail.js +107 -0
- package/apps/chrome-extension/linkedin.js +123 -0
- package/apps/chrome-extension/manifest.json +27 -0
- package/apps/chrome-extension/options.html +60 -0
- package/apps/chrome-extension/options.js +36 -0
- package/apps/chrome-extension/popup.html +37 -0
- package/apps/chrome-extension/popup.js +22 -0
- package/bin/athena +28 -0
- package/dist/api/server.js +145 -0
- package/dist/capture/ingest.js +85 -0
- package/dist/cli/commands.js +201 -0
- package/dist/cli/format.js +76 -0
- package/dist/cli/setup.js +316 -0
- package/dist/cli.js +291 -0
- package/dist/config.js +26 -0
- package/dist/core/fixtures.js +65 -0
- package/dist/core/ids.js +34 -0
- package/dist/core/refs.js +25 -0
- package/dist/core/types.js +10 -0
- package/dist/engine/engine.js +136 -0
- package/dist/engine/parse.js +76 -0
- package/dist/engine/prompts.js +64 -0
- package/dist/eval/harness.js +123 -0
- package/dist/eval/judge.js +75 -0
- package/dist/eval/run-eval.js +46 -0
- package/dist/eval/scenarios.js +470 -0
- package/dist/mcp/server.js +107 -0
- package/dist/mcp-server.js +7 -0
- package/dist/model/api-model-client.js +99 -0
- package/dist/model/cli-model-client.js +111 -0
- package/dist/model/model-client.js +28 -0
- package/dist/model/registry.js +67 -0
- package/dist/sensors/claude-code-hook.js +131 -0
- package/dist/serve/brief.js +95 -0
- package/dist/serve/outcome.js +56 -0
- package/dist/store/open.js +19 -0
- package/dist/store/store.js +269 -0
- package/docs/schema.md +368 -0
- package/package.json +43 -0
- package/scripts/prepare.mjs +20 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import { DatabaseSync } from "node:sqlite";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { dirname } from "node:path";
|
|
4
|
+
/**
|
|
5
|
+
* The single storage truth: one SQLite file per workspace.
|
|
6
|
+
* Domain invariants live at this boundary:
|
|
7
|
+
* - instances are immutable (no overwrite),
|
|
8
|
+
* - hypotheses cannot exist without supporting evidence.
|
|
9
|
+
* FTS5 gives the lexical retrieval lane; embeddings are a later, optional lane.
|
|
10
|
+
*/
|
|
11
|
+
// Upper bound on how much of a source's content is copied into the search
// index: saveSource() slices `source.content` to this length before indexing.
const SEARCH_TEXT_CAP = 100_000;
|
|
12
|
+
/**
 * SQLite-backed store for one workspace.
 *
 * Each record type lives in its own table as a JSON `data` column plus a few
 * extracted columns used for filtering and ordering. Instances, hypotheses and
 * sources are additionally mirrored into the `search_index` FTS5 table.
 *
 * Every write that touches more than one row runs inside a SAVEPOINT (see
 * `atomic`), and all values derived from the caller's record are computed
 * before the first write, so a malformed record can never leave a row stored
 * without its search-index entry or an object stripped of its aliases.
 */
export class AthenaStore {
    /** The underlying `node:sqlite` DatabaseSync connection. */
    db;

    /**
     * Open (creating if necessary) the database at `path` and ensure the schema exists.
     *
     * @param {string} path - File path, or ":memory:" for an in-memory database.
     */
    constructor(path) {
        // A real file needs its parent directory; ":memory:" has none.
        if (path !== ":memory:") {
            mkdirSync(dirname(path), { recursive: true });
        }
        this.db = new DatabaseSync(path);
        this.db.exec("PRAGMA journal_mode = WAL;");
        this.migrate();
    }

    /** Close the database connection. */
    close() {
        this.db.close();
    }

    /** Create all tables, indexes and the FTS5 search table if they do not exist yet. */
    migrate() {
        this.db.exec(`
CREATE TABLE IF NOT EXISTS instances (
id TEXT PRIMARY KEY, kind TEXT NOT NULL, domain TEXT NOT NULL,
actor_id TEXT NOT NULL, observed_at TEXT NOT NULL, data TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_instances_domain ON instances(domain, observed_at);
CREATE TABLE IF NOT EXISTS hypotheses (
id TEXT PRIMARY KEY, status TEXT NOT NULL, domain TEXT NOT NULL,
created_at TEXT NOT NULL, data TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_hypotheses_status ON hypotheses(status, domain);
CREATE TABLE IF NOT EXISTS sources (
id TEXT PRIMARY KEY, kind TEXT NOT NULL, access_state TEXT NOT NULL,
captured_at TEXT NOT NULL, data TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS objects (
id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL, data TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS object_aliases (
alias TEXT NOT NULL, object_id TEXT NOT NULL, PRIMARY KEY (alias, object_id)
);
CREATE TABLE IF NOT EXISTS relations (
from_id TEXT NOT NULL, to_id TEXT NOT NULL, kind TEXT NOT NULL,
valid_from TEXT NOT NULL, data TEXT NOT NULL,
PRIMARY KEY (from_id, to_id, kind, valid_from)
);
CREATE TABLE IF NOT EXISTS outcomes (
id TEXT PRIMARY KEY, brief_id TEXT NOT NULL, recorded_at TEXT NOT NULL, data TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_outcomes_brief ON outcomes(brief_id);
CREATE TABLE IF NOT EXISTS briefs (
id TEXT PRIMARY KEY, compiled_at TEXT NOT NULL, data TEXT NOT NULL
);
CREATE VIRTUAL TABLE IF NOT EXISTS search_index USING fts5(ref, lane, text);
`);
    }

    // --- instances (immutable) ---

    /**
     * Store a new instance and index it for search.
     *
     * @param {object} instance - Reads `id`, `kind`, `situation.domain`, `actorId`,
     *   `observedAt`, plus whatever instanceSearchText() reads.
     * @throws {Error} If an instance with the same id is already stored.
     */
    saveInstance(instance) {
        const existing = this.db
            .prepare("SELECT id FROM instances WHERE id = ?")
            .get(instance.id);
        if (existing) {
            throw new Error(`instance ${instance.id} already exists — instances are immutable`);
        }
        // Derive everything before writing: a malformed instance must fail here,
        // not after an immutable row has already been inserted without its index entry.
        const searchText = instanceSearchText(instance);
        const json = JSON.stringify(instance);
        this.atomic(() => {
            this.db
                .prepare("INSERT INTO instances (id, kind, domain, actor_id, observed_at, data) VALUES (?, ?, ?, ?, ?, ?)")
                .run(instance.id, instance.kind, instance.situation.domain, instance.actorId, instance.observedAt, json);
            this.index(`athena://instance/${instance.id}`, "instance", searchText);
        });
    }

    /** @returns The stored instance with this id, or undefined if there is none. */
    getInstance(id) {
        return this.getData("instances", id);
    }

    /**
     * List instances, newest `observed_at` first.
     *
     * @param {object} [filter] - Optional `kind`, `domain`, `actorId` (exact match),
     *   `since` (observed_at >= since) and `limit` (defaults to 200).
     */
    listInstances(filter = {}) {
        const where = [];
        const params = [];
        if (filter.kind) {
            where.push("kind = ?");
            params.push(filter.kind);
        }
        if (filter.domain) {
            where.push("domain = ?");
            params.push(filter.domain);
        }
        if (filter.actorId) {
            where.push("actor_id = ?");
            params.push(filter.actorId);
        }
        if (filter.since) {
            where.push("observed_at >= ?");
            params.push(filter.since);
        }
        const clause = where.length ? `WHERE ${where.join(" AND ")}` : "";
        // The limit is bound, never spliced into the SQL text (same as search()).
        const sql = `SELECT data FROM instances ${clause} ORDER BY observed_at DESC LIMIT ?`;
        return this.db.prepare(sql).all(...params, filter.limit ?? 200).map(rowData);
    }

    // --- hypotheses (revisable views over evidence) ---

    /**
     * Insert or update a hypothesis and refresh its search-index entry.
     * On update, `created_at` keeps its original value.
     *
     * @throws {Error} If `supportingInstanceIds` is missing or empty.
     */
    saveHypothesis(hypothesis) {
        const supporting = hypothesis.supportingInstanceIds;
        if (!Array.isArray(supporting) || supporting.length === 0) {
            throw new Error(`hypothesis ${hypothesis.id} has no supporting instances — everything cites`);
        }
        // Computed up front so a malformed hypothesis fails before any write.
        const searchText = hypothesisSearchText(hypothesis);
        const json = JSON.stringify(hypothesis);
        this.atomic(() => {
            this.upsert("hypotheses", "INSERT INTO hypotheses (id, status, domain, created_at, data) VALUES (?, ?, ?, ?, ?) " +
                "ON CONFLICT(id) DO UPDATE SET status = excluded.status, domain = excluded.domain, data = excluded.data", [hypothesis.id, hypothesis.status, hypothesis.domain, hypothesis.createdAt, json]);
            this.index(`athena://hypothesis/${hypothesis.id}`, "hypothesis", searchText);
        });
    }

    /** @returns The stored hypothesis with this id, or undefined if there is none. */
    getHypothesis(id) {
        return this.getData("hypotheses", id);
    }

    /**
     * List hypotheses, newest `created_at` first.
     *
     * @param {object} [filter] - Optional `status`, `domain` (exact match) and
     *   `limit` (defaults to 200).
     */
    listHypotheses(filter = {}) {
        const where = [];
        const params = [];
        if (filter.status) {
            where.push("status = ?");
            params.push(filter.status);
        }
        if (filter.domain) {
            where.push("domain = ?");
            params.push(filter.domain);
        }
        const clause = where.length ? `WHERE ${where.join(" AND ")}` : "";
        // The limit is bound, never spliced into the SQL text (same as search()).
        const sql = `SELECT data FROM hypotheses ${clause} ORDER BY created_at DESC LIMIT ?`;
        return this.db.prepare(sql).all(...params, filter.limit ?? 200).map(rowData);
    }

    // --- explicit layer ---

    /**
     * Insert or update a source and refresh its search-index entry.
     * On update only `access_state` and `data` change. The indexed text is the
     * title plus the first SEARCH_TEXT_CAP characters of the content.
     */
    saveSource(source) {
        // Computed up front so a malformed source fails before any write.
        const searchText = `${source.title}\n${source.content.slice(0, SEARCH_TEXT_CAP)}`;
        const json = JSON.stringify(source);
        this.atomic(() => {
            this.upsert("sources", "INSERT INTO sources (id, kind, access_state, captured_at, data) VALUES (?, ?, ?, ?, ?) " +
                "ON CONFLICT(id) DO UPDATE SET access_state = excluded.access_state, data = excluded.data", [source.id, source.kind, source.accessState, source.capturedAt, json]);
            this.index(`athena://source/${source.id}`, "source", searchText);
        });
    }

    /** @returns The stored source with this id, or undefined if there is none. */
    getSource(id) {
        return this.getData("sources", id);
    }

    /**
     * Insert or update an object and replace its alias rows.
     * The object's name and every entry of `aliases` are stored lower-cased as
     * resolution keys for resolveObject().
     */
    saveObject(object) {
        // Build the alias list first: if `aliases` is missing or holds a non-string,
        // this throws before the existing alias rows are deleted.
        const aliases = [object.name, ...object.aliases].map((alias) => alias.toLowerCase());
        const json = JSON.stringify(object);
        this.atomic(() => {
            this.upsert("objects", "INSERT INTO objects (id, kind, name, data) VALUES (?, ?, ?, ?) " +
                "ON CONFLICT(id) DO UPDATE SET kind = excluded.kind, name = excluded.name, data = excluded.data", [object.id, object.kind, object.name, json]);
            this.db.prepare("DELETE FROM object_aliases WHERE object_id = ?").run(object.id);
            // OR IGNORE: the name and an alias may lower-case to the same key.
            const insertAlias = this.db.prepare("INSERT OR IGNORE INTO object_aliases (alias, object_id) VALUES (?, ?)");
            for (const alias of aliases) {
                insertAlias.run(alias, object.id);
            }
        });
    }

    /** @returns The stored object with this id, or undefined if there is none. */
    getObject(id) {
        return this.getData("objects", id);
    }

    /**
     * List objects ordered by name.
     *
     * @param {string} [kind] - When given, only objects of this kind.
     */
    listObjects(kind) {
        const sql = kind
            ? "SELECT data FROM objects WHERE kind = ? ORDER BY name"
            : "SELECT data FROM objects ORDER BY name";
        const rows = kind ? this.db.prepare(sql).all(kind) : this.db.prepare(sql).all();
        return rows.map(rowData);
    }

    /**
     * Find every object registered under `alias` (compared lower-cased).
     *
     * @returns {object[]} Possibly empty; one alias may map to several objects.
     */
    resolveObject(alias) {
        return this.db
            .prepare("SELECT o.data FROM object_aliases a JOIN objects o ON o.id = a.object_id WHERE a.alias = ?")
            .all(alias.toLowerCase())
            .map(rowData);
    }

    /**
     * Insert a relation, or overwrite the data of the relation with the same
     * (fromId, toId, kind, validFrom) key.
     */
    saveRelation(relation) {
        this.db
            .prepare("INSERT INTO relations (from_id, to_id, kind, valid_from, data) VALUES (?, ?, ?, ?, ?) " +
            "ON CONFLICT(from_id, to_id, kind, valid_from) DO UPDATE SET data = excluded.data")
            .run(relation.fromId, relation.toId, relation.kind, relation.validFrom, JSON.stringify(relation));
    }

    /** List every relation in which `objectId` is either endpoint. */
    listRelations(objectId) {
        return this.db
            .prepare("SELECT data FROM relations WHERE from_id = ? OR to_id = ?")
            .all(objectId, objectId)
            .map(rowData);
    }

    // --- serving layer ---

    /** Insert a brief, or overwrite the data of the brief with the same id. */
    saveBrief(brief) {
        this.upsert("briefs", "INSERT INTO briefs (id, compiled_at, data) VALUES (?, ?, ?) ON CONFLICT(id) DO UPDATE SET data = excluded.data", [brief.id, brief.compiledAt, JSON.stringify(brief)]);
    }

    /** @returns The stored brief with this id, or undefined if there is none. */
    getBrief(id) {
        return this.getData("briefs", id);
    }

    /** Insert an outcome, or overwrite the data of the outcome with the same id. */
    saveOutcome(outcome) {
        this.upsert("outcomes", "INSERT INTO outcomes (id, brief_id, recorded_at, data) VALUES (?, ?, ?, ?) " +
            "ON CONFLICT(id) DO UPDATE SET data = excluded.data", [outcome.id, outcome.briefId, outcome.recordedAt, JSON.stringify(outcome)]);
    }

    /** @returns The stored outcome with this id, or undefined if there is none. */
    getOutcome(id) {
        return this.getData("outcomes", id);
    }

    /**
     * List outcomes, newest `recorded_at` first.
     *
     * @param {string} [briefId] - When given, only outcomes recorded for this brief.
     */
    listOutcomes(briefId) {
        const rows = briefId
            ? this.db.prepare("SELECT data FROM outcomes WHERE brief_id = ? ORDER BY recorded_at DESC").all(briefId)
            : this.db.prepare("SELECT data FROM outcomes ORDER BY recorded_at DESC").all();
        return rows.map(rowData);
    }

    // --- stats ---

    /**
     * Row counts: instances grouped by kind, hypotheses grouped by status, and
     * plain totals for sources, outcomes and briefs.
     */
    counts() {
        // Table and column names below are fixed literals from this method, never caller input.
        const byColumn = (table, column) => {
            const rows = this.db
                .prepare(`SELECT ${column} AS key, COUNT(*) AS n FROM ${table} GROUP BY ${column}`)
                .all();
            return Object.fromEntries(rows.map((row) => [row.key, row.n]));
        };
        const total = (table) => this.db.prepare(`SELECT COUNT(*) AS n FROM ${table}`).get().n;
        return {
            instances: byColumn("instances", "kind"),
            hypotheses: byColumn("hypotheses", "status"),
            sources: total("sources"),
            outcomes: total("outcomes"),
            briefs: total("briefs"),
        };
    }

    // --- search (lexical lane) ---

    /**
     * Full-text search over the index.
     *
     * @param {string} query - Free-form text; turned into a MATCH expression by ftsQuery().
     * @param {string} [lane] - When given, restrict results to this lane.
     * @param {number} [limit=20] - Maximum number of hits.
     * @returns {{ref: string, lane: string, score: number}[]} Best match first;
     *   `score` is the negated FTS5 rank. Empty when the query has no usable tokens.
     */
    search(query, lane, limit = 20) {
        const match = ftsQuery(query);
        if (!match) {
            return [];
        }
        const rows = lane
            ? this.db
                .prepare("SELECT ref, lane, rank FROM search_index WHERE search_index MATCH ? AND lane = ? ORDER BY rank LIMIT ?")
                .all(match, lane, limit)
            : this.db
                .prepare("SELECT ref, lane, rank FROM search_index WHERE search_index MATCH ? ORDER BY rank LIMIT ?")
                .all(match, limit);
        return rows.map((row) => ({ ref: row.ref, lane: row.lane, score: -row.rank }));
    }

    // --- helpers ---

    /**
     * Run `work` inside a SAVEPOINT so its writes are applied all together or not at all.
     * Savepoints nest, so this is safe to call from within another `atomic` call
     * or inside a transaction the caller opened on `db`.
     *
     * @param {() => any} work - Synchronous function performing the writes.
     * @returns Whatever `work` returns.
     * @throws Whatever `work` throws, after its writes have been rolled back.
     */
    atomic(work) {
        this.db.exec("SAVEPOINT athena_write");
        let result;
        try {
            result = work();
        }
        catch (error) {
            try {
                this.db.exec("ROLLBACK TO athena_write; RELEASE athena_write");
            }
            catch {
                // SQLite may already have rolled the transaction back itself;
                // the original failure is the one worth reporting.
            }
            throw error;
        }
        this.db.exec("RELEASE athena_write");
        return result;
    }

    /** Prepare and run one write statement with the given parameters. The table name is unused. */
    upsert(_table, sql, params) {
        this.db.prepare(sql).run(...params);
    }

    /**
     * Load and parse the `data` column of the row with this id.
     * `table` is spliced into the SQL text, so pass only table-name literals.
     */
    getData(table, id) {
        const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
        return row ? rowData(row) : undefined;
    }

    /** Replace the search-index entry for `ref` with the given lane and text. */
    index(ref, lane, text) {
        this.atomic(() => {
            this.db.prepare("DELETE FROM search_index WHERE ref = ?").run(ref);
            this.db.prepare("INSERT INTO search_index (ref, lane, text) VALUES (?, ?, ?)").run(ref, lane, text);
        });
    }
}
|
|
240
|
+
/** Decode the JSON document held in a result row's `data` column. */
function rowData(row) {
    const { data } = row;
    return JSON.parse(data);
}
|
|
243
|
+
/**
 * Turn free-form input into a safe FTS5 MATCH expression.
 *
 * Every run of letters, digits or underscores becomes one double-quoted token,
 * so user input can never be FTS5 syntax. Tokens AND together.
 *
 * @param {string} query - Raw user input.
 * @returns {string | undefined} The MATCH expression, or undefined when the
 *   input is not a string or contains no tokens.
 */
function ftsQuery(query) {
    // A missing or non-string query means "nothing to search for", not a crash.
    if (typeof query !== "string") {
        return undefined;
    }
    const tokens = query.match(/[\p{L}\p{N}_]+/gu);
    if (!tokens || tokens.length === 0) {
        return undefined;
    }
    return tokens.map((token) => `"${token}"`).join(" ");
}
|
|
250
|
+
/**
 * Build the text indexed for an instance: situation summary, domain, task,
 * cues, diff summary and probe answers, one per line in that order.
 * A missing task or diff contributes an empty line.
 */
function instanceSearchText(instance) {
    const lines = [];
    lines.push(instance.situation.summary);
    lines.push(instance.situation.domain);
    lines.push(instance.situation.task ?? "");
    lines.push(instance.situation.cues.join(" "));
    lines.push(instance.diff?.summary ?? "");
    const answers = instance.probeAnswers.map((probe) => probe.answer);
    lines.push(answers.join(" "));
    return lines.join("\n");
}
|
|
260
|
+
/**
 * Build the text indexed for a hypothesis: rule, domain, cues, appliesWhen,
 * doesNotApplyWhen and inferred rationale, one per line in that order.
 * A missing rationale contributes an empty line.
 */
function hypothesisSearchText(hypothesis) {
    const spaced = (items) => items.join(" ");
    const lines = [];
    lines.push(hypothesis.rule);
    lines.push(hypothesis.domain);
    lines.push(spaced(hypothesis.cues));
    lines.push(spaced(hypothesis.appliesWhen));
    lines.push(spaced(hypothesis.doesNotApplyWhen));
    lines.push(hypothesis.inferredRationale ?? "");
    return lines.join("\n");
}
|
package/docs/schema.md
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
# athena Core Schema v1
|
|
2
|
+
|
|
3
|
+
Status: design draft (2026-06-11). This is the contract everything else hangs off.
|
|
4
|
+
Evidence base: `docs/research/2026-06-11-tacit-capture-research.md`.
|
|
5
|
+
|
|
6
|
+
Mission framing: athena captures **tacit knowledge** from real work so agents become
|
|
7
|
+
**truly autonomous and reliable** — no hand-holding, no repeated corrections.
|
|
8
|
+
|
|
9
|
+
## Design rules
|
|
10
|
+
|
|
11
|
+
1. **Instances are immutable evidence; rules are revisable views over evidence.**
|
|
12
|
+
A `JudgmentInstance` is never edited after capture. A `TacitHypothesis` can change
|
|
13
|
+
status/confidence but its rule text changes only by supersession (new version, old
|
|
14
|
+
one retired with a pointer). No silent rewrites — this is what makes evidence links honest.
|
|
15
|
+
2. **Behavior is ground truth; rationale is labeled hypothesis.** Stated reasons (probe
|
|
16
|
+
answers, inferred rationale) are stored but never as facts.
|
|
17
|
+
3. **Everything cites.** A hypothesis without `supportingInstanceIds` cannot exist.
|
|
18
|
+
A brief item without a `ref` cannot be served.
|
|
19
|
+
4. **Privacy zone fields are mandatory on every capture-derived record** — cheap now,
|
|
20
|
+
impossible to retrofit. Raw instances default to private/non-promotable.
|
|
21
|
+
5. **One storage truth (SQLite), markdown as derived projection.** Pages are generated,
|
|
22
|
+
never authoritative.
|
|
23
|
+
6. **No taxonomy ahead of evidence.** Enums start minimal; `custom`/free-text escape
|
|
24
|
+
hatches instead of 19-member unions. We add members when real data forces it.
|
|
25
|
+
7. **Bi-temporal-lite.** Records that represent claims about the world carry
|
|
26
|
+
`validFrom`/`validUntil` so "what was true then" survives "what is true now" (Zep's
|
|
27
|
+
one great idea, without the graph database).
|
|
28
|
+
|
|
29
|
+
## IDs
|
|
30
|
+
|
|
31
|
+
Prefixed ULIDs: `ins_` instance, `hyp_` hypothesis, `obj_` object, `src_` source,
|
|
32
|
+
`out_` outcome, `brf_` brief, `act_` actor, `sen_` sensor. Sortable by creation time.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## 1. Capture layer
|
|
37
|
+
|
|
38
|
+
### SensorEvent — the sensor contract
|
|
39
|
+
|
|
40
|
+
Every capture surface (Chrome extension, Claude Code hook, MCP tool, CLI, connector)
|
|
41
|
+
emits this one shape. Sensors are dumb: they observe and emit; all interpretation
|
|
42
|
+
happens in the engine. This is what keeps the GTM extension and the dev-tool sensors
|
|
43
|
+
siblings instead of separate products.
|
|
44
|
+
|
|
45
|
+
```ts
|
|
46
|
+
type SensorEvent = {
|
|
47
|
+
sensorId: string; // sen_chrome_gmail, sen_claude_code, sen_mcp, sen_cli
|
|
48
|
+
emittedAt: string; // ISO 8601
|
|
49
|
+
kind: InstanceKind;
|
|
50
|
+
situation: SituationInput; // whatever context the sensor can see
|
|
51
|
+
before?: ArtifactInput; // agent/template output, if any
|
|
52
|
+
after?: ArtifactInput; // human's version, if any
|
|
53
|
+
probeAnswers?: ProbeAnswer[];
|
|
54
|
+
actorHint?: string; // sensor's best guess at who acted
|
|
55
|
+
raw?: unknown; // sensor-native payload, kept for audit, never interpreted
|
|
56
|
+
};
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### JudgmentInstance — the atomic evidence unit
|
|
60
|
+
|
|
61
|
+
A moment where tacit knowledge became visible. Immutable after creation.
|
|
62
|
+
|
|
63
|
+
```ts
|
|
64
|
+
type InstanceKind =
|
|
65
|
+
| "correction" // human edited agent/template output ← the high-signal core
|
|
66
|
+
| "override" // human ignored/reversed a suggestion
|
|
67
|
+
| "decision" // human chose between alternatives
|
|
68
|
+
| "escalation" // human got stuck and asked someone
|
|
69
|
+
| "failed_attempt" // an approach that didn't work
|
|
70
|
+
| "approval" // human explicitly accepted agent output (positive signal!)
|
|
71
|
+
| "manual_note"; // "remember this" — self-report, lowest evidential weight
|
|
72
|
+
|
|
73
|
+
type JudgmentInstance = {
|
|
74
|
+
id: InstanceId;
|
|
75
|
+
kind: InstanceKind;
|
|
76
|
+
observedAt: string;
|
|
77
|
+
situation: Situation;
|
|
78
|
+
before?: Artifact; // what the agent produced
|
|
79
|
+
after?: Artifact; // what the human made it
|
|
80
|
+
diff?: StructuredDiff; // computed at ingest, the engine's main food
|
|
81
|
+
probeAnswers: ProbeAnswer[]; // [] if no probe asked/answered
|
|
82
|
+
sensorId: string;
|
|
83
|
+
actorId: ActorId;
|
|
84
|
+
sourceRefs: SourceId[]; // explicit-layer evidence this moment touched
|
|
85
|
+
objectIds: ObjectId[]; // entities involved (resolved at ingest)
|
|
86
|
+
// privacy zone — defaults shown
|
|
87
|
+
visibility: "user_private_raw"; // | "derived" | "workspace"
|
|
88
|
+
canPromote: false; // raw stays raw; derived twins can promote
|
|
89
|
+
canUseForAgents: false;
|
|
90
|
+
};
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Note: **approvals are instances too.** Positive signal (uncorrected output in a
|
|
94
|
+
situation where a rule fired) is how hypotheses gain validity without pestering the user.
|
|
95
|
+
|
|
96
|
+
### Situation — the retrieval key
|
|
97
|
+
|
|
98
|
+
What makes an instance findable and clusterable. Kept small; the engine can recompute
|
|
99
|
+
embeddings, but these fields are the durable semantics.
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
type Situation = {
|
|
103
|
+
summary: string; // one line, natural language: "drafting cold outreach to enterprise CTO"
|
|
104
|
+
domain: string; // dot-path subdomain: "email.outreach", "code.review", "crm.deal_update"
|
|
105
|
+
task?: string; // what was being attempted
|
|
106
|
+
cues: string[]; // salient features the engine/probes identified
|
|
107
|
+
objectIds: ObjectId[]; // who/what this was about
|
|
108
|
+
app?: string; // gmail | hubspot | claude-code | ...
|
|
109
|
+
agent?: { runtime?: string; model?: string }; // recorded, never required (model-agnostic)
|
|
110
|
+
};
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
`domain` is the validity-regime boundary: hypotheses do not transfer across domains
|
|
114
|
+
without re-confirmation (fractionated expertise — Kahneman/Klein).
|
|
115
|
+
|
|
116
|
+
### ProbeAnswer — the micro-interview
|
|
117
|
+
|
|
118
|
+
CDM-derived probe types, RPD-shaped. One or two per correction, max.
|
|
119
|
+
|
|
120
|
+
```ts
|
|
121
|
+
type ProbeKind =
|
|
122
|
+
| "cue" // "What told you this was off?"
|
|
123
|
+
| "expectancy" // "What did you expect to see instead?"
|
|
124
|
+
| "goal" // "What were you protecting here?"
|
|
125
|
+
| "boundary" // "One-off, or always?" / "Would this hold if X were different?"
|
|
126
|
+
| "transfer"; // "Does this apply to other accounts/projects too?"
|
|
127
|
+
|
|
128
|
+
type ProbeAnswer = {
|
|
129
|
+
probe: ProbeKind;
|
|
130
|
+
question: string; // exactly as asked
|
|
131
|
+
answer: string; // verbatim — never summarized at capture time
|
|
132
|
+
askedAt: string;
|
|
133
|
+
dismissed?: boolean; // user skipped — that's signal too (don't re-ask soon)
|
|
134
|
+
};
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Artifact / StructuredDiff
|
|
138
|
+
|
|
139
|
+
```ts
|
|
140
|
+
type Artifact = {
|
|
141
|
+
mediaType: string; // text/plain, text/markdown, text/x-diff, application/json
|
|
142
|
+
content: string; // capped at ingest (e.g. 64KB); overflow → RawSource + ref
|
|
143
|
+
contentHash: string;
|
|
144
|
+
sourceRef?: SourceId; // full original, if archived
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
type StructuredDiff = {
|
|
148
|
+
summary: string; // engine-written: "changed greeting from casual to formal title"
|
|
149
|
+
hunks: { before: string; after: string; label?: string }[];
|
|
150
|
+
magnitude: "trivial" | "minor" | "substantive" | "rewrite";
|
|
151
|
+
};
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## 2. Learning layer
|
|
157
|
+
|
|
158
|
+
### TacitHypothesis — the learning object (THE product)
|
|
159
|
+
|
|
160
|
+
RPD-shaped: cues, expectancies, goal, action — plus boundaries, evidence, calibration.
|
|
161
|
+
|
|
162
|
+
```ts
|
|
163
|
+
type HypothesisStatus =
|
|
164
|
+
| "candidate" // inferred, not yet replay-validated — NOT served to agents
|
|
165
|
+
| "validated" // passed replay against held-out instances — servable with caveats
|
|
166
|
+
| "active" // confirmed by review or repeated outcomes — served confidently
|
|
167
|
+
| "stale" // staleness timer or contradictions — served only with warning, queued for revalidation
|
|
168
|
+
| "retired"; // superseded, rejected, or decayed to zero — kept for audit, never served
|
|
169
|
+
|
|
170
|
+
type TacitHypothesis = {
|
|
171
|
+
id: HypothesisId;
|
|
172
|
+
status: HypothesisStatus;
|
|
173
|
+
|
|
174
|
+
// The rule (RPD byproducts)
|
|
175
|
+
rule: string; // "When drafting outreach to enterprise contacts at Acme, use formal titles."
|
|
176
|
+
cues: string[]; // signals that this situation is *this kind* of situation
|
|
177
|
+
expectancies: string[]; // what you'd observe if the rule is working — and misfire signals
|
|
178
|
+
goal?: string; // what the rule protects ("don't burn exec relationships")
|
|
179
|
+
domain: string; // validity regime — same dot-path vocabulary as Situation.domain
|
|
180
|
+
appliesWhen: string[]; // boundary conditions, human-readable, checkable
|
|
181
|
+
doesNotApplyWhen: string[];// counter-boundaries (from counterexamples + boundary probes)
|
|
182
|
+
|
|
183
|
+
// Evidence — the load-bearing links
|
|
184
|
+
supportingInstanceIds: InstanceId[];
|
|
185
|
+
counterexampleInstanceIds: InstanceId[];
|
|
186
|
+
inferredRationale?: string; // engine's explanation — labeled hypothesis, not fact
|
|
187
|
+
|
|
188
|
+
// Calibration
|
|
189
|
+
confidence: number; // 0..1, set by engine, moved by outcomes — never by actor seniority
|
|
190
|
+
validity: {
|
|
191
|
+
fires: number; // times served in a brief
|
|
192
|
+
upheld: number; // served → output uncorrected
|
|
193
|
+
overridden: number; // served → output corrected anyway (auto-counterexample)
|
|
194
|
+
lastFiredAt?: string;
|
|
195
|
+
};
|
|
196
|
+
replay: { // the validation gate
|
|
197
|
+
tested: number; // held-out instances tested against
|
|
198
|
+
reproduced: number; // where the rule predicted the human's edit
|
|
199
|
+
lastRunAt?: string;
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
// Lifecycle
|
|
203
|
+
createdAt: string;
|
|
204
|
+
lastConfirmedAt?: string; // last supporting evidence or upheld outcome
|
|
205
|
+
staleAfter: string; // hard timer — unconfirmed past this → status: stale
|
|
206
|
+
supersededById?: HypothesisId;
|
|
207
|
+
|
|
208
|
+
// Governance
|
|
209
|
+
visibility: "user_private" | "derived" | "workspace";
|
|
210
|
+
review: ReviewState;
|
|
211
|
+
};
|
|
212
|
+
|
|
213
|
+
type ReviewState =
|
|
214
|
+
| { state: "unreviewed" }
|
|
215
|
+
| { state: "approved" | "edited" | "rejected"; byActorId: ActorId; at: string; note?: string };
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
State machine:
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
candidate --replay pass--> validated --review approve | N upheld outcomes--> active
|
|
222
|
+
candidate --replay fail--> retired
|
|
223
|
+
validated/active --staleAfter exceeded | counterexamples > threshold--> stale
|
|
224
|
+
stale --revalidation (replay + review)--> active
stale --decay--> retired
|
|
225
|
+
any --superseded--> retired (supersededById set)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Deliberately absent (memodis lessons): no `WorkflowEpisode` as a persisted domain object —
|
|
229
|
+
clustering is an engine implementation detail, not schema. No parallel `sourceRefs`/`sourceIds`
|
|
230
|
+
duplication. No 7-level scope taxonomy — `domain` + `visibility` carry it.
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## 3. Explicit layer (thin, boring, essential)
|
|
235
|
+
|
|
236
|
+
Explicit knowledge grounds tacit extraction (entity vocabulary, situation tagging) and
|
|
237
|
+
fills agent briefs with facts. It is deliberately commodity — gbrain-style, no innovation here.
|
|
238
|
+
|
|
239
|
+
```ts
|
|
240
|
+
type RawSource = {
|
|
241
|
+
id: SourceId;
|
|
242
|
+
kind: "document" | "email" | "thread" | "page" | "repo_doc" | "transcript" | "manual";
|
|
243
|
+
title: string;
|
|
244
|
+
content: string; // or contentRef for large bodies
|
|
245
|
+
contentHash: string;
|
|
246
|
+
origin: { sensorId?: string; connector?: string; uri?: string };
|
|
247
|
+
capturedAt: string;
|
|
248
|
+
accessState: "private" | "workspace" | "restricted" | "revoked";
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
type KnowledgeObject = {
|
|
252
|
+
id: ObjectId;
|
|
253
|
+
kind: "person" | "org" | "project" | "repo" | "process" | "custom";
|
|
254
|
+
name: string;
|
|
255
|
+
aliases: string[]; // emails, handles, domains, nicknames — resolution keys
|
|
256
|
+
properties: Record<string, string>;
|
|
257
|
+
validFrom: string;
|
|
258
|
+
validUntil?: string;
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
type ObjectRelation = {
|
|
262
|
+
fromId: ObjectId;
|
|
263
|
+
toId: ObjectId;
|
|
264
|
+
kind: string; // free vocabulary: works_at, owns, blocks, part_of...
|
|
265
|
+
sourceRefs: SourceId[]; // relations cite too
|
|
266
|
+
validFrom: string;
|
|
267
|
+
validUntil?: string; // contradiction = close interval + open new one, never delete
|
|
268
|
+
};
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Markdown pages (per object, per domain) are **projections** generated from sources +
|
|
272
|
+
hypotheses + relations. Stored in `.athena/pages/` for humans and git; rebuilt, never edited
|
|
273
|
+
as truth.
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## 4. Serving layer
|
|
278
|
+
|
|
279
|
+
### Brief — the one agent-facing read (replaces memodis packets + coordinate maps)
|
|
280
|
+
|
|
281
|
+
Compact payload + dereferenceable pointers. Persisted for audit and outcome linkage.
|
|
282
|
+
|
|
283
|
+
```ts
|
|
284
|
+
type Brief = {
|
|
285
|
+
id: BriefId;
|
|
286
|
+
task: string; // as the agent stated it
|
|
287
|
+
compiledAt: string;
|
|
288
|
+
|
|
289
|
+
rules: {
|
|
290
|
+
hypothesisId: HypothesisId;
|
|
291
|
+
rule: string;
|
|
292
|
+
confidence: number;
|
|
293
|
+
appliesBecause: string; // which cues matched this task
|
|
294
|
+
boundaries: string[]; // doesNotApplyWhen, surfaced inline
|
|
295
|
+
ref: Ref;
|
|
296
|
+
}[];
|
|
297
|
+
|
|
298
|
+
facts: {
|
|
299
|
+
statement: string;
|
|
300
|
+
ref: Ref; // citation into explicit layer — mandatory
|
|
301
|
+
}[];
|
|
302
|
+
|
|
303
|
+
doNotAssume: string[]; // caveats: ambiguities, contradictions, stale knowledge
|
|
304
|
+
openQuestions: string[]; // what athena knows it doesn't know (gbrain's honesty, kept)
|
|
305
|
+
|
|
306
|
+
readiness: "act" | "act_with_caveats" | "inspect_first" | "ask_human";
|
|
307
|
+
refs: Ref[]; // additional evidence worth opening, ranked
|
|
308
|
+
};
|
|
309
|
+
|
|
310
|
+
type Ref = string; // athena://hypothesis/hyp_x | athena://instance/ins_x
|
|
311
|
+
// athena://source/src_x#L10-40 | athena://object/obj_x
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
`Ref` is the coordinate system's survivor: a plain dereferenceable pointer, no taxonomy.
|
|
315
|
+
|
|
316
|
+
### AgentOutcome — the feedback loop closer
|
|
317
|
+
|
|
318
|
+
```ts
|
|
319
|
+
type AgentOutcome = {
|
|
320
|
+
id: OutcomeId;
|
|
321
|
+
briefId: BriefId; // which serving this outcome judges
|
|
322
|
+
result: "uncorrected" // human accepted → upheld++ for every served rule
|
|
323
|
+
| "corrected" // human edited → correctionInstanceId links the new instance,
|
|
324
|
+
// overridden++ for served rules the correction contradicts
|
|
325
|
+
| "abandoned"
|
|
326
|
+
| "unknown";
|
|
327
|
+
correctionInstanceId?: InstanceId;
|
|
328
|
+
recordedAt: string;
|
|
329
|
+
};
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
This closes the loop: **brief → action → outcome → instance → hypothesis update**, and
|
|
333
|
+
gives us the north-star metric directly: correction rate per served brief, over time.
|
|
334
|
+
|
|
335
|
+
### MCP surface (4 tools, not 19)
|
|
336
|
+
|
|
337
|
+
| Tool | Does |
|
|
338
|
+
|---|---|
|
|
339
|
+
| `athena_brief(task, domain?)` | compile + persist a Brief |
|
|
340
|
+
| `athena_open(ref)` | dereference any `athena://` ref |
|
|
341
|
+
| `athena_record(event)` | SensorEvent in: outcome, correction, manual note, rule *proposal* |
|
|
342
|
+
| `athena_search(query, lane?)` | direct lexical+semantic search when the agent wants to explore |
|
|
343
|
+
|
|
344
|
+
No durable-mutation tools. `athena_record` proposals enter the review queue like any sensor event.
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
## 5. Storage mapping
|
|
349
|
+
|
|
350
|
+
SQLite, one file per workspace (`.athena/athena.db`):
|
|
351
|
+
- `instances`, `hypotheses`, `sources`, `objects`, `relations`, `outcomes`, `briefs`
|
|
352
|
+
— JSON column + extracted indexed columns (id, kind, domain, status, observedAt, visibility).
|
|
353
|
+
- FTS5 over instance summaries/diffs, hypothesis rules, source content.
|
|
354
|
+
- `sqlite-vec` over situation/rule/source embeddings (optional lane — everything works without it).
|
|
355
|
+
- WAL mode; single-writer per file is acceptable for local; server mode serializes per workspace.
|
|
356
|
+
|
|
357
|
+
Markdown projection in `.athena/pages/` — derived, git-friendly, regenerated.
|
|
358
|
+
|
|
359
|
+
## 6. Open questions (carry into eval-harness design)
|
|
360
|
+
|
|
361
|
+
- Probe selection policy: when is a correction worth a probe at all (magnitude threshold? novelty?),
|
|
362
|
+
and probe-budget per user per day (extension lesson from memodis: budget was right).
|
|
363
|
+
- Replay scoring: what counts as "reproduced the human's edit" — LLM judge with rubric, or
|
|
364
|
+
string/structure similarity floor? (Likely: LLM judge, calibrated against a hand-labeled set.)
|
|
365
|
+
- Confidence math: start with ExpeL-style counters + replay rate; resist Bayesian theater until
|
|
366
|
+
the eval harness can tell us if it helps.
|
|
367
|
+
- Cross-actor aggregation (team mode): hypotheses are per-actor in v1; enterprise merge is a
|
|
368
|
+
governed promotion, not an automatic union.
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "useathena",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "athena captures tacit knowledge from real work so agents become truly autonomous and reliable.",
|
|
5
|
+
"license": "UNLICENSED",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/ErikLit005/athena.git"
|
|
9
|
+
},
|
|
10
|
+
"type": "module",
|
|
11
|
+
"engines": {
|
|
12
|
+
"node": ">=22.5"
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"dist",
|
|
16
|
+
"bin",
|
|
17
|
+
"apps/chrome-extension",
|
|
18
|
+
"docs/schema.md",
|
|
19
|
+
"scripts/prepare.mjs"
|
|
20
|
+
],
|
|
21
|
+
"scripts": {
|
|
22
|
+
"test": "node --import tsx --test src/**/*.test.ts",
|
|
23
|
+
"typecheck": "tsc --noEmit",
|
|
24
|
+
"verify": "npm run typecheck && npm test",
|
|
25
|
+
"build": "tsc -p tsconfig.build.json",
|
|
26
|
+
"prepare": "node scripts/prepare.mjs",
|
|
27
|
+
"eval": "node --import tsx src/eval/run-eval.ts",
|
|
28
|
+
"mcp": "node --import tsx src/mcp-server.ts",
|
|
29
|
+
"athena": "node --import tsx src/cli.ts"
|
|
30
|
+
},
|
|
31
|
+
"devDependencies": {
|
|
32
|
+
"@types/node": "^25.9.1",
|
|
33
|
+
"tsx": "^4.22.3",
|
|
34
|
+
"typescript": "^5.5.0"
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
38
|
+
"zod": "^4.4.3"
|
|
39
|
+
},
|
|
40
|
+
"bin": {
|
|
41
|
+
"athena": "bin/athena"
|
|
42
|
+
}
|
|
43
|
+
}
|