useathena 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +258 -0
  2. package/apps/chrome-extension/README.md +35 -0
  3. package/apps/chrome-extension/background.js +97 -0
  4. package/apps/chrome-extension/gmail.js +107 -0
  5. package/apps/chrome-extension/linkedin.js +123 -0
  6. package/apps/chrome-extension/manifest.json +27 -0
  7. package/apps/chrome-extension/options.html +60 -0
  8. package/apps/chrome-extension/options.js +36 -0
  9. package/apps/chrome-extension/popup.html +37 -0
  10. package/apps/chrome-extension/popup.js +22 -0
  11. package/bin/athena +28 -0
  12. package/dist/api/server.js +145 -0
  13. package/dist/capture/ingest.js +85 -0
  14. package/dist/cli/commands.js +201 -0
  15. package/dist/cli/format.js +76 -0
  16. package/dist/cli/setup.js +316 -0
  17. package/dist/cli.js +291 -0
  18. package/dist/config.js +26 -0
  19. package/dist/core/fixtures.js +65 -0
  20. package/dist/core/ids.js +34 -0
  21. package/dist/core/refs.js +25 -0
  22. package/dist/core/types.js +10 -0
  23. package/dist/engine/engine.js +136 -0
  24. package/dist/engine/parse.js +76 -0
  25. package/dist/engine/prompts.js +64 -0
  26. package/dist/eval/harness.js +123 -0
  27. package/dist/eval/judge.js +75 -0
  28. package/dist/eval/run-eval.js +46 -0
  29. package/dist/eval/scenarios.js +470 -0
  30. package/dist/mcp/server.js +107 -0
  31. package/dist/mcp-server.js +7 -0
  32. package/dist/model/api-model-client.js +99 -0
  33. package/dist/model/cli-model-client.js +111 -0
  34. package/dist/model/model-client.js +28 -0
  35. package/dist/model/registry.js +67 -0
  36. package/dist/sensors/claude-code-hook.js +131 -0
  37. package/dist/serve/brief.js +95 -0
  38. package/dist/serve/outcome.js +56 -0
  39. package/dist/store/open.js +19 -0
  40. package/dist/store/store.js +269 -0
  41. package/docs/schema.md +368 -0
  42. package/package.json +43 -0
  43. package/scripts/prepare.mjs +20 -0
@@ -0,0 +1,269 @@
1
+ import { DatabaseSync } from "node:sqlite";
2
+ import { mkdirSync } from "node:fs";
3
+ import { dirname } from "node:path";
4
+ /**
5
+ * The single storage truth: one SQLite file per workspace.
6
+ * Domain invariants live at this boundary:
7
+ * - instances are immutable (no overwrite),
8
+ * - hypotheses cannot exist without supporting evidence.
9
+ * FTS5 gives the lexical retrieval lane; embeddings are a later, optional lane.
10
+ */
11
+ const SEARCH_TEXT_CAP = 100_000;
12
+ export class AthenaStore {
13
+ db;
14
+ constructor(path) {
15
+ if (path !== ":memory:")
16
+ mkdirSync(dirname(path), { recursive: true });
17
+ this.db = new DatabaseSync(path);
18
+ this.db.exec("PRAGMA journal_mode = WAL;");
19
+ this.migrate();
20
+ }
21
+ close() {
22
+ this.db.close();
23
+ }
24
+ migrate() {
25
+ this.db.exec(`
26
+ CREATE TABLE IF NOT EXISTS instances (
27
+ id TEXT PRIMARY KEY, kind TEXT NOT NULL, domain TEXT NOT NULL,
28
+ actor_id TEXT NOT NULL, observed_at TEXT NOT NULL, data TEXT NOT NULL
29
+ );
30
+ CREATE INDEX IF NOT EXISTS idx_instances_domain ON instances(domain, observed_at);
31
+ CREATE TABLE IF NOT EXISTS hypotheses (
32
+ id TEXT PRIMARY KEY, status TEXT NOT NULL, domain TEXT NOT NULL,
33
+ created_at TEXT NOT NULL, data TEXT NOT NULL
34
+ );
35
+ CREATE INDEX IF NOT EXISTS idx_hypotheses_status ON hypotheses(status, domain);
36
+ CREATE TABLE IF NOT EXISTS sources (
37
+ id TEXT PRIMARY KEY, kind TEXT NOT NULL, access_state TEXT NOT NULL,
38
+ captured_at TEXT NOT NULL, data TEXT NOT NULL
39
+ );
40
+ CREATE TABLE IF NOT EXISTS objects (
41
+ id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL, data TEXT NOT NULL
42
+ );
43
+ CREATE TABLE IF NOT EXISTS object_aliases (
44
+ alias TEXT NOT NULL, object_id TEXT NOT NULL, PRIMARY KEY (alias, object_id)
45
+ );
46
+ CREATE TABLE IF NOT EXISTS relations (
47
+ from_id TEXT NOT NULL, to_id TEXT NOT NULL, kind TEXT NOT NULL,
48
+ valid_from TEXT NOT NULL, data TEXT NOT NULL,
49
+ PRIMARY KEY (from_id, to_id, kind, valid_from)
50
+ );
51
+ CREATE TABLE IF NOT EXISTS outcomes (
52
+ id TEXT PRIMARY KEY, brief_id TEXT NOT NULL, recorded_at TEXT NOT NULL, data TEXT NOT NULL
53
+ );
54
+ CREATE INDEX IF NOT EXISTS idx_outcomes_brief ON outcomes(brief_id);
55
+ CREATE TABLE IF NOT EXISTS briefs (
56
+ id TEXT PRIMARY KEY, compiled_at TEXT NOT NULL, data TEXT NOT NULL
57
+ );
58
+ CREATE VIRTUAL TABLE IF NOT EXISTS search_index USING fts5(ref, lane, text);
59
+ `);
60
+ }
61
+ // --- instances (immutable) ---
62
+ saveInstance(instance) {
63
+ const existing = this.db
64
+ .prepare("SELECT id FROM instances WHERE id = ?")
65
+ .get(instance.id);
66
+ if (existing) {
67
+ throw new Error(`instance ${instance.id} already exists — instances are immutable`);
68
+ }
69
+ this.db
70
+ .prepare("INSERT INTO instances (id, kind, domain, actor_id, observed_at, data) VALUES (?, ?, ?, ?, ?, ?)")
71
+ .run(instance.id, instance.kind, instance.situation.domain, instance.actorId, instance.observedAt, JSON.stringify(instance));
72
+ this.index(`athena://instance/${instance.id}`, "instance", instanceSearchText(instance));
73
+ }
74
+ getInstance(id) {
75
+ return this.getData("instances", id);
76
+ }
77
+ listInstances(filter = {}) {
78
+ const where = [];
79
+ const params = [];
80
+ if (filter.kind) {
81
+ where.push("kind = ?");
82
+ params.push(filter.kind);
83
+ }
84
+ if (filter.domain) {
85
+ where.push("domain = ?");
86
+ params.push(filter.domain);
87
+ }
88
+ if (filter.actorId) {
89
+ where.push("actor_id = ?");
90
+ params.push(filter.actorId);
91
+ }
92
+ if (filter.since) {
93
+ where.push("observed_at >= ?");
94
+ params.push(filter.since);
95
+ }
96
+ const sql = `SELECT data FROM instances ${where.length ? `WHERE ${where.join(" AND ")}` : ""}
97
+ ORDER BY observed_at DESC LIMIT ${filter.limit ?? 200}`;
98
+ return this.db.prepare(sql).all(...params).map((rowData));
99
+ }
100
+ // --- hypotheses (revisable views over evidence) ---
101
+ saveHypothesis(hypothesis) {
102
+ if (hypothesis.supportingInstanceIds.length === 0) {
103
+ throw new Error(`hypothesis ${hypothesis.id} has no supporting instances — everything cites`);
104
+ }
105
+ this.upsert("hypotheses", "INSERT INTO hypotheses (id, status, domain, created_at, data) VALUES (?, ?, ?, ?, ?) " +
106
+ "ON CONFLICT(id) DO UPDATE SET status = excluded.status, domain = excluded.domain, data = excluded.data", [hypothesis.id, hypothesis.status, hypothesis.domain, hypothesis.createdAt, JSON.stringify(hypothesis)]);
107
+ this.index(`athena://hypothesis/${hypothesis.id}`, "hypothesis", hypothesisSearchText(hypothesis));
108
+ }
109
+ getHypothesis(id) {
110
+ return this.getData("hypotheses", id);
111
+ }
112
+ listHypotheses(filter = {}) {
113
+ const where = [];
114
+ const params = [];
115
+ if (filter.status) {
116
+ where.push("status = ?");
117
+ params.push(filter.status);
118
+ }
119
+ if (filter.domain) {
120
+ where.push("domain = ?");
121
+ params.push(filter.domain);
122
+ }
123
+ const sql = `SELECT data FROM hypotheses ${where.length ? `WHERE ${where.join(" AND ")}` : ""}
124
+ ORDER BY created_at DESC LIMIT ${filter.limit ?? 200}`;
125
+ return this.db.prepare(sql).all(...params).map((rowData));
126
+ }
127
+ // --- explicit layer ---
128
+ saveSource(source) {
129
+ this.upsert("sources", "INSERT INTO sources (id, kind, access_state, captured_at, data) VALUES (?, ?, ?, ?, ?) " +
130
+ "ON CONFLICT(id) DO UPDATE SET access_state = excluded.access_state, data = excluded.data", [source.id, source.kind, source.accessState, source.capturedAt, JSON.stringify(source)]);
131
+ this.index(`athena://source/${source.id}`, "source", `${source.title}\n${source.content.slice(0, SEARCH_TEXT_CAP)}`);
132
+ }
133
+ getSource(id) {
134
+ return this.getData("sources", id);
135
+ }
136
+ saveObject(object) {
137
+ this.upsert("objects", "INSERT INTO objects (id, kind, name, data) VALUES (?, ?, ?, ?) " +
138
+ "ON CONFLICT(id) DO UPDATE SET kind = excluded.kind, name = excluded.name, data = excluded.data", [object.id, object.kind, object.name, JSON.stringify(object)]);
139
+ this.db.prepare("DELETE FROM object_aliases WHERE object_id = ?").run(object.id);
140
+ const insertAlias = this.db.prepare("INSERT OR IGNORE INTO object_aliases (alias, object_id) VALUES (?, ?)");
141
+ for (const alias of [object.name, ...object.aliases]) {
142
+ insertAlias.run(alias.toLowerCase(), object.id);
143
+ }
144
+ }
145
+ getObject(id) {
146
+ return this.getData("objects", id);
147
+ }
148
+ listObjects(kind) {
149
+ const sql = kind
150
+ ? "SELECT data FROM objects WHERE kind = ? ORDER BY name"
151
+ : "SELECT data FROM objects ORDER BY name";
152
+ const rows = kind ? this.db.prepare(sql).all(kind) : this.db.prepare(sql).all();
153
+ return rows.map((rowData));
154
+ }
155
+ resolveObject(alias) {
156
+ return this.db
157
+ .prepare("SELECT o.data FROM object_aliases a JOIN objects o ON o.id = a.object_id WHERE a.alias = ?")
158
+ .all(alias.toLowerCase())
159
+ .map((rowData));
160
+ }
161
+ saveRelation(relation) {
162
+ this.db
163
+ .prepare("INSERT INTO relations (from_id, to_id, kind, valid_from, data) VALUES (?, ?, ?, ?, ?) " +
164
+ "ON CONFLICT(from_id, to_id, kind, valid_from) DO UPDATE SET data = excluded.data")
165
+ .run(relation.fromId, relation.toId, relation.kind, relation.validFrom, JSON.stringify(relation));
166
+ }
167
+ listRelations(objectId) {
168
+ return this.db
169
+ .prepare("SELECT data FROM relations WHERE from_id = ? OR to_id = ?")
170
+ .all(objectId, objectId)
171
+ .map((rowData));
172
+ }
173
+ // --- serving layer ---
174
+ saveBrief(brief) {
175
+ this.upsert("briefs", "INSERT INTO briefs (id, compiled_at, data) VALUES (?, ?, ?) ON CONFLICT(id) DO UPDATE SET data = excluded.data", [brief.id, brief.compiledAt, JSON.stringify(brief)]);
176
+ }
177
+ getBrief(id) {
178
+ return this.getData("briefs", id);
179
+ }
180
+ saveOutcome(outcome) {
181
+ this.upsert("outcomes", "INSERT INTO outcomes (id, brief_id, recorded_at, data) VALUES (?, ?, ?, ?) " +
182
+ "ON CONFLICT(id) DO UPDATE SET data = excluded.data", [outcome.id, outcome.briefId, outcome.recordedAt, JSON.stringify(outcome)]);
183
+ }
184
+ getOutcome(id) {
185
+ return this.getData("outcomes", id);
186
+ }
187
+ listOutcomes(briefId) {
188
+ const rows = briefId
189
+ ? this.db.prepare("SELECT data FROM outcomes WHERE brief_id = ? ORDER BY recorded_at DESC").all(briefId)
190
+ : this.db.prepare("SELECT data FROM outcomes ORDER BY recorded_at DESC").all();
191
+ return rows.map((rowData));
192
+ }
193
+ // --- stats ---
194
+ counts() {
195
+ const byColumn = (table, column) => {
196
+ const rows = this.db
197
+ .prepare(`SELECT ${column} AS key, COUNT(*) AS n FROM ${table} GROUP BY ${column}`)
198
+ .all();
199
+ return Object.fromEntries(rows.map((row) => [row.key, row.n]));
200
+ };
201
+ const total = (table) => this.db.prepare(`SELECT COUNT(*) AS n FROM ${table}`).get().n;
202
+ return {
203
+ instances: byColumn("instances", "kind"),
204
+ hypotheses: byColumn("hypotheses", "status"),
205
+ sources: total("sources"),
206
+ outcomes: total("outcomes"),
207
+ briefs: total("briefs"),
208
+ };
209
+ }
210
+ // --- search (lexical lane) ---
211
+ search(query, lane, limit = 20) {
212
+ const match = ftsQuery(query);
213
+ if (!match)
214
+ return [];
215
+ const rows = lane
216
+ ? this.db
217
+ .prepare("SELECT ref, lane, rank FROM search_index WHERE search_index MATCH ? AND lane = ? ORDER BY rank LIMIT ?")
218
+ .all(match, lane, limit)
219
+ : this.db
220
+ .prepare("SELECT ref, lane, rank FROM search_index WHERE search_index MATCH ? ORDER BY rank LIMIT ?")
221
+ .all(match, limit);
222
+ return rows.map((row) => {
223
+ const r = row;
224
+ return { ref: r.ref, lane: r.lane, score: -r.rank };
225
+ });
226
+ }
227
+ // --- helpers ---
228
+ upsert(_table, sql, params) {
229
+ this.db.prepare(sql).run(...params);
230
+ }
231
+ getData(table, id) {
232
+ const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
233
+ return row ? rowData(row) : undefined;
234
+ }
235
+ index(ref, lane, text) {
236
+ this.db.prepare("DELETE FROM search_index WHERE ref = ?").run(ref);
237
+ this.db.prepare("INSERT INTO search_index (ref, lane, text) VALUES (?, ?, ?)").run(ref, lane, text);
238
+ }
239
+ }
240
/** Decode the JSON payload stored in a row's `data` column. */
function rowData(row) {
    const { data } = row;
    return JSON.parse(data);
}
243
/**
 * Turn free-form input into a safe FTS5 MATCH expression.
 *
 * Only runs of letters, digits and underscores survive, and each one is
 * wrapped in double quotes, so user input can never be FTS5 syntax.
 * Tokens AND together.
 *
 * @param {string} query - raw user input.
 * @returns {string | undefined} the MATCH expression, or undefined when there
 *   is nothing to search for (empty, punctuation-only, or non-string input).
 */
function ftsQuery(query) {
    // A missing or non-string query means "no query", not a crash on `.match`.
    if (typeof query !== "string")
        return undefined;
    const tokens = query.match(/[\p{L}\p{N}_]+/gu);
    if (tokens === null)
        return undefined;
    return tokens.map((t) => `"${t}"`).join(" ");
}
250
/**
 * Build the text indexed for an instance: situation summary, domain, task,
 * cues, diff summary and probe answers, one field per line.
 */
function instanceSearchText(instance) {
    const { situation } = instance;
    const taskLine = situation.task ?? "";
    const cueLine = situation.cues.join(" ");
    const diffLine = instance.diff?.summary ?? "";
    const answerLine = instance.probeAnswers.map(({ answer }) => answer).join(" ");
    const lines = [situation.summary, situation.domain, taskLine, cueLine, diffLine, answerLine];
    return lines.join("\n");
}
260
/**
 * Build the text indexed for a hypothesis: rule, domain, cues, both boundary
 * lists and the inferred rationale, one field per line.
 */
function hypothesisSearchText(hypothesis) {
    const { rule, domain, cues, appliesWhen, doesNotApplyWhen, inferredRationale } = hypothesis;
    const spaced = (items) => items.join(" ");
    const lines = [
        rule,
        domain,
        spaced(cues),
        spaced(appliesWhen),
        spaced(doesNotApplyWhen),
        inferredRationale ?? "",
    ];
    return lines.join("\n");
}
package/docs/schema.md ADDED
@@ -0,0 +1,368 @@
1
+ # athena Core Schema v1
2
+
3
+ Status: design draft (2026-06-11). This is the contract everything else hangs off.
4
+ Evidence base: `docs/research/2026-06-11-tacit-capture-research.md`.
5
+
6
+ Mission framing: athena captures **tacit knowledge** from real work so agents become
7
+ **truly autonomous and reliable** — no hand-holding, no repeated corrections.
8
+
9
+ ## Design rules
10
+
11
+ 1. **Instances are immutable evidence; rules are revisable views over evidence.**
12
+ A `JudgmentInstance` is never edited after capture. A `TacitHypothesis` can change
13
+ status/confidence but its rule text changes only by supersession (new version, old
14
+ one retired with a pointer). No silent rewrites — this is what makes evidence links honest.
15
+ 2. **Behavior is ground truth; rationale is labeled hypothesis.** Stated reasons (probe
16
+ answers, inferred rationale) are stored but never as facts.
17
+ 3. **Everything cites.** A hypothesis without `supportingInstanceIds` cannot exist.
18
+ A brief item without a `ref` cannot be served.
19
+ 4. **Privacy zone fields are mandatory on every capture-derived record** — cheap now,
20
+ impossible to retrofit. Raw instances default to private/non-promotable.
21
+ 5. **One storage truth (SQLite), markdown as derived projection.** Pages are generated,
22
+ never authoritative.
23
+ 6. **No taxonomy ahead of evidence.** Enums start minimal; `custom`/free-text escape
24
+ hatches instead of 19-member unions. We add members when real data forces it.
25
+ 7. **Bi-temporal-lite.** Records that represent claims about the world carry
26
+ `validFrom`/`validUntil` so "what was true then" survives "what is true now" (Zep's
27
+ one great idea, without the graph database).
28
+
29
+ ## IDs
30
+
31
+ Prefixed ULIDs: `ins_` instance, `hyp_` hypothesis, `obj_` object, `src_` source,
32
+ `out_` outcome, `brf_` brief, `act_` actor, `sen_` sensor. Sortable by creation time.
33
+
34
+ ---
35
+
36
+ ## 1. Capture layer
37
+
38
+ ### SensorEvent — the sensor contract
39
+
40
+ Every capture surface (Chrome extension, Claude Code hook, MCP tool, CLI, connector)
41
+ emits this one shape. Sensors are dumb: they observe and emit; all interpretation
42
+ happens in the engine. This is what keeps the GTM extension and the dev-tool sensors
43
+ siblings instead of separate products.
44
+
45
+ ```ts
46
+ type SensorEvent = {
47
+ sensorId: string; // sen_chrome_gmail, sen_claude_code, sen_mcp, sen_cli
48
+ emittedAt: string; // ISO 8601
49
+ kind: InstanceKind;
50
+ situation: SituationInput; // whatever context the sensor can see
51
+ before?: ArtifactInput; // agent/template output, if any
52
+ after?: ArtifactInput; // human's version, if any
53
+ probeAnswers?: ProbeAnswer[];
54
+ actorHint?: string; // sensor's best guess at who acted
55
+ raw?: unknown; // sensor-native payload, kept for audit, never interpreted
56
+ };
57
+ ```
58
+
59
+ ### JudgmentInstance — the atomic evidence unit
60
+
61
+ A moment where tacit knowledge became visible. Immutable after creation.
62
+
63
+ ```ts
64
+ type InstanceKind =
65
+ | "correction" // human edited agent/template output ← the high-signal core
66
+ | "override" // human ignored/reversed a suggestion
67
+ | "decision" // human chose between alternatives
68
+ | "escalation" // human got stuck and asked someone
69
+ | "failed_attempt" // an approach that didn't work
70
+ | "approval" // human explicitly accepted agent output (positive signal!)
71
+ | "manual_note"; // "remember this" — self-report, lowest evidential weight
72
+
73
+ type JudgmentInstance = {
74
+ id: InstanceId;
75
+ kind: InstanceKind;
76
+ observedAt: string;
77
+ situation: Situation;
78
+ before?: Artifact; // what the agent produced
79
+ after?: Artifact; // what the human made it
80
+ diff?: StructuredDiff; // computed at ingest, the engine's main food
81
+ probeAnswers: ProbeAnswer[]; // [] if no probe asked/answered
82
+ sensorId: string;
83
+ actorId: ActorId;
84
+ sourceRefs: SourceId[]; // explicit-layer evidence this moment touched
85
+ objectIds: ObjectId[]; // entities involved (resolved at ingest)
86
+ // privacy zone — defaults shown
87
+ visibility: "user_private_raw"; // | "derived" | "workspace"
88
+ canPromote: false; // raw stays raw; derived twins can promote
89
+ canUseForAgents: false;
90
+ };
91
+ ```
92
+
93
+ Note: **approvals are instances too.** Positive signal (uncorrected output in a
94
+ situation where a rule fired) is how hypotheses gain validity without pestering the user.
95
+
96
+ ### Situation — the retrieval key
97
+
98
+ What makes an instance findable and clusterable. Kept small; the engine can recompute
99
+ embeddings, but these fields are the durable semantics.
100
+
101
+ ```ts
102
+ type Situation = {
103
+ summary: string; // one line, natural language: "drafting cold outreach to enterprise CTO"
104
+ domain: string; // dot-path subdomain: "email.outreach", "code.review", "crm.deal_update"
105
+ task?: string; // what was being attempted
106
+ cues: string[]; // salient features the engine/probes identified
107
+ objectIds: ObjectId[]; // who/what this was about
108
+ app?: string; // gmail | hubspot | claude-code | ...
109
+ agent?: { runtime?: string; model?: string }; // recorded, never required (model-agnostic)
110
+ };
111
+ ```
112
+
113
+ `domain` is the validity-regime boundary: hypotheses do not transfer across domains
114
+ without re-confirmation (fractionated expertise — Kahneman/Klein).
115
+
116
+ ### ProbeAnswer — the micro-interview
117
+
118
+ Probe types derived from the Critical Decision Method (CDM), shaped by the Recognition-Primed Decision (RPD) model. One or two per correction, max.
119
+
120
+ ```ts
121
+ type ProbeKind =
122
+ | "cue" // "What told you this was off?"
123
+ | "expectancy" // "What did you expect to see instead?"
124
+ | "goal" // "What were you protecting here?"
125
+ | "boundary" // "One-off, or always?" / "Would this hold if X were different?"
126
+ | "transfer"; // "Does this apply to other accounts/projects too?"
127
+
128
+ type ProbeAnswer = {
129
+ probe: ProbeKind;
130
+ question: string; // exactly as asked
131
+ answer: string; // verbatim — never summarized at capture time
132
+ askedAt: string;
133
+ dismissed?: boolean; // user skipped — that's signal too (don't re-ask soon)
134
+ };
135
+ ```
136
+
137
+ ### Artifact / StructuredDiff
138
+
139
+ ```ts
140
+ type Artifact = {
141
+ mediaType: string; // text/plain, text/markdown, text/x-diff, application/json
142
+ content: string; // capped at ingest (e.g. 64KB); overflow → RawSource + ref
143
+ contentHash: string;
144
+ sourceRef?: SourceId; // full original, if archived
145
+ };
146
+
147
+ type StructuredDiff = {
148
+ summary: string; // engine-written: "changed greeting from casual to formal title"
149
+ hunks: { before: string; after: string; label?: string }[];
150
+ magnitude: "trivial" | "minor" | "substantive" | "rewrite";
151
+ };
152
+ ```
153
+
154
+ ---
155
+
156
+ ## 2. Learning layer
157
+
158
+ ### TacitHypothesis — the learning object (THE product)
159
+
160
+ RPD-shaped: cues, expectancies, goal, action — plus boundaries, evidence, calibration.
161
+
162
+ ```ts
163
+ type HypothesisStatus =
164
+ | "candidate" // inferred, not yet replay-validated — NOT served to agents
165
+ | "validated" // passed replay against held-out instances — servable with caveats
166
+ | "active" // confirmed by review or repeated outcomes — served confidently
167
+ | "stale" // staleness timer or contradictions — served only with warning, queued for revalidation
168
+ | "retired"; // superseded, rejected, or decayed to zero — kept for audit, never served
169
+
170
+ type TacitHypothesis = {
171
+ id: HypothesisId;
172
+ status: HypothesisStatus;
173
+
174
+ // The rule (RPD byproducts)
175
+ rule: string; // "When drafting outreach to enterprise contacts at Acme, use formal titles."
176
+ cues: string[]; // signals that this situation is *this kind* of situation
177
+ expectancies: string[]; // what you'd observe if the rule is working — and misfire signals
178
+ goal?: string; // what the rule protects ("don't burn exec relationships")
179
+ domain: string; // validity regime — same dot-path vocabulary as Situation.domain
180
+ appliesWhen: string[]; // boundary conditions, human-readable, checkable
181
+ doesNotApplyWhen: string[];// counter-boundaries (from counterexamples + boundary probes)
182
+
183
+ // Evidence — the load-bearing links
184
+ supportingInstanceIds: InstanceId[];
185
+ counterexampleInstanceIds: InstanceId[];
186
+ inferredRationale?: string; // engine's explanation — labeled hypothesis, not fact
187
+
188
+ // Calibration
189
+ confidence: number; // 0..1, set by engine, moved by outcomes — never by actor seniority
190
+ validity: {
191
+ fires: number; // times served in a brief
192
+ upheld: number; // served → output uncorrected
193
+ overridden: number; // served → output corrected anyway (auto-counterexample)
194
+ lastFiredAt?: string;
195
+ };
196
+ replay: { // the validation gate
197
+ tested: number; // held-out instances tested against
198
+ reproduced: number; // where the rule predicted the human's edit
199
+ lastRunAt?: string;
200
+ };
201
+
202
+ // Lifecycle
203
+ createdAt: string;
204
+ lastConfirmedAt?: string; // last supporting evidence or upheld outcome
205
+ staleAfter: string; // hard timer — unconfirmed past this → status: stale
206
+ supersededById?: HypothesisId;
207
+
208
+ // Governance
209
+ visibility: "user_private" | "derived" | "workspace";
210
+ review: ReviewState;
211
+ };
212
+
213
+ type ReviewState =
214
+ | { state: "unreviewed" }
215
+ | { state: "approved" | "edited" | "rejected"; byActorId: ActorId; at: string; note?: string };
216
+ ```
217
+
218
+ State machine:
219
+
220
+ ```
221
+ candidate --replay pass--> validated --review approve | N upheld outcomes--> active
222
+ candidate --replay fail--> retired
223
+ validated/active --staleAfter exceeded | counterexamples > threshold--> stale
224
+ stale --revalidation (replay + review)--> active
+ stale --decay--> retired
225
+ any --superseded--> retired (supersededById set)
226
+ ```
227
+
228
+ Deliberately absent (memodis lessons): no `WorkflowEpisode` as a persisted domain object —
229
+ clustering is an engine implementation detail, not schema. No parallel `sourceRefs`/`sourceIds`
230
+ duplication. No 7-level scope taxonomy — `domain` + `visibility` carry it.
231
+
232
+ ---
233
+
234
+ ## 3. Explicit layer (thin, boring, essential)
235
+
236
+ Explicit knowledge grounds tacit extraction (entity vocabulary, situation tagging) and
237
+ fills agent briefs with facts. It is deliberately commodity — gbrain-style, no innovation here.
238
+
239
+ ```ts
240
+ type RawSource = {
241
+ id: SourceId;
242
+ kind: "document" | "email" | "thread" | "page" | "repo_doc" | "transcript" | "manual";
243
+ title: string;
244
+ content: string; // or contentRef for large bodies
245
+ contentHash: string;
246
+ origin: { sensorId?: string; connector?: string; uri?: string };
247
+ capturedAt: string;
248
+ accessState: "private" | "workspace" | "restricted" | "revoked";
249
+ };
250
+
251
+ type KnowledgeObject = {
252
+ id: ObjectId;
253
+ kind: "person" | "org" | "project" | "repo" | "process" | "custom";
254
+ name: string;
255
+ aliases: string[]; // emails, handles, domains, nicknames — resolution keys
256
+ properties: Record<string, string>;
257
+ validFrom: string;
258
+ validUntil?: string;
259
+ };
260
+
261
+ type ObjectRelation = {
262
+ fromId: ObjectId;
263
+ toId: ObjectId;
264
+ kind: string; // free vocabulary: works_at, owns, blocks, part_of...
265
+ sourceRefs: SourceId[]; // relations cite too
266
+ validFrom: string;
267
+ validUntil?: string; // contradiction = close interval + open new one, never delete
268
+ };
269
+ ```
270
+
271
+ Markdown pages (per object, per domain) are **projections** generated from sources +
272
+ hypotheses + relations. Stored in `.athena/pages/` for humans and git; rebuilt, never edited
273
+ as truth.
274
+
275
+ ---
276
+
277
+ ## 4. Serving layer
278
+
279
+ ### Brief — the one agent-facing read (replaces memodis packets + coordinate maps)
280
+
281
+ Compact payload + dereferenceable pointers. Persisted for audit and outcome linkage.
282
+
283
+ ```ts
284
+ type Brief = {
285
+ id: BriefId;
286
+ task: string; // as the agent stated it
287
+ compiledAt: string;
288
+
289
+ rules: {
290
+ hypothesisId: HypothesisId;
291
+ rule: string;
292
+ confidence: number;
293
+ appliesBecause: string; // which cues matched this task
294
+ boundaries: string[]; // doesNotApplyWhen, surfaced inline
295
+ ref: Ref;
296
+ }[];
297
+
298
+ facts: {
299
+ statement: string;
300
+ ref: Ref; // citation into explicit layer — mandatory
301
+ }[];
302
+
303
+ doNotAssume: string[]; // caveats: ambiguities, contradictions, stale knowledge
304
+ openQuestions: string[]; // what athena knows it doesn't know (gbrain's honesty, kept)
305
+
306
+ readiness: "act" | "act_with_caveats" | "inspect_first" | "ask_human";
307
+ refs: Ref[]; // additional evidence worth opening, ranked
308
+ };
309
+
310
+ type Ref = string; // athena://hypothesis/hyp_x | athena://instance/ins_x
311
+ // athena://source/src_x#L10-40 | athena://object/obj_x
312
+ ```
313
+
314
+ `Ref` is the coordinate system's survivor: a plain dereferenceable pointer, no taxonomy.
315
+
316
+ ### AgentOutcome — the feedback loop closer
317
+
318
+ ```ts
319
+ type AgentOutcome = {
320
+ id: OutcomeId;
321
+ briefId: BriefId; // which serving this outcome judges
322
+ result: "uncorrected" // human accepted → upheld++ for every served rule
323
+ | "corrected" // human edited → correctionInstanceId links the new instance,
324
+ // overridden++ for served rules the correction contradicts
325
+ | "abandoned"
326
+ | "unknown";
327
+ correctionInstanceId?: InstanceId;
328
+ recordedAt: string;
329
+ };
330
+ ```
331
+
332
+ This closes the loop: **brief → action → outcome → instance → hypothesis update**, and
333
+ gives us the north-star metric directly: correction rate per served brief, over time.
334
+
335
+ ### MCP surface (4 tools, not 19)
336
+
337
+ | Tool | Does |
338
+ |---|---|
339
+ | `athena_brief(task, domain?)` | compile + persist a Brief |
340
+ | `athena_open(ref)` | dereference any `athena://` ref |
341
+ | `athena_record(event)` | SensorEvent in: outcome, correction, manual note, rule *proposal* |
342
+ | `athena_search(query, lane?)` | direct lexical+semantic search when the agent wants to explore |
343
+
344
+ No durable-mutation tools. `athena_record` proposals enter the review queue like any sensor event.
345
+
346
+ ---
347
+
348
+ ## 5. Storage mapping
349
+
350
+ SQLite, one file per workspace (`.athena/athena.db`):
351
+ - `instances`, `hypotheses`, `sources`, `objects`, `relations`, `outcomes`, `briefs`
352
+ — JSON column + extracted indexed columns (id, kind, domain, status, observedAt, visibility).
353
+ - FTS5 over instance summaries/diffs, hypothesis rules, source content.
354
+ - `sqlite-vec` over situation/rule/source embeddings (optional lane — everything works without it).
355
+ - WAL mode; single-writer per file is acceptable for local; server mode serializes per workspace.
356
+
357
+ Markdown projection in `.athena/pages/` — derived, git-friendly, regenerated.
358
+
359
+ ## 6. Open questions (carry into eval-harness design)
360
+
361
+ - Probe selection policy: when is a correction worth a probe at all (magnitude threshold? novelty?),
362
+ and probe-budget per user per day (extension lesson from memodis: budget was right).
363
+ - Replay scoring: what counts as "reproduced the human's edit" — LLM judge with rubric, or
364
+ string/structure similarity floor? (Likely: LLM judge, calibrated against a hand-labeled set.)
365
+ - Confidence math: start with ExpeL-style counters + replay rate; resist Bayesian theater until
366
+ the eval harness can tell us if it helps.
367
+ - Cross-actor aggregation (team mode): hypotheses are per-actor in v1; enterprise merge is a
368
+ governed promotion, not an automatic union.
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "useathena",
3
+ "version": "0.1.0",
4
+ "description": "athena captures tacit knowledge from real work so agents become truly autonomous and reliable.",
5
+ "license": "UNLICENSED",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/ErikLit005/athena.git"
9
+ },
10
+ "type": "module",
11
+ "engines": {
12
+ "node": ">=22.5"
13
+ },
14
+ "files": [
15
+ "dist",
16
+ "bin",
17
+ "apps/chrome-extension",
18
+ "docs/schema.md",
19
+ "scripts/prepare.mjs"
20
+ ],
21
+ "scripts": {
22
+ "test": "node --import tsx --test src/**/*.test.ts",
23
+ "typecheck": "tsc --noEmit",
24
+ "verify": "npm run typecheck && npm test",
25
+ "build": "tsc -p tsconfig.build.json",
26
+ "prepare": "node scripts/prepare.mjs",
27
+ "eval": "node --import tsx src/eval/run-eval.ts",
28
+ "mcp": "node --import tsx src/mcp-server.ts",
29
+ "athena": "node --import tsx src/cli.ts"
30
+ },
31
+ "devDependencies": {
32
+ "@types/node": "^25.9.1",
33
+ "tsx": "^4.22.3",
34
+ "typescript": "^5.5.0"
35
+ },
36
+ "dependencies": {
37
+ "@modelcontextprotocol/sdk": "^1.29.0",
38
+ "zod": "^4.4.3"
39
+ },
40
+ "bin": {
41
+ "athena": "bin/athena"
42
+ }
43
+ }