useathena 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Slack connector: bring-your-own app created from a printed manifest (Slack
3
+ * allows no localhost OAuth redirects, so distributed OAuth would need hosted
4
+ * infra — the manifest flow is the validated zero-infra path). Read-only user
5
+ * token; only public channels the user is a member of are synced, one source
6
+ * per channel holding the recent transcript. Hash-dedupe in the sync runner
7
+ * makes the re-walk cheap, so there is no cursor.
8
+ */
9
/** Root of the Slack Web API; method names are appended as a path segment. */
const API = "https://slack.com/api";
/** Upper bound on member channels walked in a single sync. */
const MAX_CHANNELS = 100;
/** Page size requested from `conversations.history` for each channel. */
const MESSAGES_PER_CHANNEL = 100;
/** How many of a channel's most recent threaded parents get their replies fetched. */
const REPLY_THREADS_PER_CHANNEL = 25;
// The manifest is assembled piecewise; key order is kept so the printed JSON is stable.
const manifestUserScopes = ["channels:history", "channels:read", "users:read"];
const manifestDefinition = {
    display_information: {
        name: "athena",
        description: "Read-only knowledge capture for the athena CLI",
    },
    oauth_config: {
        scopes: { user: manifestUserScopes },
    },
    settings: {
        org_deploy_enabled: false,
        socket_mode_enabled: false,
        token_rotation_enabled: false,
    },
};
/** Pretty-printed (2-space) Slack app manifest requesting only user-token read scopes. */
export const SLACK_MANIFEST = JSON.stringify(manifestDefinition, null, 2);
18
+ export function slackFetcher(token) {
19
+ return async (method, params = {}) => {
20
+ const query = new URLSearchParams(params).toString();
21
+ for (let attempt = 0;; attempt += 1) {
22
+ const response = await fetch(`${API}/${method}${query ? `?${query}` : ""}`, {
23
+ headers: { Authorization: `Bearer ${token}` },
24
+ signal: AbortSignal.timeout(30_000),
25
+ });
26
+ if (response.status === 429 && attempt < 3) {
27
+ const retryAfter = Number(response.headers.get("retry-after"));
28
+ await new Promise((resolve) => setTimeout(resolve, (Number.isFinite(retryAfter) && retryAfter > 0 ? retryAfter : 1) * 1000));
29
+ continue;
30
+ }
31
+ const body = (await response.json());
32
+ if (body.ok !== true)
33
+ throw new Error(`slack ${method}: ${typeof body.error === "string" ? body.error : `HTTP ${response.status}`}`);
34
+ return body;
35
+ }
36
+ };
37
+ }
38
+ export async function validateSlackToken(fetchSlack) {
39
+ const auth = await fetchSlack("auth.test");
40
+ const team = typeof auth.team === "string" ? auth.team : "Slack workspace";
41
+ return typeof auth.user === "string" ? `${team} (as ${auth.user})` : team;
42
+ }
43
/**
 * Flattens Slack message markup into plain text.
 *
 * `<@U…>` → @name (falls back to the raw id when the user is not in `users`),
 * `<#C…|name>` → #name (or #C… when the label is missing or empty),
 * `<url|label>` → label, `<url>` → url. Finally the three entities Slack
 * escapes in message text (`&lt;`, `&gt;`, `&amp;`) are decoded.
 *
 * @param text Raw Slack message text.
 * @param users Map from user id to display name.
 * @returns The plain-text rendering.
 */
export function renderText(text, users) {
    return text
        .replace(/<@([A-Z0-9]+)(?:\|[^>]*)?>/g, (_, id) => `@${users.get(id) ?? id}`)
        .replace(/<#([A-Z0-9]+)(?:\|([^>]*))?>/g, (_, id, label) => `#${label || id}`)
        .replace(/<([^|>]+)\|([^>]+)>/g, "$2")
        .replace(/<([^>]+)>/g, "$1")
        // Entities are decoded only after the <…> markup is gone, so an escaped
        // "&lt;b&gt;" is not mistaken for markup; "&amp;" goes last so that
        // "&amp;lt;" stays the literal text "&lt;".
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&amp;/g, "&");
}
51
+ function renderLine(message, users, indent = "") {
52
+ const day = new Date(Number(message.ts) * 1000).toISOString().slice(0, 10);
53
+ const name = message.user !== undefined ? (users.get(message.user) ?? message.user) : "unknown";
54
+ return `${indent}[${day}] ${name}: ${renderText(message.text, users)}`;
55
+ }
56
/** True for messages with no `subtype` whose `text` is a string with non-whitespace content. */
const hasText = (m) => {
    if (m.subtype !== undefined) {
        return false;
    }
    if (typeof m.text !== "string") {
        return false;
    }
    return m.text.trim().length > 0;
};
57
+ export function renderTranscript(messages, users, replies = new Map()) {
58
+ return messages
59
+ .filter(hasText)
60
+ .sort((a, b) => Number(a.ts) - Number(b.ts))
61
+ .flatMap((message) => [
62
+ renderLine(message, users),
63
+ ...(replies.get(message.ts) ?? [])
64
+ .filter(hasText)
65
+ .sort((a, b) => Number(a.ts) - Number(b.ts))
66
+ .map((reply) => renderLine(reply, users, " ↳ ")),
67
+ ])
68
+ .join("\n");
69
+ }
70
+ async function paginate(fetchSlack, method, params, key) {
71
+ const all = [];
72
+ let cursor;
73
+ do {
74
+ const body = await fetchSlack(method, cursor ? { ...params, cursor } : params);
75
+ all.push(...(Array.isArray(body[key]) ? body[key] : []));
76
+ const metadata = body.response_metadata;
77
+ cursor = metadata?.next_cursor || undefined;
78
+ } while (cursor);
79
+ return all;
80
+ }
81
+ export async function fetchSlackChannels(fetchSlack, _cursor, log) {
82
+ const users = new Map();
83
+ for (const raw of await paginate(fetchSlack, "users.list", { limit: "200" }, "members")) {
84
+ const member = raw;
85
+ if (member.id === undefined)
86
+ continue;
87
+ users.set(member.id, member.profile?.display_name || member.real_name || member.name || member.id);
88
+ }
89
+ const channels = (await paginate(fetchSlack, "conversations.list", { types: "public_channel", exclude_archived: "true", limit: "200" }, "channels"))
90
+ .map((raw) => raw)
91
+ .filter((c) => c.id !== undefined && c.is_member === true);
92
+ const truncated = channels.length > MAX_CHANNELS;
93
+ if (truncated)
94
+ log(`capped at ${MAX_CHANNELS} of ${channels.length} member channels this sync`);
95
+ const items = [];
96
+ for (const channel of channels.slice(0, MAX_CHANNELS)) {
97
+ const history = await fetchSlack("conversations.history", { channel: channel.id, limit: String(MESSAGES_PER_CHANNEL) });
98
+ const messages = (Array.isArray(history.messages) ? history.messages : []);
99
+ // Threads carry the real discussion: pull replies for the most recent
100
+ // threaded parents (same channels:history scope, one call per thread).
101
+ const replies = new Map();
102
+ const parents = messages
103
+ .filter((m) => (m.reply_count ?? 0) > 0)
104
+ .sort((a, b) => Number(b.ts) - Number(a.ts))
105
+ .slice(0, REPLY_THREADS_PER_CHANNEL);
106
+ for (const parent of parents) {
107
+ const thread = await fetchSlack("conversations.replies", { channel: channel.id, ts: parent.ts, limit: "100" });
108
+ const threadMessages = (Array.isArray(thread.messages) ? thread.messages : []);
109
+ replies.set(parent.ts, threadMessages.filter((m) => m.ts !== parent.ts));
110
+ }
111
+ const content = renderTranscript(messages, users, replies);
112
+ if (!content)
113
+ continue;
114
+ const newest = messages.reduce((max, m) => Math.max(max, Number(m.ts) || 0), 0);
115
+ items.push({
116
+ uri: `slack://${channel.context_team_id ?? "T"}/${channel.id}`,
117
+ title: `#${channel.name ?? channel.id}`,
118
+ content,
119
+ editedAt: new Date(newest * 1000).toISOString(),
120
+ });
121
+ }
122
+ return { items, truncated };
123
+ }
@@ -0,0 +1,123 @@
1
+ import { createHash } from "node:crypto";
2
+ import { newId } from "../core/ids.js";
3
+ import { loadSecret } from "./secrets.js";
4
+ import { fetchNotionPages, notionFetcher, validateNotionToken, } from "./notion.js";
5
+ import { fetchSlackChannels, SLACK_MANIFEST, slackFetcher, validateSlackToken } from "./slack.js";
6
+ import { acquireGoogleAuth, fetchGoogleSources, googleApi } from "./google.js";
7
/** Notion: the user pastes an internal-integration secret. */
const notionConnector = {
    label: "Notion",
    sourceKind: "page",
    connectHint: "token paste, ~2 minutes",
    tokenHint: "ntn_… or secret_…",
    instructions: [
        "Open https://www.notion.so/profile/integrations and click “New integration”",
        "Type: Internal. Pick the workspace, save, and copy the integration secret",
        "Share content: on any Notion page → ··· → Connections → add the integration",
        "(subpages are included automatically — what you share is what athena sees)",
    ],
    validate(token) {
        return validateNotionToken(notionFetcher(token));
    },
    fetch(token, cursor, log) {
        return fetchNotionPages(notionFetcher(token), cursor, log);
    },
};
/** Slack: the user creates an app from the printed manifest and pastes its user token. */
const slackConnector = {
    label: "Slack",
    sourceKind: "thread",
    connectHint: "app from a manifest, ~3 minutes",
    tokenHint: "xoxp-…",
    instructions: [
        "Open https://api.slack.com/apps?new_app=1 and pick “From a manifest”",
        "Choose your workspace, paste the manifest below, and create the app",
        "Click “Install to Workspace” and allow it",
        "OAuth & Permissions → copy the User OAuth Token (read-only scopes;",
        "athena syncs only public channels you are a member of)",
    ],
    extra: SLACK_MANIFEST,
    validate(token) {
        return validateSlackToken(slackFetcher(token));
    },
    fetch(token, cursor, log) {
        return fetchSlackChannels(slackFetcher(token), cursor, log);
    },
};
/** Google: credentials come from `acquire` (browser sign-in) rather than a pasted token. */
const googleConnector = {
    label: "Google (Drive + Gmail)",
    sourceKind: "document",
    connectHint: "browser sign-in, ~1 minute",
    instructions: [
        "A browser window opens — sign in and allow read-only access to Drive and Gmail",
        "During friend testing Google shows an “unverified app” warning:",
        "Advanced → “Go to athena (unsafe)” — expected until the app is verified",
    ],
    acquire(rl, log) {
        return acquireGoogleAuth(rl, log);
    },
    async fetch(secret, cursor, log) {
        const api = await googleApi(secret);
        return fetchGoogleSources(api, cursor, log);
    },
};
/**
 * Registry of connectors keyed by provider id. Each entry carries the
 * user-facing connect copy plus the provider hooks: `fetch` on every entry,
 * `validate` on the token-paste connectors, `acquire` on Google.
 */
export const connectors = {
    notion: notionConnector,
    slack: slackConnector,
    google: googleConnector,
};
51
+ const INFLIGHT_FRESH_MS = 15 * 60 * 1000;
52
+ export async function syncConnection(store, connection, log = () => { }) {
53
+ const token = loadSecret(connection.id);
54
+ if (!token)
55
+ throw new Error(`no token stored for ${connection.provider} — reconnect with: athena connect ${connection.provider}`);
56
+ const connector = connectors[connection.provider];
57
+ // One sync per connection at a time, across processes (serve timer vs CLI):
58
+ // a fresh in-flight marker means someone else is on it. Stale markers from
59
+ // crashed runs expire instead of wedging the connection.
60
+ const inflightKey = `sync.inflight.${connection.id}`;
61
+ const inflightSince = store.getMeta(inflightKey);
62
+ if (inflightSince && Date.now() - Date.parse(inflightSince) < INFLIGHT_FRESH_MS) {
63
+ throw new Error(`a ${connection.provider} sync is already running (since ${inflightSince}) — it can take minutes on big workspaces`);
64
+ }
65
+ store.setMeta(inflightKey, new Date().toISOString());
66
+ let fetched;
67
+ try {
68
+ fetched = await connector.fetch(token, connection.cursor, log);
69
+ }
70
+ catch (error) {
71
+ const message = error instanceof Error ? error.message : String(error);
72
+ store.saveConnection({ ...connection, lastError: message });
73
+ store.setMeta(inflightKey, "");
74
+ throw error;
75
+ }
76
+ const result = { added: 0, updated: 0, unchanged: 0 };
77
+ for (const item of fetched.items) {
78
+ const contentHash = hashContent(`${item.title}\n${item.content}`);
79
+ const existing = store.getSourceByOriginUri(item.uri);
80
+ if (existing?.contentHash === contentHash) {
81
+ result.unchanged += 1;
82
+ continue;
83
+ }
84
+ store.saveSource({
85
+ id: existing?.id ?? newId("src"),
86
+ kind: item.kind ?? connector.sourceKind,
87
+ title: item.title,
88
+ content: item.content,
89
+ contentHash,
90
+ origin: { connector: connection.provider, uri: item.uri },
91
+ capturedAt: new Date().toISOString(),
92
+ accessState: "private",
93
+ });
94
+ result[existing ? "updated" : "added"] += 1;
95
+ }
96
+ const { lastError: _cleared, ...rest } = connection;
97
+ store.saveConnection({
98
+ ...rest,
99
+ lastSyncedAt: new Date().toISOString(),
100
+ ...(fetched.cursor !== undefined ? { cursor: fetched.cursor } : {}),
101
+ });
102
+ store.setMeta(inflightKey, "");
103
+ return result;
104
+ }
105
+ const SYNC_DUE_AFTER_MS = 4 * 60 * 60 * 1000;
106
+ /** Sync every connection not synced in the last 4 hours; failures land on the connection, not the caller. */
107
+ export async function syncDueConnections(store, log = () => { }) {
108
+ for (const connection of store.listConnections()) {
109
+ const last = connection.lastSyncedAt ? Date.parse(connection.lastSyncedAt) : 0;
110
+ if (Date.now() - last < SYNC_DUE_AFTER_MS)
111
+ continue;
112
+ try {
113
+ const result = await syncConnection(store, connection, log);
114
+ log(`${connection.provider}: +${result.added} new, ${result.updated} updated, ${result.unchanged} unchanged`);
115
+ }
116
+ catch (error) {
117
+ log(`${connection.provider} sync failed: ${error instanceof Error ? error.message : String(error)}`);
118
+ }
119
+ }
120
+ }
121
/** Short content fingerprint: the first 16 hex characters of the SHA-256 digest. */
function hashContent(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const hex = hasher.digest("hex");
    return hex.substring(0, 16);
}
@@ -65,6 +65,9 @@ export class AthenaStore {
65
65
  recorded_at TEXT NOT NULL, matched_outcome_id TEXT, data TEXT NOT NULL
66
66
  );
67
67
  CREATE INDEX IF NOT EXISTS idx_drafts_unmatched ON drafts(domain, recorded_at) WHERE matched_outcome_id IS NULL;
68
+ CREATE TABLE IF NOT EXISTS connections (
69
+ id TEXT PRIMARY KEY, provider TEXT NOT NULL, created_at TEXT NOT NULL, data TEXT NOT NULL
70
+ );
68
71
  CREATE TABLE IF NOT EXISTS meta (
69
72
  key TEXT PRIMARY KEY, value TEXT NOT NULL
70
73
  );
@@ -153,6 +156,30 @@ export class AthenaStore {
153
156
  getSource(id) {
154
157
  return this.getData("sources", id);
155
158
  }
159
+ /** Connector dedupe key: one source per external URI, updated in place on re-sync. */
160
+ getSourceByOriginUri(uri) {
161
+ const row = this.db
162
+ .prepare("SELECT data FROM sources WHERE json_extract(data, '$.origin.uri') = ? LIMIT 1")
163
+ .get(uri);
164
+ return row ? rowData(row) : undefined;
165
+ }
166
+ // --- connections (external knowledge sources; tokens live in the secret store) ---
167
+ saveConnection(connection) {
168
+ this.upsert("connections", "INSERT INTO connections (id, provider, created_at, data) VALUES (?, ?, ?, ?) " +
169
+ "ON CONFLICT(id) DO UPDATE SET data = excluded.data", [connection.id, connection.provider, connection.createdAt, JSON.stringify(connection)]);
170
+ }
171
+ getConnection(id) {
172
+ return this.getData("connections", id);
173
+ }
174
+ listConnections(provider) {
175
+ const rows = provider
176
+ ? this.db.prepare("SELECT data FROM connections WHERE provider = ? ORDER BY created_at").all(provider)
177
+ : this.db.prepare("SELECT data FROM connections ORDER BY created_at").all();
178
+ return rows.map((rowData));
179
+ }
180
+ deleteConnection(id) {
181
+ this.db.prepare("DELETE FROM connections WHERE id = ?").run(id);
182
+ }
156
183
  saveObject(object) {
157
184
  this.upsert("objects", "INSERT INTO objects (id, kind, name, data) VALUES (?, ?, ?, ?) " +
158
185
  "ON CONFLICT(id) DO UPDATE SET kind = excluded.kind, name = excluded.name, data = excluded.data", [object.id, object.kind, object.name, JSON.stringify(object)]);
package/docs/schema.md CHANGED
@@ -269,6 +269,29 @@ type ObjectRelation = {
269
269
  };
270
270
  ```
271
271
 
272
+ ### Connection — the explicit layer's front door
273
+
274
+ A connected external source — Notion, Slack, and Google (Drive + Gmail). Tokens never
275
+ live on the record — they stay in `~/.athena/secrets.json` (chmod 0600, same convention
276
+ as provider keys in config.json). Sync is incremental where the provider keeps a newest-edit
277
+ watermark in `cursor` (Slack keeps none and re-walks recent history), deduped one source per external URI: re-syncs update the same
278
+ `RawSource` in place when content changed and skip it when not. Synced content enters
279
+ as `accessState: "private"`; what the user shares with the integration is the privacy
280
+ boundary. Connectors own provider work only — they never write the store directly, the
281
+ sync runner does.
282
+
283
+ ```ts
284
+ type Connection = {
285
+ id: ConnectionId; // con_…
286
+ provider: "notion" | "slack" | "google"; // widens as connectors land
287
+ label: string; // human name, e.g. the Notion workspace
288
+ createdAt: string;
289
+ lastSyncedAt?: string;
290
+ cursor?: string; // provider watermark for incremental sync
291
+ lastError?: string; // last sync failure, cleared on success
292
+ };
293
+ ```
294
+
272
295
  ### Fact — the explicit counterpart of a hypothesis
273
296
 
274
297
  A declarative, entity-grounded statement extracted from evidence ("Acme's renewal
@@ -406,8 +429,8 @@ No durable-mutation tools. `athena_record` proposals enter the review queue like
406
429
 
407
430
  SQLite, one file per workspace (`.athena/athena.db`):
408
431
  - `instances`, `hypotheses`, `sources`, `objects`, `relations`, `outcomes`, `briefs`,
409
- `facts`, `drafts` — JSON column + extracted indexed columns (id, kind, domain, status,
410
- observedAt, visibility).
432
+ `facts`, `drafts`, `connections` — JSON column + extracted indexed columns (id, kind,
433
+ domain, status, observedAt, visibility).
411
434
  - `meta` — key/value operational state (last learn run, auto-learn cooldown, per-connection sync in-flight markers).
412
435
  - FTS5 over instance summaries/diffs, hypothesis rules, source content, fact statements.
413
436
  - `sqlite-vec` over situation/rule/source embeddings (optional lane — everything works without it).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "useathena",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "athena captures tacit knowledge from real work so agents become truly autonomous and reliable.",
5
5
  "license": "UNLICENSED",
6
6
  "repository": {