useathena 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/dist/cli/commands.js +5 -0
- package/dist/cli/connect.js +120 -0
- package/dist/cli/format.js +24 -0
- package/dist/cli/setup.js +97 -25
- package/dist/cli.js +34 -3
- package/dist/connect/google.js +371 -0
- package/dist/connect/notion.js +177 -0
- package/dist/connect/secrets.js +37 -0
- package/dist/connect/slack.js +123 -0
- package/dist/connect/sync.js +123 -0
- package/dist/store/store.js +27 -0
- package/docs/schema.md +25 -2
- package/package.json +1 -1
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
import { createHash, randomBytes } from "node:crypto";
|
|
2
|
+
import { createServer } from "node:http";
|
|
3
|
+
import { spawn } from "node:child_process";
|
|
4
|
+
/**
|
|
5
|
+
* Google connector (Drive + Gmail, read-only) on the installed-app OAuth flow:
|
|
6
|
+
* loopback redirect + PKCE, the gcloud model — supermemory-grade connect UX
|
|
7
|
+
* with zero hosted infrastructure, because a CLI can open a local port where
|
|
8
|
+
* a web service cannot. The client credentials ship in the package; for
|
|
9
|
+
* Desktop-type clients Google documents the secret as non-confidential.
|
|
10
|
+
* During friend testing the consent screen is published-unverified: testers
|
|
11
|
+
* click through one "Google hasn't verified this app" warning, and refresh
|
|
12
|
+
* tokens persist (Testing mode would expire them weekly).
|
|
13
|
+
*/
|
|
14
|
+
// OAuth client shipped with the package; either value can be overridden from the environment.
const CLIENT_ID = process.env.ATHENA_GOOGLE_CLIENT_ID ?? "994548323040-h5k5ikae6e7t33igveqqt4766uk5r4u8.apps.googleusercontent.com";
const CLIENT_SECRET = process.env.ATHENA_GOOGLE_CLIENT_SECRET ?? "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";
// Google OAuth endpoints.
const AUTH_URL = "https://accounts.google.com/o/oauth2/v2/auth";
const TOKEN_URL = "https://oauth2.googleapis.com/token";
// Read-only Drive and Gmail scopes, space-separated as the authorization request expects.
const SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly",
    "https://www.googleapis.com/auth/gmail.readonly",
].join(" ");
// How long the loopback listener waits for the browser redirect (5 minutes).
const AUTH_TIMEOUT_MS = 5 * 60_000;
// Per-sync caps on Drive files and Gmail threads, and the per-item content length cap.
const MAX_FILES = 100;
const MAX_THREADS = 200;
const CONTENT_CAP = 100000;
// HTTP statuses that are retried.
const RETRYABLE = new Set([408, 429, 500, 502, 503, 504]);
|
|
24
|
+
// --- OAuth: loopback + PKCE ---
|
|
25
|
+
/**
 * Create a fresh PKCE pair for one sign-in attempt.
 *
 * @returns {{ verifier: string, challenge: string }} a random base64url verifier
 *   (32 random bytes) and the base64url SHA-256 digest of that verifier.
 */
export function pkcePair() {
    const verifier = randomBytes(32).toString("base64url");
    const hasher = createHash("sha256");
    hasher.update(verifier);
    return { verifier, challenge: hasher.digest("base64url") };
}
|
|
30
|
+
/**
 * Run the interactive Google sign-in: start a loopback listener on a random
 * 127.0.0.1 port, send the user to the consent screen, then exchange the
 * returned authorization code (caught on the loopback port or pasted into the
 * terminal, whichever arrives first) for tokens.
 *
 * @param rl  readline interface used to accept a pasted redirect URL.
 * @param log function called with progress messages for the user.
 * @returns {Promise<{ secret: string, label: string }>} `secret` is a JSON
 *   string holding the refresh token; `label` is the account e-mail address,
 *   or "Google account" when the profile has none.
 * @throws {Error} when no client secret is configured, the loopback port
 *   cannot be opened, sign-in fails or times out, or Google returns no tokens.
 */
export async function acquireGoogleAuth(rl, log) {
    if (!CLIENT_SECRET) {
        throw new Error("Google client secret is not configured — set ATHENA_GOOGLE_CLIENT_SECRET (or update this build)");
    }
    const { verifier, challenge } = pkcePair();
    const state = randomBytes(16).toString("base64url");
    const server = createServer();
    // A failed bind is reported through an "error" event; without a listener it
    // would surface as an uncaught exception instead of a rejected sign-in.
    await new Promise((resolve, reject) => {
        server.once("error", reject);
        server.listen(0, "127.0.0.1", () => {
            server.off("error", reject);
            resolve();
        });
    });
    const redirectUri = `http://127.0.0.1:${server.address().port}`;
    try {
        const authUrl = `${AUTH_URL}?${new URLSearchParams({
            client_id: CLIENT_ID,
            redirect_uri: redirectUri,
            response_type: "code",
            scope: SCOPES,
            code_challenge: challenge,
            code_challenge_method: "S256",
            state,
            access_type: "offline",
            prompt: "consent",
        }).toString()}`;
        log("opening your browser to sign in with Google…");
        log(`if it does not open (or this is an SSH session), open this URL on any machine:\n ${authUrl}`);
        log("after consent the browser lands on a 127.0.0.1 page that fails to load — that is normal");
        openBrowser(authUrl);
        // Two ways home, first one wins: the loopback catch (local browser) or a
        // pasted redirect URL (SSH/headless — the code travels in the URL itself).
        const aborted = new AbortController();
        let code;
        try {
            code = await Promise.race([waitForCode(server, state), codeFromPaste(rl, state, aborted.signal, log)]);
        }
        finally {
            aborted.abort(); // settle the losing path so the readline is usable afterwards
        }
        const token = await postForm(TOKEN_URL, {
            code,
            client_id: CLIENT_ID,
            client_secret: CLIENT_SECRET,
            code_verifier: verifier,
            redirect_uri: redirectUri,
            grant_type: "authorization_code",
        });
        const refreshToken = token.refresh_token;
        const accessToken = token.access_token;
        if (typeof refreshToken !== "string" || typeof accessToken !== "string") {
            throw new Error("Google returned no refresh token — remove athena under myaccount.google.com/permissions and retry");
        }
        // The Gmail profile is fetched only to label the connection with the account address.
        const profile = await apiGet(accessToken, "https://gmail.googleapis.com/gmail/v1/users/me/profile");
        const email = typeof profile.emailAddress === "string" ? profile.emailAddress : "Google account";
        return { secret: JSON.stringify({ refreshToken }), label: email };
    }
    finally {
        server.close();
    }
}
|
|
86
|
+
/**
 * Best-effort attempt to open `url` in the user's default browser.
 * Never throws and never crashes the process: the URL is also printed by the
 * caller, so a missing opener only means the user opens it manually.
 *
 * @param {string} url address to open.
 */
function openBrowser(url) {
    const options = { stdio: "ignore", detached: true };
    let command;
    let args;
    if (process.platform === "darwin") {
        command = "open";
        args = [url];
    }
    else if (process.platform === "win32") {
        // `start` is a cmd.exe builtin, not an executable, so it has to run
        // through cmd. The empty "" is start's window-title argument, and `&`
        // must be escaped or cmd treats it as a command separator.
        command = "cmd";
        args = ["/c", "start", '""', url.replaceAll("&", "^&")];
        options.windowsVerbatimArguments = true;
    }
    else {
        command = "xdg-open";
        args = [url];
    }
    try {
        const child = spawn(command, args, options);
        // A missing opener (e.g. no xdg-open on a headless box) is reported as an
        // asynchronous "error" event; unhandled, it would take the process down.
        child.on("error", () => { });
        child.unref();
    }
    catch {
        // the URL is printed — opening is best-effort
    }
}
|
|
95
|
+
/**
 * Extract the authorization code from a pasted redirect URL
 * (`http://127.0.0.1:…/?code=…&state=…`).
 *
 * @param {string} input         text pasted by the user; surrounding whitespace is ignored.
 * @param {string} expectedState state value issued for the current sign-in attempt.
 * @returns {string} the authorization code.
 * @throws {Error} when the input is not a URL, carries an `error` parameter,
 *   has no `code`, or its `state` differs from `expectedState`.
 */
export function parseRedirectUrl(input, expectedState) {
    let params;
    try {
        params = new URL(input.trim()).searchParams;
    }
    catch {
        throw new Error("that is not a URL — paste the full address from the failed 127.0.0.1 page");
    }
    const failure = params.get("error");
    if (failure) {
        throw new Error(`Google sign-in failed: ${failure}`);
    }
    const code = params.get("code");
    if (!code) {
        throw new Error("no code in that URL — paste the full address, including everything after the ?");
    }
    const stateMatches = params.get("state") === expectedState;
    if (!stateMatches) {
        throw new Error("state mismatch — that URL is from a different sign-in attempt; use the most recent one");
    }
    return code;
}
|
|
115
|
+
/**
 * Prompt repeatedly for a pasted redirect URL until one parses into an
 * authorization code. Blank answers re-prompt silently; unparsable answers are
 * reported through `log` and re-prompted.
 *
 * If the prompt itself rejects (the caller aborts `signal` once the other
 * sign-in path has finished), the returned promise never settles.
 *
 * @param rl            readline interface exposing a promise-returning `question`.
 * @param expectedState state value the pasted URL must carry.
 * @param signal        AbortSignal passed to the prompt.
 * @param log           function called with the reason a pasted URL was rejected.
 * @returns {Promise<string>} the authorization code.
 */
async function codeFromPaste(rl, expectedState, signal, log) {
    let code = null;
    while (code === null) {
        let line;
        try {
            const typed = await rl.question("waiting — or paste the redirect URL here: ", { signal });
            line = typed.trim();
        }
        catch {
            return new Promise(() => { }); // loopback won the race; never settle
        }
        if (line) {
            try {
                code = parseRedirectUrl(line, expectedState);
            }
            catch (error) {
                log(String(error instanceof Error ? error.message : error));
            }
        }
    }
    return code;
}
|
|
134
|
+
/**
 * Wait for the OAuth redirect to hit the loopback server.
 *
 * Requests carrying neither `code` nor `error` are answered with 404 and
 * ignored. The first request that carries one settles the promise.
 *
 * @param server        listening `http.Server` bound to the redirect URI.
 * @param expectedState state value issued for this sign-in attempt.
 * @returns {Promise<string>} the authorization code.
 * @throws {Error} (as a rejection) on an `error` parameter, a state mismatch,
 *   or when nothing arrives within AUTH_TIMEOUT_MS.
 */
function waitForCode(server, expectedState) {
    const page = (heading, detail) => `<html><body style='font-family:sans-serif'><h2>${heading}</h2>${detail}</body></html>`;
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error("timed out waiting for the browser sign-in (5 minutes)")), AUTH_TIMEOUT_MS);
        // When the pasted-URL path wins, nobody hits this listener and the timer
        // would otherwise keep the process alive for the rest of the timeout.
        server.once("close", () => clearTimeout(timer));
        server.on("request", (request, response) => {
            let url;
            try {
                url = new URL(request.url ?? "/", "http://127.0.0.1");
            }
            catch {
                // A malformed request target must not throw out of the event handler.
                response.writeHead(400).end();
                return;
            }
            const code = url.searchParams.get("code");
            const state = url.searchParams.get("state");
            const error = url.searchParams.get("error");
            if (!code && !error) {
                response.writeHead(404).end();
                return;
            }
            let failure;
            if (error) {
                failure = `Google sign-in failed: ${error}`;
            }
            else if (state !== expectedState) {
                failure = "OAuth state mismatch — try again";
            }
            // Only claim success when the sign-in actually succeeded. The failure
            // page is static text, so nothing from the request is echoed into HTML.
            if (failure === undefined) {
                response
                    .writeHead(200, { "Content-Type": "text/html" })
                    .end(page("athena is connected", "You can close this tab and return to the terminal."));
            }
            else {
                response
                    .writeHead(400, { "Content-Type": "text/html" })
                    .end(page("athena could not connect", "Return to the terminal for details."));
            }
            clearTimeout(timer);
            if (failure === undefined) {
                resolve(code);
            }
            else {
                reject(new Error(failure));
            }
        });
    });
}
|
|
159
|
+
/**
 * POST a form-encoded body to the token endpoint and return the parsed JSON reply.
 *
 * @param {string} url  endpoint to post to.
 * @param {object} form key/value pairs sent as `application/x-www-form-urlencoded`.
 * @returns {Promise<object>} the parsed JSON response body.
 * @throws {Error} with the HTTP status (plus the endpoint's `error` and
 *   `error_description` when present) on a non-2xx reply, or when a 2xx reply
 *   is not a JSON object.
 */
async function postForm(url, form) {
    const response = await fetch(url, {
        method: "POST",
        headers: { "Content-Type": "application/x-www-form-urlencoded" },
        body: new URLSearchParams(form).toString(),
        signal: AbortSignal.timeout(30_000),
    });
    // Read as text first: an HTML error page from a proxy must not turn into an
    // opaque JSON SyntaxError that hides the HTTP status.
    const raw = await response.text();
    let body;
    try {
        body = JSON.parse(raw);
    }
    catch {
        body = undefined;
    }
    if (!response.ok) {
        const reason = [body?.error, body?.error_description]
            .filter((part) => typeof part === "string" && part.length > 0)
            .join(": ");
        throw new Error(`google token endpoint: HTTP ${response.status} ${reason}`);
    }
    if (typeof body !== "object" || body === null) {
        throw new Error(`google token endpoint: HTTP ${response.status} returned a non-JSON body`);
    }
    return body;
}
|
|
172
|
+
/**
 * Authenticated GET against a Google API, with up to three retries on the
 * statuses listed in RETRYABLE.
 *
 * @param {string} accessToken bearer token for the Authorization header.
 * @param {string} url         absolute URL to fetch.
 * @param {number} [attempt=0] retry counter, used by the recursive retry.
 * @returns {Promise<object>} the parsed JSON body, or `{ __text }` holding the
 *   raw body when it is not JSON.
 * @throws {Error} naming the URL path, HTTP status and error detail once the
 *   response is not OK and no retry applies.
 */
async function apiGet(accessToken, url, attempt = 0) {
    const response = await fetch(url, {
        headers: { Authorization: `Bearer ${accessToken}` },
        signal: AbortSignal.timeout(30_000),
    });
    if (!response.ok) {
        if (attempt < 3 && RETRYABLE.has(response.status)) {
            // Discard the unread body so the failed response does not hold its connection.
            await response.body?.cancel().catch(() => { });
            // Prefer the server's Retry-After (in seconds) over the linear backoff.
            const retryAfter = Number(response.headers.get("retry-after"));
            const delayMs = Number.isFinite(retryAfter) && retryAfter > 0 ? retryAfter * 1000 : 500 * (attempt + 1);
            await new Promise((resolve) => setTimeout(resolve, delayMs));
            return apiGet(accessToken, url, attempt + 1);
        }
        const raw = await response.text();
        let detail = raw.slice(0, 300);
        try {
            const parsed = JSON.parse(raw);
            if (parsed.error?.message)
                detail = parsed.error.message;
        }
        catch {
            // not JSON — keep the raw slice
        }
        throw new Error(`google api ${new URL(url).pathname}: HTTP ${response.status} — ${detail}`);
    }
    const text = await response.text();
    try {
        return JSON.parse(text);
    }
    catch {
        return { __text: text }; // Drive exports and media downloads are not JSON
    }
}
|
|
202
|
+
/**
 * Refresh-token → authenticated fetcher for one sync run.
 *
 * @param {string} storedSecret JSON string of the form `{ "refreshToken": "…" }`.
 * @returns {Promise<(url: string) => Promise<object>>} a function that GETs a
 *   URL with the freshly obtained access token.
 * @throws {Error} telling the user to reconnect when the stored secret is
 *   unreadable or the refresh yields no access token.
 */
export async function googleApi(storedSecret) {
    let refreshToken;
    try {
        ({ refreshToken } = JSON.parse(storedSecret));
    }
    catch {
        // unreadable secret — reported by the check below
        refreshToken = undefined;
    }
    // Without this check a corrupt secret surfaces as a raw SyntaxError, or is
    // sent to Google as the literal string "undefined".
    if (typeof refreshToken !== "string" || refreshToken.length === 0) {
        throw new Error("google credentials are missing or corrupted — reconnect with: athena connect google");
    }
    const token = await postForm(TOKEN_URL, {
        refresh_token: refreshToken,
        client_id: CLIENT_ID,
        client_secret: CLIENT_SECRET,
        grant_type: "refresh_token",
    });
    const accessToken = token.access_token;
    if (typeof accessToken !== "string")
        throw new Error("google refresh failed — reconnect with: athena connect google");
    return (url) => apiGet(accessToken, url);
}
|
|
216
|
+
// --- Drive ---
|
|
217
|
+
// Drive search filter: Google Docs plus plain-text and Markdown files, excluding trashed ones.
const DRIVE_QUERY = (() => {
    const mimeClauses = [
        "mimeType='application/vnd.google-apps.document'",
        "mimeType='text/plain'",
        "mimeType='text/markdown'",
    ];
    return `(${mimeClauses.join(" or ")}) and trashed=false`;
})();
|
|
221
|
+
/**
 * Collect Drive files edited after `cursor`, newest first, and download their text.
 *
 * Listing stops at the first file whose `modifiedTime` is not newer than the
 * cursor, or once MAX_FILES files are collected (reported as `truncated`).
 * A file whose download fails is logged and skipped; files with blank content
 * are dropped. Content is cut to CONTENT_CAP characters.
 *
 * @param api    function returning the parsed response for a URL.
 * @param cursor `modifiedTime` of the newest file already synced, or undefined.
 * @param log    function called with progress and skip messages.
 * @returns {Promise<{ items: object[], truncated: boolean, cursor?: string }>}
 */
async function fetchDrive(api, cursor, log) {
    const GOOGLE_DOC = "application/vnd.google-apps.document";
    const listed = [];
    let truncated = false;
    let nextPage;
    let finished = false;
    while (!finished) {
        const search = new URLSearchParams({
            q: DRIVE_QUERY,
            orderBy: "modifiedTime desc",
            pageSize: "50",
            fields: "nextPageToken,files(id,name,mimeType,modifiedTime,webViewLink)",
        });
        if (nextPage) {
            search.set("pageToken", nextPage);
        }
        const page = await api(`https://www.googleapis.com/drive/v3/files?${search.toString()}`);
        const batch = Array.isArray(page.files) ? page.files : [];
        for (const entry of batch) {
            if (cursor !== undefined && entry.modifiedTime <= cursor) {
                finished = true; // sorted desc — the rest is synced
                break;
            }
            if (listed.length >= MAX_FILES) {
                truncated = true;
                finished = true;
                break;
            }
            listed.push(entry);
        }
        if (!finished) {
            nextPage = typeof page.nextPageToken === "string" ? page.nextPageToken : undefined;
            finished = !nextPage;
        }
    }
    if (truncated) {
        log(`drive: capped at the ${MAX_FILES} most recently edited files — older files sync when next edited`);
    }
    const items = [];
    for (const file of listed) {
        // Google Docs need an export to plain text; other files are downloaded as-is.
        const downloadUrl = file.mimeType === GOOGLE_DOC
            ? `https://www.googleapis.com/drive/v3/files/${file.id}/export?mimeType=text/plain`
            : `https://www.googleapis.com/drive/v3/files/${file.id}?alt=media`;
        let body;
        try {
            body = await api(downloadUrl);
        }
        catch (error) {
            // export-locked shared docs etc. — one bad file must not kill the sync
            log(`drive: skipping "${file.name}" — ${error instanceof Error ? error.message : String(error)}`);
            continue;
        }
        const fullText = typeof body.__text === "string" ? body.__text : JSON.stringify(body);
        const content = fullText.slice(0, CONTENT_CAP);
        if (content.trim()) {
            items.push({
                uri: file.webViewLink ?? `https://drive.google.com/file/d/${file.id}`,
                title: file.name,
                content,
                editedAt: file.modifiedTime,
                kind: "document",
            });
        }
    }
    const result = { items, truncated };
    // The first listed file is the newest; with nothing new the cursor carries over.
    const newest = listed[0]?.modifiedTime ?? cursor;
    if (newest !== undefined) {
        result.cursor = newest;
    }
    return result;
}
|
|
278
|
+
// --- Gmail ---
|
|
279
|
+
/**
 * Decode a base64url-encoded string into UTF-8 text.
 *
 * @param {string} data base64url-encoded input.
 * @returns {string} the decoded text.
 */
export function decodeBase64Url(data) {
    const bytes = Buffer.from(data, "base64url");
    return bytes.toString("utf8");
}
|
|
282
|
+
/**
 * Depth-first text/plain extraction across nested multiparts.
 *
 * @param part a message part with optional `mimeType`, `body.data` and `parts`.
 * @returns {string} the decoded body of a text/plain part that has data;
 *   otherwise the non-empty results of its sub-parts joined by newlines.
 */
export function extractText(part) {
    const isPlainTextLeaf = part.mimeType === "text/plain" && part.body?.data;
    if (isPlainTextLeaf) {
        return decodeBase64Url(part.body.data);
    }
    const children = part.parts ?? [];
    const pieces = children.reduce((kept, child) => {
        const text = extractText(child);
        if (text) {
            kept.push(text);
        }
        return kept;
    }, []);
    return pieces.join("\n");
}
|
|
288
|
+
/**
 * Render a thread's messages as one plain-text document.
 *
 * @param messages array of message objects with `payload` and `internalDate`.
 * @returns {{ subject: string, content: string, newestMs: number }} `subject`
 *   is the first non-empty Subject header, or "(no subject)"; `content` is the
 *   subject line followed by one dated "From:" section per message, cut to
 *   CONTENT_CAP characters; `newestMs` is the largest `internalDate` seen
 *   (0 when there is none).
 */
export function renderThread(messages) {
    // Case-insensitive header lookup; a missing header reads as "".
    const headerOf = (message, name) => {
        const match = message.payload?.headers?.find((h) => h.name?.toLowerCase() === name);
        return match?.value ?? "";
    };
    const subjects = messages.map((m) => headerOf(m, "subject"));
    const subject = subjects.find((s) => s) ?? "(no subject)";
    const sections = [];
    let newestMs = 0;
    for (const message of messages) {
        const sentMs = Number(message.internalDate ?? 0);
        newestMs = Math.max(newestMs, sentMs);
        let day = "????-??-??";
        if (sentMs > 0) {
            day = new Date(sentMs).toISOString().slice(0, 10);
        }
        const text = message.payload ? extractText(message.payload).trim() : "";
        sections.push(`[${day}] From: ${headerOf(message, "from")}\n${text}`);
    }
    const document = `Subject: ${subject}\n\n${sections.join("\n---\n")}`;
    return { subject, content: document.slice(0, CONTENT_CAP), newestMs };
}
|
|
300
|
+
/**
 * Collect sent-mail threads and render each as one text item.
 *
 * @param api      function returning the parsed response for a URL.
 * @param afterSec cursor in epoch seconds; undefined on the first sync.
 * @param log      function called with cap and skip messages.
 * @returns {Promise<{ items: object[], truncated: boolean, cursor?: number }>}
 *   `cursor` is one second past the newest message seen, or the incoming
 *   cursor when nothing newer was found.
 */
async function fetchGmail(api, afterSec, log) {
    // First sync digs a year deep — sent mail is the knowledge bootstrap; after
    // that the cursor keeps syncs incremental.
    const query = afterSec !== undefined ? `in:sent after:${afterSec}` : "in:sent newer_than:365d";
    const threads = [];
    let pageToken;
    do {
        const params = new URLSearchParams({ q: query, maxResults: "100", ...(pageToken ? { pageToken } : {}) });
        const list = await api(`https://gmail.googleapis.com/gmail/v1/users/me/threads?${params.toString()}`);
        threads.push(...(Array.isArray(list.threads) ? list.threads : []));
        pageToken = typeof list.nextPageToken === "string" ? list.nextPageToken : undefined;
    } while (pageToken && threads.length < MAX_THREADS);
    const truncated = threads.length > MAX_THREADS || pageToken !== undefined;
    if (truncated) {
        log(`gmail: capped at the ${MAX_THREADS} newest threads — older ones enter only when their thread gets new mail`);
    }
    const items = [];
    let newestMs = 0;
    for (const thread of threads.slice(0, MAX_THREADS)) {
        if (!thread.id)
            continue;
        let full;
        try {
            full = await api(`https://gmail.googleapis.com/gmail/v1/users/me/threads/${thread.id}?format=full`);
        }
        catch (error) {
            log(`gmail: skipping a thread — ${error instanceof Error ? error.message : String(error)}`);
            continue;
        }
        const messages = Array.isArray(full.messages) ? full.messages : [];
        // renderThread always emits a "Subject:" line, so its content is never
        // blank; emptiness has to be judged on the messages themselves.
        if (messages.length === 0)
            continue;
        const rendered = renderThread(messages);
        newestMs = Math.max(newestMs, rendered.newestMs);
        items.push({
            uri: `https://mail.google.com/mail/u/0/#all/${thread.id}`,
            title: rendered.subject,
            content: rendered.content,
            editedAt: rendered.newestMs > 0 ? new Date(rendered.newestMs).toISOString() : new Date().toISOString(),
            kind: "email",
        });
    }
    const result = { items, truncated };
    // +1 second so the next `after:` query starts past the newest message already seen.
    const newest = newestMs > 0 ? Math.floor(newestMs / 1000) + 1 : afterSec;
    if (newest !== undefined)
        result.cursor = newest;
    return result;
}
|
|
347
|
+
/**
 * Decode the stored Google sync cursor.
 *
 * @param {string | undefined} cursor JSON string written by a previous sync.
 * @returns {object} the parsed cursor object, or `{}` when the cursor is
 *   missing, is not valid JSON, or does not decode to a plain object.
 */
export function parseGoogleCursor(cursor) {
    if (!cursor)
        return {};
    let parsed;
    try {
        parsed = JSON.parse(cursor);
    }
    catch {
        return {};
    }
    // "null", arrays and primitives are valid JSON but not a cursor; returning
    // them would make property reads on the result throw or misbehave.
    const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
|
|
357
|
+
/**
 * Run the Drive sync and then the Gmail sync, and merge their results.
 *
 * @param api    authenticated fetcher for Google API URLs.
 * @param cursor JSON cursor string from the previous sync, if any.
 * @param log    function called with progress messages.
 * @returns {Promise<{ items: object[], cursor: string, truncated: boolean }>}
 *   Drive items followed by Gmail items, the combined cursor re-encoded as
 *   JSON, and whether either source hit its cap.
 */
export async function fetchGoogleSources(api, cursor, log) {
    const previous = parseGoogleCursor(cursor);
    const driveResult = await fetchDrive(api, previous.drive, log);
    const gmailResult = await fetchGmail(api, previous.gmail, log);
    // Only sources that produced a cursor appear in the combined cursor.
    const combined = {
        ...(driveResult.cursor !== undefined ? { drive: driveResult.cursor } : {}),
        ...(gmailResult.cursor !== undefined ? { gmail: gmailResult.cursor } : {}),
    };
    const items = [...driveResult.items, ...gmailResult.items];
    const truncated = driveResult.truncated || gmailResult.truncated;
    return { items, cursor: JSON.stringify(combined), truncated };
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Notion connector: internal-integration token, pages shared with the
|
|
3
|
+
* integration are the privacy boundary. Sync is incremental the way the
|
|
4
|
+
* Notion API affords it: search returns pages newest-edit-first, so we walk
|
|
5
|
+
* until we pass the last cursor (newest last_edited_time already synced)
|
|
6
|
+
* and stop. Block rendering is plain text with bounded depth — sources feed
|
|
7
|
+
* lexical search and fact extraction, not a Notion clone.
|
|
8
|
+
*/
|
|
9
|
+
// Value sent in the Notion-Version header of every request.
const NOTION_VERSION = "2022-06-28";
// Base URL that request paths are appended to.
const API = "https://api.notion.com/v1";
// Upper bound on pages collected in one sync.
const MAX_PAGES_PER_SYNC = 200;
// Deepest nesting level at which child blocks are still fetched.
const MAX_BLOCK_DEPTH = 2;
// HTTP statuses that are retried.
const RETRYABLE = new Set([408, 429, 500, 502, 503, 504]);
|
|
14
|
+
/**
 * Build a Notion API client bound to one integration token.
 *
 * The returned function issues a GET when `body` is undefined and a JSON POST
 * otherwise. Statuses in RETRYABLE are retried up to three times, waiting the
 * server's `Retry-After` seconds when given, or 500 ms × attempt number.
 *
 * @param {string} token integration token sent as a bearer credential.
 * @returns {(path: string, body?: object) => Promise<object>} resolves with
 *   the parsed JSON response; rejects with an Error naming the path, status
 *   and the first 200 characters of the response body.
 */
export function notionFetcher(token) {
    const send = (path, body) => {
        const hasBody = body !== undefined;
        const headers = {
            Authorization: `Bearer ${token}`,
            "Notion-Version": NOTION_VERSION,
        };
        const init = { method: hasBody ? "POST" : "GET", headers };
        if (hasBody) {
            headers["Content-Type"] = "application/json";
            init.body = JSON.stringify(body);
        }
        init.signal = AbortSignal.timeout(30_000);
        return fetch(`${API}${path}`, init);
    };
    return async (path, body) => {
        let attempt = 0;
        while (true) {
            const response = await send(path, body);
            if (response.ok) {
                return (await response.json());
            }
            const mayRetry = attempt < 3 && RETRYABLE.has(response.status);
            if (!mayRetry) {
                const detail = (await response.text()).slice(0, 200);
                throw new Error(`notion ${path}: HTTP ${response.status} ${detail}`);
            }
            const retryAfter = Number(response.headers.get("retry-after"));
            const serverAskedToWait = Number.isFinite(retryAfter) && retryAfter > 0;
            await sleep(serverAskedToWait ? retryAfter * 1000 : 500 * (attempt + 1));
            attempt += 1;
        }
    };
}
|
|
39
|
+
/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
42
|
+
/**
 * Validates the token and names the connection: "<workspace> (via <bot>)".
 *
 * @param fetchNotion client function; called once with "/users/me".
 * @returns {Promise<string>} the bot's workspace name (or "Notion workspace"
 *   when absent), suffixed with " (via <name>)" when the user has a non-empty name.
 */
export async function validateNotionToken(fetchNotion) {
    const me = await fetchNotion("/users/me");
    const bot = (me.bot ?? {});
    let label = "Notion workspace";
    if (typeof bot.workspace_name === "string") {
        label = bot.workspace_name;
    }
    const hasName = typeof me.name === "string" && me.name.length > 0;
    if (hasName) {
        label = `${label} (via ${me.name})`;
    }
    return label;
}
|
|
49
|
+
/**
 * Reduce a raw search result to the fields the sync needs.
 *
 * @param raw one entry of a search response.
 * @returns {{ id: string, url: string, title: string, editedAt: string } | undefined}
 *   undefined unless `raw` is a "page" object with a string `id` and
 *   `last_edited_time`. A missing `url` falls back to a notion.so address
 *   built from the id with dashes removed.
 */
export function parsePage(raw) {
    const isObject = typeof raw === "object" && raw !== null;
    if (!isObject) {
        return undefined;
    }
    const { object, id, last_edited_time: editedAt } = raw;
    const wellFormed = object === "page" && typeof id === "string" && typeof editedAt === "string";
    if (!wellFormed) {
        return undefined;
    }
    let url = raw.url;
    if (typeof url !== "string") {
        url = `https://www.notion.so/${id.replaceAll("-", "")}`;
    }
    return { id, url, title: pageTitle(raw.properties), editedAt };
}
|
|
63
|
+
/**
 * Find a page's title among its properties.
 *
 * @param properties the page's `properties` object.
 * @returns {string} the text of the first property of type "title" that
 *   renders to non-empty text, or "Untitled".
 */
export function pageTitle(properties) {
    const FALLBACK = "Untitled";
    if (typeof properties !== "object" || properties === null) {
        return FALLBACK;
    }
    for (const candidate of Object.values(properties)) {
        const isTitle = candidate?.type === "title" && Array.isArray(candidate.title);
        if (!isTitle) {
            continue;
        }
        const text = richText(candidate.title);
        if (text) {
            return text;
        }
    }
    return FALLBACK;
}
|
|
76
|
+
/**
 * Concatenate the `plain_text` of rich-text spans.
 *
 * @param spans array of span objects; anything that is not an array yields "".
 * @returns {string} the spans' string `plain_text` values joined without a
 *   separator; spans lacking one contribute nothing.
 */
export function richText(spans) {
    if (!Array.isArray(spans)) {
        return "";
    }
    let text = "";
    for (const span of spans) {
        if (typeof span?.plain_text === "string") {
            text += span.plain_text;
        }
    }
    return text;
}
|
|
86
|
+
// Plain-text rendering per supported block type: each entry maps the block's
// text to the line that is emitted for it.
const BLOCK_PREFIX = (() => {
    const prefixed = (prefix) => (t) => `${prefix}${t}`;
    const unchanged = (t) => t;
    return {
        heading_1: prefixed("# "),
        heading_2: prefixed("## "),
        heading_3: prefixed("### "),
        bulleted_list_item: prefixed("- "),
        numbered_list_item: prefixed("- "),
        to_do: prefixed("- [ ] "),
        quote: prefixed("> "),
        paragraph: unchanged,
        callout: prefixed("> "),
        toggle: unchanged,
        code: unchanged,
    };
})();
|
|
99
|
+
/**
 * Render one block as a single line of plain text.
 *
 * @param raw    a block object from a block-children response.
 * @param {string} indent prefix prepended to the rendered line.
 * @returns {string | undefined} the rendered line, or undefined when the block
 *   is malformed, of an unsupported type, or has no text.
 */
export function renderBlock(raw, indent) {
    if (typeof raw !== "object" || raw === null)
        return undefined;
    const block = raw;
    const type = block.type;
    if (typeof type !== "string")
        return undefined;
    if (type === "child_page") {
        const child = block.child_page;
        return `${indent}▸ ${typeof child?.title === "string" ? child.title : "Untitled"}`;
    }
    if (type === "divider")
        return `${indent}---`;
    // Own-property lookup: a type such as "constructor" must not resolve to an
    // inherited Object.prototype member.
    const render = Object.hasOwn(BLOCK_PREFIX, type) ? BLOCK_PREFIX[type] : undefined;
    if (!render)
        return undefined;
    const payload = block[type];
    const text = richText(payload?.rich_text);
    if (!text.trim())
        return undefined;
    // A completed to-do must not be rendered as an open one.
    if (type === "to_do" && payload?.checked === true)
        return `${indent}- [x] ${text}`;
    return indent + render(text);
}
|
|
121
|
+
/**
 * Render a block's children, and their children recursively, as text lines.
 *
 * Children are fetched 100 at a time until the response reports no more.
 * Each level is indented by `depth` copies of the indent string. Nested blocks
 * are followed only while `depth` is below MAX_BLOCK_DEPTH, and child pages
 * are listed but never descended into.
 *
 * @param fetchNotion client function for API paths.
 * @param {string} blockId id of the block (or page) whose children to render.
 * @param {number} depth current nesting level, 0 for a page's top-level blocks.
 * @returns {Promise<string[]>} rendered lines in document order.
 */
async function renderBlockTree(fetchNotion, blockId, depth) {
    const indent = " ".repeat(depth);
    const lines = [];
    let cursor;
    do {
        const paging = cursor ? `&start_cursor=${cursor}` : "";
        const result = await fetchNotion(`/blocks/${blockId}/children?page_size=100${paging}`);
        const children = Array.isArray(result.results) ? result.results : [];
        for (const child of children) {
            const line = renderBlock(child, indent);
            if (line !== undefined) {
                lines.push(line);
            }
            const shouldDescend = child.has_children === true && child.type !== "child_page" && depth < MAX_BLOCK_DEPTH;
            if (shouldDescend) {
                const nested = await renderBlockTree(fetchNotion, child.id, depth + 1);
                lines.push(...nested);
            }
        }
        const hasNext = result.has_more === true && typeof result.next_cursor === "string";
        cursor = hasNext ? result.next_cursor : undefined;
    } while (cursor);
    return lines;
}
|
|
140
|
+
/**
 * Pages the cursor has not seen yet, newest first. `cursor` is the newest
 * last_edited_time already synced.
 *
 * The search is walked newest-edit-first and stops at the first page that is
 * not newer than the cursor, or once MAX_PAGES_PER_SYNC pages are collected
 * (reported as `truncated`). Each collected page is then rendered to text.
 *
 * @param fetchNotion client function for API paths.
 * @param cursor      newest `last_edited_time` already synced, or undefined.
 * @param log         function called with cap and progress messages.
 * @returns {Promise<{ items: object[], cursor?: string, truncated: boolean }>}
 */
export async function fetchNotionPages(fetchNotion, cursor, log) {
    const pages = [];
    let truncated = false;
    let searchCursor;
    let finished = false;
    while (!finished) {
        const request = {
            filter: { property: "object", value: "page" },
            sort: { direction: "descending", timestamp: "last_edited_time" },
            page_size: 100,
        };
        if (searchCursor) {
            request.start_cursor = searchCursor;
        }
        const result = await fetchNotion("/search", request);
        const batch = Array.isArray(result.results) ? result.results : [];
        for (const raw of batch) {
            const page = parsePage(raw);
            if (!page) {
                continue;
            }
            if (cursor !== undefined && page.editedAt <= cursor) {
                finished = true; // sorted desc — the rest is already synced
                break;
            }
            if (pages.length >= MAX_PAGES_PER_SYNC) {
                truncated = true;
                finished = true;
                break;
            }
            pages.push(page);
        }
        if (!finished) {
            const hasNext = result.has_more === true && typeof result.next_cursor === "string";
            searchCursor = hasNext ? result.next_cursor : undefined;
            finished = !searchCursor;
        }
    }
    if (truncated) {
        log(`capped at the ${MAX_PAGES_PER_SYNC} most recently edited pages — older pages sync when they are next edited`);
    }
    const items = [];
    for (const { id, url, title, editedAt } of pages) {
        const lines = await renderBlockTree(fetchNotion, id, 0);
        items.push({ uri: url, title, content: lines.join("\n"), editedAt });
        // Progress report every 25 pages.
        if (items.length % 25 === 0) {
            log(`fetched ${items.length}/${pages.length} pages…`);
        }
    }
    // The first collected page is the newest; with nothing new the cursor carries over.
    const newest = pages[0]?.editedAt ?? cursor;
    if (newest === undefined) {
        return { items, truncated };
    }
    return { items, cursor: newest, truncated };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { chmodSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join } from "node:path";
|
|
4
|
+
/**
|
|
5
|
+
* Connector tokens live in one chmod-0600 JSON file, keyed by connection id —
|
|
6
|
+
* the same convention as provider API keys in config.json. Deliberately not
|
|
7
|
+
* the OS keychain: login services and headless machines hit locked-keychain
|
|
8
|
+
* failures (a documented OpenClaw lesson), and a 0600 file under the user's
|
|
9
|
+
* home is the same trust boundary the SQLite store already lives in.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Location of the secrets file.
 *
 * @returns {string} the ATHENA_SECRETS environment variable when it is set,
 *   otherwise `.athena/secrets.json` under the user's home directory.
 */
export function secretsPath() {
    const override = process.env.ATHENA_SECRETS;
    if (override !== undefined && override !== null) {
        return override;
    }
    return join(homedir(), ".athena", "secrets.json");
}
|
|
14
|
+
/**
 * Read the whole secrets file.
 *
 * @returns {object} the stored key → secret map, or `{}` when the file is
 *   missing, unreadable, not valid JSON, or does not hold a plain object.
 */
function load() {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(secretsPath(), "utf8"));
    }
    catch {
        return {};
    }
    // A file containing `null`, an array or a primitive parses successfully but
    // is not a secrets map; treating it as empty keeps the callers' property
    // reads and deletes from throwing.
    const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
|
|
22
|
+
/**
 * Write the secrets map to disk as pretty-printed JSON, creating the parent
 * directory when needed and restricting the file to owner read/write.
 *
 * @param {object} secrets the complete key → secret map to store.
 */
function persist(secrets) {
    const path = secretsPath();
    mkdirSync(dirname(path), { recursive: true });
    writeFileSync(path, `${JSON.stringify(secrets, null, 2)}\n`, { mode: 0o600 });
    // `mode` above only applies when the file is created; tighten a file that
    // already existed with looser permissions.
    chmodSync(path, 0o600);
}
|
|
27
|
+
/**
 * Store `value` under `key`, keeping every other stored secret.
 *
 * @param {string} key   connection id the secret belongs to.
 * @param {string} value secret to store.
 */
export function saveSecret(key, value) {
    const existing = load();
    const updated = { ...existing, [key]: value };
    persist(updated);
}
|
|
30
|
+
/**
 * Look up the secret stored under `key`.
 *
 * @param {string} key connection id.
 * @returns the stored value, or undefined when there is none.
 */
export function loadSecret(key) {
    const secrets = load();
    return secrets[key];
}
|
|
33
|
+
/**
 * Remove the secret stored under `key` and rewrite the secrets file.
 *
 * @param {string} key connection id whose secret is dropped.
 */
export function deleteSecret(key) {
    const remaining = load();
    delete remaining[key];
    persist(remaining);
}
|